parishad-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. parishad/__init__.py +70 -0
  2. parishad/__main__.py +10 -0
  3. parishad/checker/__init__.py +25 -0
  4. parishad/checker/deterministic.py +644 -0
  5. parishad/checker/ensemble.py +496 -0
  6. parishad/checker/retrieval.py +546 -0
  7. parishad/cli/__init__.py +6 -0
  8. parishad/cli/code.py +3254 -0
  9. parishad/cli/main.py +1158 -0
  10. parishad/cli/prarambh.py +99 -0
  11. parishad/cli/sthapana.py +368 -0
  12. parishad/config/modes.py +139 -0
  13. parishad/config/pipeline.core.yaml +128 -0
  14. parishad/config/pipeline.extended.yaml +172 -0
  15. parishad/config/pipeline.fast.yaml +89 -0
  16. parishad/config/user_config.py +115 -0
  17. parishad/data/catalog.py +118 -0
  18. parishad/data/models.json +108 -0
  19. parishad/memory/__init__.py +79 -0
  20. parishad/models/__init__.py +181 -0
  21. parishad/models/backends/__init__.py +247 -0
  22. parishad/models/backends/base.py +211 -0
  23. parishad/models/backends/huggingface.py +318 -0
  24. parishad/models/backends/llama_cpp.py +239 -0
  25. parishad/models/backends/mlx_lm.py +141 -0
  26. parishad/models/backends/ollama.py +253 -0
  27. parishad/models/backends/openai_api.py +193 -0
  28. parishad/models/backends/transformers_hf.py +198 -0
  29. parishad/models/costs.py +385 -0
  30. parishad/models/downloader.py +1557 -0
  31. parishad/models/optimizations.py +871 -0
  32. parishad/models/profiles.py +610 -0
  33. parishad/models/reliability.py +876 -0
  34. parishad/models/runner.py +651 -0
  35. parishad/models/tokenization.py +287 -0
  36. parishad/orchestrator/__init__.py +24 -0
  37. parishad/orchestrator/config_loader.py +210 -0
  38. parishad/orchestrator/engine.py +1113 -0
  39. parishad/orchestrator/exceptions.py +14 -0
  40. parishad/roles/__init__.py +71 -0
  41. parishad/roles/base.py +712 -0
  42. parishad/roles/dandadhyaksha.py +163 -0
  43. parishad/roles/darbari.py +246 -0
  44. parishad/roles/majumdar.py +274 -0
  45. parishad/roles/pantapradhan.py +150 -0
  46. parishad/roles/prerak.py +357 -0
  47. parishad/roles/raja.py +345 -0
  48. parishad/roles/sacheev.py +203 -0
  49. parishad/roles/sainik.py +427 -0
  50. parishad/roles/sar_senapati.py +164 -0
  51. parishad/roles/vidushak.py +69 -0
  52. parishad/tools/__init__.py +7 -0
  53. parishad/tools/base.py +57 -0
  54. parishad/tools/fs.py +110 -0
  55. parishad/tools/perception.py +96 -0
  56. parishad/tools/retrieval.py +74 -0
  57. parishad/tools/shell.py +103 -0
  58. parishad/utils/__init__.py +7 -0
  59. parishad/utils/hardware.py +122 -0
  60. parishad/utils/logging.py +79 -0
  61. parishad/utils/scanner.py +164 -0
  62. parishad/utils/text.py +61 -0
  63. parishad/utils/tracing.py +133 -0
  64. parishad-0.1.0.dist-info/METADATA +256 -0
  65. parishad-0.1.0.dist-info/RECORD +68 -0
  66. parishad-0.1.0.dist-info/WHEEL +4 -0
  67. parishad-0.1.0.dist-info/entry_points.txt +2 -0
  68. parishad-0.1.0.dist-info/licenses/LICENSE +21 -0
parishad/data/models.json
@@ -0,0 +1,108 @@
+ {
+   "version": "0.1.0",
+   "updated": "2024-12-30",
+   "sources": {
+     "ollama": {
+       "name": "Ollama",
+       "icon": "🦙",
+       "color": "#4a9eff",
+       "url": "https://ollama.ai/library",
+       "models": [
+         {"name": "Llama 3.2 1B", "shortcut": "llama3.2:1b", "size_gb": 0.8, "params": "1B", "quantization": "Q4_K_M", "distributor": "Meta", "description": "Compact and fast", "tags": ["lightweight", "fast", "chat"]},
+         {"name": "Llama 3.2 3B", "shortcut": "llama3.2:3b", "size_gb": 2.0, "params": "3B", "quantization": "Q4_K_M", "distributor": "Meta", "description": "Balanced performance", "tags": ["balanced", "chat", "code"]},
+         {"name": "Llama 3.1 8B", "shortcut": "llama3.1:8b", "size_gb": 5.0, "params": "8B", "quantization": "Q4_K_M", "distributor": "Meta", "description": "Meta's flagship model", "tags": ["powerful", "chat", "reasoning"]},
+         {"name": "Llama 3.1 70B", "shortcut": "llama3.1:70b", "size_gb": 40.0, "params": "70B", "quantization": "Q4_K_M", "distributor": "Meta", "description": "Largest Llama model", "tags": ["large", "powerful", "reasoning"]},
+         {"name": "Qwen 2.5 0.5B", "shortcut": "qwen2.5:0.5b", "size_gb": 0.4, "params": "0.5B", "quantization": "Q4_K_M", "distributor": "Alibaba", "description": "Ultra-lightweight", "tags": ["tiny", "fast", "efficient"]},
+         {"name": "Qwen 2.5 1.5B", "shortcut": "qwen2.5:1.5b", "size_gb": 1.0, "params": "1.5B", "quantization": "Q4_K_M", "distributor": "Alibaba", "description": "Compact Qwen", "tags": ["lightweight", "fast", "multilingual"]},
+         {"name": "Qwen 2.5 3B", "shortcut": "qwen2.5:3b", "size_gb": 2.0, "params": "3B", "quantization": "Q4_K_M", "distributor": "Alibaba", "description": "Balanced Qwen", "tags": ["balanced", "multilingual", "code"]},
+         {"name": "Qwen 2.5 7B", "shortcut": "qwen2.5:7b", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "Alibaba", "description": "Excellent reasoning", "tags": ["powerful", "reasoning", "multilingual"]},
+         {"name": "Qwen 2.5 14B", "shortcut": "qwen2.5:14b", "size_gb": 9.0, "params": "14B", "quantization": "Q4_K_M", "distributor": "Alibaba", "description": "Large Qwen model", "tags": ["large", "reasoning", "multilingual"]},
+         {"name": "Qwen 2.5 32B", "shortcut": "qwen2.5:32b", "size_gb": 20.0, "params": "32B", "quantization": "Q4_K_M", "distributor": "Alibaba", "description": "Very large Qwen", "tags": ["very-large", "reasoning", "multilingual"]},
+         {"name": "Qwen 2.5 72B", "shortcut": "qwen2.5:72b", "size_gb": 45.0, "params": "72B", "quantization": "Q4_K_M", "distributor": "Alibaba", "description": "Largest Qwen model", "tags": ["largest", "powerful", "multilingual"]},
+         {"name": "Qwen 2.5 Coder 1.5B", "shortcut": "qwen2.5-coder:1.5b", "size_gb": 1.0, "params": "1.5B", "quantization": "Q4_K_M", "distributor": "Alibaba", "description": "Small coding model", "tags": ["code", "fast", "programming"]},
+         {"name": "Qwen 2.5 Coder 7B", "shortcut": "qwen2.5-coder:7b", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "Alibaba", "description": "Powerful coding model", "tags": ["code", "programming", "powerful"]},
+         {"name": "Qwen 2.5 Coder 32B", "shortcut": "qwen2.5-coder:32b", "size_gb": 20.0, "params": "32B", "quantization": "Q4_K_M", "distributor": "Alibaba", "description": "Best coding model", "tags": ["code", "programming", "large"]},
+         {"name": "Mistral 7B", "shortcut": "mistral:7b", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "Mistral AI", "description": "Fast and capable", "tags": ["fast", "efficient", "chat"]},
+         {"name": "Mistral Small 22B", "shortcut": "mistral-small:22b", "size_gb": 14.0, "params": "22B", "quantization": "Q4_K_M", "distributor": "Mistral AI", "description": "Enterprise-grade", "tags": ["enterprise", "powerful", "reasoning"]},
+         {"name": "Mistral Large 123B", "shortcut": "mistral-large:123b", "size_gb": 75.0, "params": "123B", "quantization": "Q4_K_M", "distributor": "Mistral AI", "description": "Flagship Mistral", "tags": ["flagship", "very-large", "reasoning"]},
+         {"name": "Mixtral 8x7B", "shortcut": "mixtral:8x7b", "size_gb": 26.0, "params": "8x7B", "quantization": "Q4_K_M", "distributor": "Mistral AI", "description": "MoE architecture", "tags": ["moe", "efficient", "powerful"]},
+         {"name": "Mixtral 8x22B", "shortcut": "mixtral:8x22b", "size_gb": 80.0, "params": "8x22B", "quantization": "Q4_K_M", "distributor": "Mistral AI", "description": "Large MoE model", "tags": ["moe", "very-large", "powerful"]},
+         {"name": "Phi-3 Mini", "shortcut": "phi3:mini", "size_gb": 2.5, "params": "3.8B", "quantization": "Q4_K_M", "distributor": "Microsoft", "description": "Compact powerhouse", "tags": ["compact", "efficient", "reasoning"]},
+         {"name": "Phi-3 Medium", "shortcut": "phi3:medium", "size_gb": 8.0, "params": "14B", "quantization": "Q4_K_M", "distributor": "Microsoft", "description": "Medium Phi model", "tags": ["balanced", "reasoning", "math"]},
+         {"name": "Phi-3.5 Mini", "shortcut": "phi3.5:mini", "size_gb": 2.5, "params": "3.8B", "quantization": "Q4_K_M", "distributor": "Microsoft", "description": "Latest Phi mini", "tags": ["compact", "fast", "reasoning"]},
+         {"name": "Gemma 2 2B", "shortcut": "gemma2:2b", "size_gb": 1.5, "params": "2B", "quantization": "Q4_K_M", "distributor": "Google", "description": "Lightweight Gemma", "tags": ["lightweight", "fast", "efficient"]},
+         {"name": "Gemma 2 9B", "shortcut": "gemma2:9b", "size_gb": 5.5, "params": "9B", "quantization": "Q4_K_M", "distributor": "Google", "description": "Balanced Gemma", "tags": ["balanced", "reasoning", "chat"]},
+         {"name": "Gemma 2 27B", "shortcut": "gemma2:27b", "size_gb": 16.0, "params": "27B", "quantization": "Q4_K_M", "distributor": "Google", "description": "Large Gemma model", "tags": ["large", "powerful", "reasoning"]},
+         {"name": "DeepSeek R1 1.5B", "shortcut": "deepseek-r1:1.5b", "size_gb": 1.0, "params": "1.5B", "quantization": "Q4_K_M", "distributor": "DeepSeek", "description": "Compact reasoning", "tags": ["reasoning", "compact", "efficient"]},
+         {"name": "DeepSeek R1 7B", "shortcut": "deepseek-r1:7b", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "DeepSeek", "description": "Strong reasoning", "tags": ["reasoning", "powerful", "math"]},
+         {"name": "DeepSeek R1 14B", "shortcut": "deepseek-r1:14b", "size_gb": 9.0, "params": "14B", "quantization": "Q4_K_M", "distributor": "DeepSeek", "description": "Large reasoning model", "tags": ["reasoning", "large", "math"]},
+         {"name": "DeepSeek R1 32B", "shortcut": "deepseek-r1:32b", "size_gb": 20.0, "params": "32B", "quantization": "Q4_K_M", "distributor": "DeepSeek", "description": "Very large reasoning", "tags": ["reasoning", "very-large", "math"]},
+         {"name": "DeepSeek R1 70B", "shortcut": "deepseek-r1:70b", "size_gb": 45.0, "params": "70B", "quantization": "Q4_K_M", "distributor": "DeepSeek", "description": "Largest DeepSeek R1", "tags": ["reasoning", "largest", "math"]},
+         {"name": "DeepSeek Coder V2", "shortcut": "deepseek-coder-v2:16b", "size_gb": 10.0, "params": "16B", "quantization": "Q4_K_M", "distributor": "DeepSeek", "description": "Advanced coder", "tags": ["code", "programming", "powerful"]},
+         {"name": "CodeLlama 7B", "shortcut": "codellama:7b", "size_gb": 4.0, "params": "7B", "quantization": "Q4_K_M", "distributor": "Meta", "description": "Code-focused Llama", "tags": ["code", "programming", "balanced"]},
+         {"name": "CodeLlama 13B", "shortcut": "codellama:13b", "size_gb": 7.5, "params": "13B", "quantization": "Q4_K_M", "distributor": "Meta", "description": "Larger code model", "tags": ["code", "programming", "powerful"]},
+         {"name": "CodeLlama 34B", "shortcut": "codellama:34b", "size_gb": 20.0, "params": "34B", "quantization": "Q4_K_M", "distributor": "Meta", "description": "Best CodeLlama", "tags": ["code", "programming", "large"]},
+         {"name": "StarCoder 2 3B", "shortcut": "starcoder2:3b", "size_gb": 2.0, "params": "3B", "quantization": "Q4_K_M", "distributor": "BigCode", "description": "Compact coder", "tags": ["code", "fast", "programming"]},
+         {"name": "StarCoder 2 7B", "shortcut": "starcoder2:7b", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "BigCode", "description": "Balanced coder", "tags": ["code", "balanced", "programming"]},
+         {"name": "StarCoder 2 15B", "shortcut": "starcoder2:15b", "size_gb": 9.0, "params": "15B", "quantization": "Q4_K_M", "distributor": "BigCode", "description": "Large coder", "tags": ["code", "powerful", "programming"]},
+         {"name": "Granite 3 2B", "shortcut": "granite3-dense:2b", "size_gb": 1.5, "params": "2B", "quantization": "Q4_K_M", "distributor": "IBM", "description": "Compact enterprise", "tags": ["enterprise", "compact", "efficient"]},
+         {"name": "Granite 3 8B", "shortcut": "granite3-dense:8b", "size_gb": 5.0, "params": "8B", "quantization": "Q4_K_M", "distributor": "IBM", "description": "Enterprise model", "tags": ["enterprise", "balanced", "reasoning"]},
+         {"name": "Granite Code 3B", "shortcut": "granite-code:3b", "size_gb": 2.0, "params": "3B", "quantization": "Q4_K_M", "distributor": "IBM", "description": "Enterprise coder", "tags": ["code", "enterprise", "programming"]},
+         {"name": "Granite Code 8B", "shortcut": "granite-code:8b", "size_gb": 5.0, "params": "8B", "quantization": "Q4_K_M", "distributor": "IBM", "description": "Large enterprise coder", "tags": ["code", "enterprise", "powerful"]},
+         {"name": "Orca 2 7B", "shortcut": "orca2:7b", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "Microsoft", "description": "Reasoning focused", "tags": ["reasoning", "balanced", "efficient"]},
+         {"name": "Orca 2 13B", "shortcut": "orca2:13b", "size_gb": 7.5, "params": "13B", "quantization": "Q4_K_M", "distributor": "Microsoft", "description": "Large reasoning", "tags": ["reasoning", "large", "powerful"]},
+         {"name": "Neural Chat 7B", "shortcut": "neural-chat:7b", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "Intel", "description": "Optimized for Intel", "tags": ["chat", "intel", "optimized"]},
+         {"name": "Vicuna 7B", "shortcut": "vicuna:7b", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "LMSYS", "description": "Fine-tuned chatbot", "tags": ["chat", "fine-tuned", "balanced"]},
+         {"name": "Vicuna 13B", "shortcut": "vicuna:13b", "size_gb": 7.5, "params": "13B", "quantization": "Q4_K_M", "distributor": "LMSYS", "description": "Larger chatbot", "tags": ["chat", "fine-tuned", "powerful"]},
+         {"name": "OpenHermes 2.5 7B", "shortcut": "openhermes:7b", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "Teknium", "description": "General assistant", "tags": ["chat", "general", "balanced"]},
+         {"name": "Dolphin Mixtral 8x7B", "shortcut": "dolphin-mixtral:8x7b", "size_gb": 26.0, "params": "8x7B", "quantization": "Q4_K_M", "distributor": "Cognitive Computations", "description": "Uncensored MoE", "tags": ["uncensored", "moe", "powerful"]},
+         {"name": "Wizard Coder 15B", "shortcut": "wizard-coder:15b", "size_gb": 9.0, "params": "15B", "quantization": "Q4_K_M", "distributor": "WizardLM", "description": "Wizard coding model", "tags": ["code", "wizard", "powerful"]},
+         {"name": "MathStral 7B", "shortcut": "mathstral:7b", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "Mistral AI", "description": "Math specialized", "tags": ["math", "reasoning", "specialized"]},
+         {"name": "Nomic Embed Text", "shortcut": "nomic-embed-text", "size_gb": 0.3, "params": "137M", "quantization": "FP16", "distributor": "Nomic AI", "description": "Text embeddings", "tags": ["embeddings", "text", "rag"]},
+         {"name": "MxBAI Embed Large", "shortcut": "mxbai-embed-large", "size_gb": 0.7, "params": "334M", "quantization": "FP16", "distributor": "MixedBread", "description": "Large embeddings", "tags": ["embeddings", "large", "rag"]}
+       ]
+     },
+     "huggingface": {
+       "name": "HuggingFace",
+       "icon": "🤗",
+       "color": "#ffcc00",
+       "url": "https://huggingface.co/models",
+       "models": [
+         {"name": "Llama 3.2 1B Instruct", "shortcut": "bartowski/Llama-3.2-1B-Instruct-GGUF", "size_gb": 0.8, "params": "1B", "quantization": "Q4_K_M", "distributor": "Meta", "description": "Compact instruction model", "tags": ["lightweight", "instruct", "chat"]},
+         {"name": "Llama 3.2 3B Instruct", "shortcut": "bartowski/Llama-3.2-3B-Instruct-GGUF", "size_gb": 2.0, "params": "3B", "quantization": "Q4_K_M", "distributor": "Meta", "description": "Balanced instruction model", "tags": ["balanced", "instruct", "chat"]},
+         {"name": "Llama 3.1 8B Instruct", "shortcut": "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF", "size_gb": 5.0, "params": "8B", "quantization": "Q4_K_M", "distributor": "Meta", "description": "Flagship instruction model", "tags": ["powerful", "instruct", "chat"]},
+         {"name": "Qwen 2.5 7B Instruct", "shortcut": "Qwen/Qwen2.5-7B-Instruct-GGUF", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "Alibaba", "description": "Multilingual GGUF", "tags": ["multilingual", "instruct", "reasoning"]},
+         {"name": "Mistral 7B Instruct v0.3", "shortcut": "bartowski/Mistral-7B-Instruct-v0.3-GGUF", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "Mistral AI", "description": "Latest Mistral instruct", "tags": ["instruct", "fast", "efficient"]},
+         {"name": "Phi-3 Mini 4K Instruct", "shortcut": "microsoft/Phi-3-mini-4k-instruct-gguf", "size_gb": 2.5, "params": "3.8B", "quantization": "Q4", "distributor": "Microsoft", "description": "Compact reasoning", "tags": ["compact", "reasoning", "math"]},
+         {"name": "Phi-3.5 Mini Instruct", "shortcut": "bartowski/Phi-3.5-mini-instruct-GGUF", "size_gb": 2.5, "params": "3.8B", "quantization": "Q4_K_M", "distributor": "Microsoft", "description": "Latest Phi mini", "tags": ["compact", "fast", "reasoning"]},
+         {"name": "Gemma 2 9B IT", "shortcut": "bartowski/gemma-2-9b-it-GGUF", "size_gb": 5.5, "params": "9B", "quantization": "Q4_K_M", "distributor": "Google", "description": "Instruction tuned Gemma", "tags": ["instruct", "balanced", "chat"]},
+         {"name": "DeepSeek R1 Distill Qwen 7B", "shortcut": "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "DeepSeek", "description": "Distilled reasoning", "tags": ["reasoning", "distilled", "efficient"]},
+         {"name": "DeepSeek Coder V2 Lite", "shortcut": "bartowski/DeepSeek-Coder-V2-Lite-Instruct-GGUF", "size_gb": 10.0, "params": "16B", "quantization": "Q4_K_M", "distributor": "DeepSeek", "description": "Advanced coding", "tags": ["code", "programming", "powerful"]},
+         {"name": "CodeLlama 7B Instruct", "shortcut": "TheBloke/CodeLlama-7B-Instruct-GGUF", "size_gb": 4.0, "params": "7B", "quantization": "Q4_K_M", "distributor": "Meta", "description": "Code instruction", "tags": ["code", "instruct", "programming"]},
+         {"name": "StarCoder2 7B", "shortcut": "QuantFactory/starcoder2-7b-GGUF", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "BigCode", "description": "Open source coder", "tags": ["code", "open-source", "programming"]},
+         {"name": "Zephyr 7B Beta", "shortcut": "TheBloke/zephyr-7b-beta-GGUF", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "HuggingFace", "description": "Fine-tuned Mistral", "tags": ["chat", "fine-tuned", "helpful"]},
+         {"name": "OpenChat 3.5 7B", "shortcut": "TheBloke/openchat_3.5-GGUF", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "OpenChat", "description": "Top-tier chatbot", "tags": ["chat", "conversational", "helpful"]}
+       ]
+     },
+     "lmstudio": {
+       "name": "LM Studio",
+       "icon": "🎨",
+       "color": "#9966ff",
+       "url": "https://lmstudio.ai/models",
+       "models": [
+         {"name": "Llama 3.2 1B Instruct Q4", "shortcut": "lmstudio-community/Llama-3.2-1B-Instruct-GGUF", "size_gb": 0.7, "params": "1B", "quantization": "Q4_K_M", "distributor": "LM Studio", "description": "Optimized for LM Studio", "tags": ["gguf", "optimized", "lightweight"]},
+         {"name": "Llama 3.2 3B Instruct Q4", "shortcut": "lmstudio-community/Llama-3.2-3B-Instruct-GGUF", "size_gb": 1.8, "params": "3B", "quantization": "Q4_K_M", "distributor": "LM Studio", "description": "Balanced GGUF model", "tags": ["gguf", "balanced", "efficient"]},
+         {"name": "Llama 3.1 8B Instruct Q4", "shortcut": "lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF", "size_gb": 4.5, "params": "8B", "quantization": "Q4_K_M", "distributor": "LM Studio", "description": "Powerful GGUF model", "tags": ["gguf", "powerful", "versatile"]},
+         {"name": "Qwen 2.5 7B Instruct Q4", "shortcut": "lmstudio-community/Qwen2.5-7B-Instruct-GGUF", "size_gb": 4.3, "params": "7B", "quantization": "Q4_K_M", "distributor": "LM Studio", "description": "Multilingual GGUF", "tags": ["gguf", "multilingual", "reasoning"]},
+         {"name": "Qwen 2.5 14B Instruct Q4", "shortcut": "lmstudio-community/Qwen2.5-14B-Instruct-GGUF", "size_gb": 8.5, "params": "14B", "quantization": "Q4_K_M", "distributor": "LM Studio", "description": "Large Qwen GGUF", "tags": ["gguf", "large", "multilingual"]},
+         {"name": "Mistral 7B Instruct Q4", "shortcut": "lmstudio-community/Mistral-7B-Instruct-v0.3-GGUF", "size_gb": 4.1, "params": "7B", "quantization": "Q4_K_M", "distributor": "LM Studio", "description": "Fast Mistral GGUF", "tags": ["gguf", "fast", "efficient"]},
+         {"name": "Phi-3 Mini Q4", "shortcut": "lmstudio-community/Phi-3-mini-4k-instruct-GGUF", "size_gb": 2.2, "params": "3.8B", "quantization": "Q4_K_M", "distributor": "LM Studio", "description": "Compact Phi GGUF", "tags": ["gguf", "compact", "reasoning"]},
+         {"name": "Phi-3.5 Mini Q4", "shortcut": "lmstudio-community/Phi-3.5-mini-instruct-GGUF", "size_gb": 2.2, "params": "3.8B", "quantization": "Q4_K_M", "distributor": "LM Studio", "description": "Latest Phi GGUF", "tags": ["gguf", "compact", "fast"]},
+         {"name": "Gemma 2 9B IT Q4", "shortcut": "lmstudio-community/gemma-2-9b-it-GGUF", "size_gb": 5.2, "params": "9B", "quantization": "Q4_K_M", "distributor": "LM Studio", "description": "Gemma 2 GGUF", "tags": ["gguf", "balanced", "reasoning"]},
+         {"name": "DeepSeek R1 Distill 7B Q4", "shortcut": "lmstudio-community/DeepSeek-R1-Distill-Qwen-7B-GGUF", "size_gb": 4.3, "params": "7B", "quantization": "Q4_K_M", "distributor": "LM Studio", "description": "Reasoning GGUF", "tags": ["gguf", "reasoning", "distilled"]},
+         {"name": "CodeLlama 7B Instruct Q4", "shortcut": "lmstudio-community/CodeLlama-7b-Instruct-GGUF", "size_gb": 3.8, "params": "7B", "quantization": "Q4_K_M", "distributor": "LM Studio", "description": "Coding GGUF", "tags": ["gguf", "code", "programming"]},
+         {"name": "Qwen 2.5 Coder 7B Q4", "shortcut": "lmstudio-community/Qwen2.5-Coder-7B-Instruct-GGUF", "size_gb": 4.3, "params": "7B", "quantization": "Q4_K_M", "distributor": "LM Studio", "description": "Coding specialist GGUF", "tags": ["gguf", "code", "programming"]}
+       ]
+     }
+   }
+ }
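
For orientation, here is a minimal sketch of how a consumer of this catalog (for example, parishad/data/catalog.py, whose contents are not shown in this diff) might load and filter it. The file path resolution and the filtering criteria are illustrative assumptions, not the package's actual API:

```python
import json
from pathlib import Path

# Illustrative path: the wheel ships the catalog as parishad/data/models.json.
catalog = json.loads(Path("parishad/data/models.json").read_text(encoding="utf-8"))

# List every model, across all sources, that fits in 8 GB and is tagged "code".
for source_key, source in catalog["sources"].items():
    for model in source["models"]:
        if model["size_gb"] <= 8.0 and "code" in model["tags"]:
            print(f'{source["name"]}: {model["name"]} -> {model["shortcut"]}')
```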
parishad/memory/__init__.py
@@ -0,0 +1,79 @@
+ """
+ Parishad Memory System.
+ Provides vector storage and retrieval capabilities.
+ """
+
+ from typing import Any, List, Dict, Optional
+
+ class VectorStore:
+     """
+     VectorStore implementation using ChromaDB.
+     """
+
+     def __init__(self, collection_name: str = "parishad_memory", persist_dir: str = "./.parishad_data/vector_store"):
+         import chromadb
+         import os
+
+         # Ensure directory exists
+         os.makedirs(persist_dir, exist_ok=True)
+
+         self.client = chromadb.PersistentClient(path=persist_dir)
+         self.collection = self.client.get_or_create_collection(name=collection_name)
+
+     def add(self, documents: List[str], metadata: Optional[List[Dict[str, Any]]] = None, ids: Optional[List[str]] = None) -> None:
+         """Add documents to the store."""
+         if not documents:
+             return
+
+         # Generate IDs if not provided
+         if ids is None:
+             import uuid
+             ids = [str(uuid.uuid4()) for _ in documents]
+
+         # Ensure metadata is provided for all docs
+         if metadata is None:
+             metadata = [{} for _ in documents]
+
+         self.collection.add(
+             documents=documents,
+             metadatas=metadata,
+             ids=ids
+         )
+
+     def query(self, query_text: str, n_results: int = 5) -> List[Dict[str, Any]]:
+         """
+         Query the store.
+         Returns list of dicts with 'content', 'metadata', 'distance'.
+         """
+         try:
+             results = self.collection.query(
+                 query_texts=[query_text],
+                 n_results=n_results
+             )
+
+             # Chroma returns lists of lists (one per query)
+             if not results['documents'] or not results['documents'][0]:
+                 return []
+
+             formatted_results = []
+
+             # Zip the first (and only) query result lists
+             doc_list = results['documents'][0]
+             meta_list = results['metadatas'][0]
+
+             # Handle distances if available (cosine distance usually)
+             dist_list = results['distances'][0] if results['distances'] else [0.0] * len(doc_list)
+
+             for doc, meta, dist in zip(doc_list, meta_list, dist_list):
+                 formatted_results.append({
+                     'content': doc,
+                     'metadata': meta,
+                     'distance': dist
+                 })
+
+             return formatted_results
+
+         except Exception as e:
+             # Fallback or empty on error
+             print(f"Vector search error: {e}")
+             return []
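
A usage sketch for the VectorStore above, assuming chromadb is installed; the documents and metadata are invented for illustration:

```python
# Persist two documents, then retrieve the closest matches.
store = VectorStore(collection_name="demo", persist_dir="./.parishad_data/vector_store")
store.add(
    documents=[
        "Parishad routes requests through council roles.",
        "The orchestrator engine loads a pipeline config.",
    ],
    metadata=[{"topic": "roles"}, {"topic": "orchestrator"}],
)
for hit in store.query("How are requests routed?", n_results=2):
    print(f"{hit['distance']:.3f}  {hit['content']}  {hit['metadata']}")
```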
parishad/models/__init__.py
@@ -0,0 +1,181 @@
+ """Model abstraction layer for Parishad."""
+
+ from .runner import (
+     ModelRunner,
+     ModelConfig,
+     SlotConfig,
+     Backend,
+     ModelRunnerError,
+     UnknownSlotError,
+     ModelBackendError,
+     BackendNotAvailableError,
+     TransformersBackend,
+     OpenAIBackend,
+ )
+
+ from .backends import (
+     BackendConfig,
+     BackendResult,
+     BackendError,
+     ModelBackend,
+     is_backend_available,
+     get_available_backends,
+ )
+
+ from .tokenization import (
+     estimate_tokens,
+     estimate_tokens_simple,
+     estimate_tokens_hybrid,
+     estimate_prompt_tokens,
+     count_tokens_tiktoken,
+     is_tiktoken_available,
+     get_tokenizer,
+     register_tokenizer,
+ )
+
+ from .costs import (
+     estimate_cost,
+     estimate_query_cost,
+     get_model_pricing,
+     ModelPricing,
+     CostMetrics,
+     estimate_flops,
+     get_model_size,
+ )
+
+ from .profiles import (
+     ProfileManager,
+     ProfileMode,
+     ProfileDefinition,
+     HardwareCapability,
+     EnvironmentInfo,
+     detect_environment,
+     get_default_profile,
+     get_profile_manager,
+     quick_runner,
+     BUILTIN_PROFILES,
+ )
+
+ # Task 7: Performance optimizations
+ from .optimizations import (
+     ResponseCache,
+     PersistentCache,
+     RequestBatcher,
+     ConnectionPool,
+     RateLimiter,
+     OptimizedRunner,
+ )
+
+ # Task 8: Reliability
+ from .reliability import (
+     RetryStrategy,
+     RetryPolicy,
+     TimeoutConfig,
+     TimeoutManager,
+     CircuitState,
+     CircuitBreakerConfig,
+     CircuitBreaker,
+     CircuitOpenError,
+     FallbackChain,
+     HealthStatus,
+     HealthChecker,
+     ResilientBackend,
+ )
+
+ # Model Download Manager
+ from .downloader import (
+     ModelManager,
+     ModelRegistry,
+     ModelInfo,
+     ModelSource,
+     ModelFormat,
+     HuggingFaceDownloader,
+     OllamaManager,
+     LMStudioManager,
+     DownloadProgress,
+     DEFAULT_MODEL_DIR,
+ )
+
+
+ __all__ = [
+     # Main classes
+     "ModelRunner",
+     "ModelConfig",
+     "SlotConfig",
+     "Backend",
+     # Backend protocol
+     "ModelBackend",
+     "BackendConfig",
+     "BackendResult",
+     # Exceptions
+     "ModelRunnerError",
+     "UnknownSlotError",
+     "ModelBackendError",
+     "BackendNotAvailableError",
+     "BackendError",
+     # Backend implementations
+     "TransformersBackend",
+     "OpenAIBackend",
+     # Backend utilities
+     "is_backend_available",
+     "get_available_backends",
+     # Tokenization
+     "estimate_tokens",
+     "estimate_tokens_simple",
+     "estimate_tokens_hybrid",
+     "estimate_prompt_tokens",
+     "count_tokens_tiktoken",
+     "is_tiktoken_available",
+     "get_tokenizer",
+     "register_tokenizer",
+     # Cost estimation
+     "estimate_cost",
+     "estimate_query_cost",
+     "get_model_pricing",
+     "ModelPricing",
+     "CostMetrics",
+     "estimate_flops",
+     "get_model_size",
+     # Profile management
+     "ProfileManager",
+     "ProfileMode",
+     "ProfileDefinition",
+     "HardwareCapability",
+     "EnvironmentInfo",
+     "detect_environment",
+     "get_default_profile",
+     "get_profile_manager",
+     "quick_runner",
+     "BUILTIN_PROFILES",
+     # Performance optimizations (Task 7)
+     "ResponseCache",
+     "PersistentCache",
+     "RequestBatcher",
+     "ConnectionPool",
+     "RateLimiter",
+     "OptimizedRunner",
+     # Reliability (Task 8)
+     "RetryStrategy",
+     "RetryPolicy",
+     "TimeoutConfig",
+     "TimeoutManager",
+     "CircuitState",
+     "CircuitBreakerConfig",
+     "CircuitBreaker",
+     "CircuitOpenError",
+     "FallbackChain",
+     "HealthStatus",
+     "HealthChecker",
+     "ResilientBackend",
+     # Model Download Manager
+     "ModelManager",
+     "ModelRegistry",
+     "ModelInfo",
+     "ModelSource",
+     "ModelFormat",
+     "HuggingFaceDownloader",
+     "OllamaManager",
+     "LMStudioManager",
+     "DownloadProgress",
+     "DEFAULT_MODEL_DIR",
+ ]
parishad/models/backends/__init__.py
@@ -0,0 +1,247 @@
+ """
+ Backend implementations for Parishad model runners.
+
+ This package provides pluggable backend implementations for different LLM providers:
+ - LlamaCppBackend: Local GGUF models via llama-cpp-python
+ - OpenAIBackend: OpenAI API (and compatible endpoints)
+ - OllamaBackend: Ollama local server (OpenAI-compatible)
+ - OllamaNativeBackend: Ollama using native API
+ - TransformersBackend: HuggingFace Transformers models
+ - HuggingFaceBackend: HuggingFace Inference API (cloud)
+ - HuggingFaceChatBackend: HuggingFace chat completion API
+ - MlxBackend: Apple Silicon (M1/M2/M3/M4) via MLX
+ """
+
+ from __future__ import annotations
+
+ # Base classes and types - always available
+ from .base import (
+     BackendError,
+     BackendConfig,
+     BackendResult,
+     ModelBackend,
+     BaseBackend,
+ )
+
+ # Conditional imports for optional backends
+ _LLAMA_CPP_AVAILABLE = False
+ _OPENAI_AVAILABLE = False
+ _OLLAMA_AVAILABLE = False
+ _OLLAMA_NATIVE_AVAILABLE = False
+ _TRANSFORMERS_AVAILABLE = False
+ _HUGGINGFACE_AVAILABLE = False
+ _MLX_AVAILABLE = False
+
+
+ # LlamaCpp
+ try:
+     from .llama_cpp import LlamaCppBackend
+     _LLAMA_CPP_AVAILABLE = True
+ except ImportError:
+     LlamaCppBackend = None # type: ignore
+
+ # OpenAI
+ try:
+     from .openai_api import OpenAIBackend, OllamaBackend
+     _OPENAI_AVAILABLE = True
+     _OLLAMA_AVAILABLE = True
+ except ImportError:
+     OpenAIBackend = None # type: ignore
+     OllamaBackend = None # type: ignore
+
+ # Transformers
+ try:
+     from .transformers_hf import TransformersBackend
+     _TRANSFORMERS_AVAILABLE = True
+ except ImportError:
+     TransformersBackend = None # type: ignore
+
+ # HuggingFace Inference API
+ try:
+     from .huggingface import HuggingFaceBackend, HuggingFaceChatBackend
+     _HUGGINGFACE_AVAILABLE = True
+ except ImportError:
+     HuggingFaceBackend = None # type: ignore
+     HuggingFaceChatBackend = None # type: ignore
+
+ # Ollama Native
+ try:
+     from .ollama import OllamaNativeBackend
+     _OLLAMA_NATIVE_AVAILABLE = True
+ except ImportError:
+     OllamaNativeBackend = None # type: ignore
+
+ # MLX
+ try:
+     from .mlx_lm import MlxBackend
+     _MLX_AVAILABLE = True
+ except ImportError:
+     MlxBackend = None # type: ignore
+
+
+
+
+ def is_backend_available(name: str) -> bool:
+     """Check if a backend's dependencies are installed."""
+     if name == "llama_cpp":
+         try:
+             import llama_cpp
+             return True
+         except ImportError:
+             return False
+     elif name == "openai":
+         try:
+             import openai
+             return True
+         except ImportError:
+             return False
+     elif name == "transformers":
+         try:
+             import transformers
+             import torch
+             return True
+         except ImportError:
+             return False
+     elif name == "ollama":
+         try:
+             import openai
+             return True
+         except ImportError:
+             return False
+     elif name == "mlx":
+         try:
+             import mlx_lm
+             return True
+         except ImportError:
+             return False
+
+     elif name == "ollama_native":
+         try:
+             import requests
+             return True
+         except ImportError:
+             return False
+     elif name in ("huggingface", "huggingface_chat"):
+         try:
+             import huggingface_hub
+             return True
+         except ImportError:
+             return False
+     return False
+
+
+ def get_available_backends() -> list[str]:
+     """Get a list of currently available backends."""
+     backends = []
+
+     if is_backend_available("llama_cpp"):
+         backends.append("llama_cpp")
+     if is_backend_available("openai"):
+         backends.append("openai")
+     if is_backend_available("transformers"):
+         backends.append("transformers")
+     if is_backend_available("ollama"):
+         backends.append("ollama")
+     if is_backend_available("mlx"):
+         backends.append("mlx")
+     if is_backend_available("ollama_native"):
+         backends.append("ollama_native")
+     if is_backend_available("huggingface"):
+         backends.append("huggingface")
+         backends.append("huggingface_chat")
+
+     return backends
+
+
+ def get_backend(backend_name: str) -> BaseBackend:
+     """
+     Factory function to get a backend instance by name.
+
+     Args:
+         backend_name: Name of the backend
+
+     Returns:
+         Backend instance (not loaded)
+
+     Raises:
+         ValueError: If backend unknown or deps not installed
+     """
+     backend_map = {
+         "llama_cpp": (LlamaCppBackend, _LLAMA_CPP_AVAILABLE),
+         "openai": (OpenAIBackend, _OPENAI_AVAILABLE),
+         "ollama": (OllamaBackend, _OLLAMA_AVAILABLE),
+         "ollama_native": (OllamaNativeBackend, _OLLAMA_NATIVE_AVAILABLE),
+         "transformers": (TransformersBackend, _TRANSFORMERS_AVAILABLE),
+         "huggingface": (HuggingFaceBackend, _HUGGINGFACE_AVAILABLE),
+         "huggingface_chat": (HuggingFaceChatBackend, _HUGGINGFACE_AVAILABLE),
+         "mlx": (MlxBackend, _MLX_AVAILABLE),
+     }
+
+     if backend_name not in backend_map:
+         available = ", ".join(backend_map.keys())
+         raise ValueError(
+             f"Unknown backend: '{backend_name}'. "
+             f"Available backends: {available}"
+         )
+
+     backend_class, is_available = backend_map[backend_name]
+
+     if not is_available or backend_class is None:
+         raise ValueError(
+             f"Backend '{backend_name}' is not available. "
+             f"Required dependencies are not installed. "
+             f"Available backends: {', '.join(get_available_backends())}"
+         )
+
+     return backend_class()
+
+
+ # Stub backend for testing
+ class StubBackend(BaseBackend):
+     """Simple stub backend for testing."""
+
+     _name = "stub"
+
+     def load(self, config: BackendConfig) -> None:
+         self._config = config
+         self._model_id = config.model_id
+         self._loaded = True
+
+     def generate(
+         self,
+         prompt: str,
+         max_tokens: int,
+         temperature: float,
+         top_p: float,
+         stop: list[str] | None = None,
+     ) -> BackendResult:
+         return BackendResult(
+             text="[STUB RESPONSE]",
+             tokens_in=self._estimate_tokens(prompt),
+             tokens_out=10,
+             model_id=self._model_id,
+         )
+
+
+ __all__ = [
+     # Base classes
+     "BackendError",
+     "BackendConfig",
+     "BackendResult",
+     "ModelBackend",
+     "BaseBackend",
+     # Backend implementations
+     "LlamaCppBackend",
+     "OpenAIBackend",
+     "OllamaBackend",
+     "OllamaNativeBackend",
+     "TransformersBackend",
+     "HuggingFaceBackend",
+     "HuggingFaceChatBackend",
+     "MlxBackend",
+     "StubBackend",
+     # Utilities
+     "is_backend_available",
+     "get_available_backends",
+     "get_backend",
+ ]
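
A usage sketch for the availability helpers and the get_backend factory above. The BackendConfig constructor call and the model id are assumptions (only the model_id attribute is visible in this diff); StubBackend is used as a fallback so the sketch runs even with no optional dependencies installed:

```python
from parishad.models.backends import (
    BackendConfig,
    StubBackend,
    get_available_backends,
    get_backend,
)

# Prefer an installed backend; fall back to the stub otherwise.
available = get_available_backends()
backend = get_backend(available[0]) if available else StubBackend()

# model_id is illustrative; BackendConfig may take other fields in practice.
backend.load(BackendConfig(model_id="llama3.2:1b"))
result = backend.generate(prompt="Say hello.", max_tokens=32, temperature=0.2, top_p=0.9)
print(result.model_id, result.text)
```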