parishad 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parishad/__init__.py +70 -0
- parishad/__main__.py +10 -0
- parishad/checker/__init__.py +25 -0
- parishad/checker/deterministic.py +644 -0
- parishad/checker/ensemble.py +496 -0
- parishad/checker/retrieval.py +546 -0
- parishad/cli/__init__.py +6 -0
- parishad/cli/code.py +3254 -0
- parishad/cli/main.py +1158 -0
- parishad/cli/prarambh.py +99 -0
- parishad/cli/sthapana.py +368 -0
- parishad/config/modes.py +139 -0
- parishad/config/pipeline.core.yaml +128 -0
- parishad/config/pipeline.extended.yaml +172 -0
- parishad/config/pipeline.fast.yaml +89 -0
- parishad/config/user_config.py +115 -0
- parishad/data/catalog.py +118 -0
- parishad/data/models.json +108 -0
- parishad/memory/__init__.py +79 -0
- parishad/models/__init__.py +181 -0
- parishad/models/backends/__init__.py +247 -0
- parishad/models/backends/base.py +211 -0
- parishad/models/backends/huggingface.py +318 -0
- parishad/models/backends/llama_cpp.py +239 -0
- parishad/models/backends/mlx_lm.py +141 -0
- parishad/models/backends/ollama.py +253 -0
- parishad/models/backends/openai_api.py +193 -0
- parishad/models/backends/transformers_hf.py +198 -0
- parishad/models/costs.py +385 -0
- parishad/models/downloader.py +1557 -0
- parishad/models/optimizations.py +871 -0
- parishad/models/profiles.py +610 -0
- parishad/models/reliability.py +876 -0
- parishad/models/runner.py +651 -0
- parishad/models/tokenization.py +287 -0
- parishad/orchestrator/__init__.py +24 -0
- parishad/orchestrator/config_loader.py +210 -0
- parishad/orchestrator/engine.py +1113 -0
- parishad/orchestrator/exceptions.py +14 -0
- parishad/roles/__init__.py +71 -0
- parishad/roles/base.py +712 -0
- parishad/roles/dandadhyaksha.py +163 -0
- parishad/roles/darbari.py +246 -0
- parishad/roles/majumdar.py +274 -0
- parishad/roles/pantapradhan.py +150 -0
- parishad/roles/prerak.py +357 -0
- parishad/roles/raja.py +345 -0
- parishad/roles/sacheev.py +203 -0
- parishad/roles/sainik.py +427 -0
- parishad/roles/sar_senapati.py +164 -0
- parishad/roles/vidushak.py +69 -0
- parishad/tools/__init__.py +7 -0
- parishad/tools/base.py +57 -0
- parishad/tools/fs.py +110 -0
- parishad/tools/perception.py +96 -0
- parishad/tools/retrieval.py +74 -0
- parishad/tools/shell.py +103 -0
- parishad/utils/__init__.py +7 -0
- parishad/utils/hardware.py +122 -0
- parishad/utils/logging.py +79 -0
- parishad/utils/scanner.py +164 -0
- parishad/utils/text.py +61 -0
- parishad/utils/tracing.py +133 -0
- parishad-0.1.0.dist-info/METADATA +256 -0
- parishad-0.1.0.dist-info/RECORD +68 -0
- parishad-0.1.0.dist-info/WHEEL +4 -0
- parishad-0.1.0.dist-info/entry_points.txt +2 -0
- parishad-0.1.0.dist-info/licenses/LICENSE +21 -0
parishad/data/models.json
@@ -0,0 +1,108 @@
+{
+  "version": "0.1.0",
+  "updated": "2024-12-30",
+  "sources": {
+    "ollama": {
+      "name": "Ollama",
+      "icon": "🦙",
+      "color": "#4a9eff",
+      "url": "https://ollama.ai/library",
+      "models": [
+        {"name": "Llama 3.2 1B", "shortcut": "llama3.2:1b", "size_gb": 0.8, "params": "1B", "quantization": "Q4_K_M", "distributor": "Meta", "description": "Compact and fast", "tags": ["lightweight", "fast", "chat"]},
+        {"name": "Llama 3.2 3B", "shortcut": "llama3.2:3b", "size_gb": 2.0, "params": "3B", "quantization": "Q4_K_M", "distributor": "Meta", "description": "Balanced performance", "tags": ["balanced", "chat", "code"]},
+        {"name": "Llama 3.1 8B", "shortcut": "llama3.1:8b", "size_gb": 5.0, "params": "8B", "quantization": "Q4_K_M", "distributor": "Meta", "description": "Meta's flagship model", "tags": ["powerful", "chat", "reasoning"]},
+        {"name": "Llama 3.1 70B", "shortcut": "llama3.1:70b", "size_gb": 40.0, "params": "70B", "quantization": "Q4_K_M", "distributor": "Meta", "description": "Largest Llama model", "tags": ["large", "powerful", "reasoning"]},
+        {"name": "Qwen 2.5 0.5B", "shortcut": "qwen2.5:0.5b", "size_gb": 0.4, "params": "0.5B", "quantization": "Q4_K_M", "distributor": "Alibaba", "description": "Ultra-lightweight", "tags": ["tiny", "fast", "efficient"]},
+        {"name": "Qwen 2.5 1.5B", "shortcut": "qwen2.5:1.5b", "size_gb": 1.0, "params": "1.5B", "quantization": "Q4_K_M", "distributor": "Alibaba", "description": "Compact Qwen", "tags": ["lightweight", "fast", "multilingual"]},
+        {"name": "Qwen 2.5 3B", "shortcut": "qwen2.5:3b", "size_gb": 2.0, "params": "3B", "quantization": "Q4_K_M", "distributor": "Alibaba", "description": "Balanced Qwen", "tags": ["balanced", "multilingual", "code"]},
+        {"name": "Qwen 2.5 7B", "shortcut": "qwen2.5:7b", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "Alibaba", "description": "Excellent reasoning", "tags": ["powerful", "reasoning", "multilingual"]},
+        {"name": "Qwen 2.5 14B", "shortcut": "qwen2.5:14b", "size_gb": 9.0, "params": "14B", "quantization": "Q4_K_M", "distributor": "Alibaba", "description": "Large Qwen model", "tags": ["large", "reasoning", "multilingual"]},
+        {"name": "Qwen 2.5 32B", "shortcut": "qwen2.5:32b", "size_gb": 20.0, "params": "32B", "quantization": "Q4_K_M", "distributor": "Alibaba", "description": "Very large Qwen", "tags": ["very-large", "reasoning", "multilingual"]},
+        {"name": "Qwen 2.5 72B", "shortcut": "qwen2.5:72b", "size_gb": 45.0, "params": "72B", "quantization": "Q4_K_M", "distributor": "Alibaba", "description": "Largest Qwen model", "tags": ["largest", "powerful", "multilingual"]},
+        {"name": "Qwen 2.5 Coder 1.5B", "shortcut": "qwen2.5-coder:1.5b", "size_gb": 1.0, "params": "1.5B", "quantization": "Q4_K_M", "distributor": "Alibaba", "description": "Small coding model", "tags": ["code", "fast", "programming"]},
+        {"name": "Qwen 2.5 Coder 7B", "shortcut": "qwen2.5-coder:7b", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "Alibaba", "description": "Powerful coding model", "tags": ["code", "programming", "powerful"]},
+        {"name": "Qwen 2.5 Coder 32B", "shortcut": "qwen2.5-coder:32b", "size_gb": 20.0, "params": "32B", "quantization": "Q4_K_M", "distributor": "Alibaba", "description": "Best coding model", "tags": ["code", "programming", "large"]},
+        {"name": "Mistral 7B", "shortcut": "mistral:7b", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "Mistral AI", "description": "Fast and capable", "tags": ["fast", "efficient", "chat"]},
+        {"name": "Mistral Small 22B", "shortcut": "mistral-small:22b", "size_gb": 14.0, "params": "22B", "quantization": "Q4_K_M", "distributor": "Mistral AI", "description": "Enterprise-grade", "tags": ["enterprise", "powerful", "reasoning"]},
+        {"name": "Mistral Large 123B", "shortcut": "mistral-large:123b", "size_gb": 75.0, "params": "123B", "quantization": "Q4_K_M", "distributor": "Mistral AI", "description": "Flagship Mistral", "tags": ["flagship", "very-large", "reasoning"]},
+        {"name": "Mixtral 8x7B", "shortcut": "mixtral:8x7b", "size_gb": 26.0, "params": "8x7B", "quantization": "Q4_K_M", "distributor": "Mistral AI", "description": "MoE architecture", "tags": ["moe", "efficient", "powerful"]},
+        {"name": "Mixtral 8x22B", "shortcut": "mixtral:8x22b", "size_gb": 80.0, "params": "8x22B", "quantization": "Q4_K_M", "distributor": "Mistral AI", "description": "Large MoE model", "tags": ["moe", "very-large", "powerful"]},
+        {"name": "Phi-3 Mini", "shortcut": "phi3:mini", "size_gb": 2.5, "params": "3.8B", "quantization": "Q4_K_M", "distributor": "Microsoft", "description": "Compact powerhouse", "tags": ["compact", "efficient", "reasoning"]},
+        {"name": "Phi-3 Medium", "shortcut": "phi3:medium", "size_gb": 8.0, "params": "14B", "quantization": "Q4_K_M", "distributor": "Microsoft", "description": "Medium Phi model", "tags": ["balanced", "reasoning", "math"]},
+        {"name": "Phi-3.5 Mini", "shortcut": "phi3.5:mini", "size_gb": 2.5, "params": "3.8B", "quantization": "Q4_K_M", "distributor": "Microsoft", "description": "Latest Phi mini", "tags": ["compact", "fast", "reasoning"]},
+        {"name": "Gemma 2 2B", "shortcut": "gemma2:2b", "size_gb": 1.5, "params": "2B", "quantization": "Q4_K_M", "distributor": "Google", "description": "Lightweight Gemma", "tags": ["lightweight", "fast", "efficient"]},
+        {"name": "Gemma 2 9B", "shortcut": "gemma2:9b", "size_gb": 5.5, "params": "9B", "quantization": "Q4_K_M", "distributor": "Google", "description": "Balanced Gemma", "tags": ["balanced", "reasoning", "chat"]},
+        {"name": "Gemma 2 27B", "shortcut": "gemma2:27b", "size_gb": 16.0, "params": "27B", "quantization": "Q4_K_M", "distributor": "Google", "description": "Large Gemma model", "tags": ["large", "powerful", "reasoning"]},
+        {"name": "DeepSeek R1 1.5B", "shortcut": "deepseek-r1:1.5b", "size_gb": 1.0, "params": "1.5B", "quantization": "Q4_K_M", "distributor": "DeepSeek", "description": "Compact reasoning", "tags": ["reasoning", "compact", "efficient"]},
+        {"name": "DeepSeek R1 7B", "shortcut": "deepseek-r1:7b", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "DeepSeek", "description": "Strong reasoning", "tags": ["reasoning", "powerful", "math"]},
+        {"name": "DeepSeek R1 14B", "shortcut": "deepseek-r1:14b", "size_gb": 9.0, "params": "14B", "quantization": "Q4_K_M", "distributor": "DeepSeek", "description": "Large reasoning model", "tags": ["reasoning", "large", "math"]},
+        {"name": "DeepSeek R1 32B", "shortcut": "deepseek-r1:32b", "size_gb": 20.0, "params": "32B", "quantization": "Q4_K_M", "distributor": "DeepSeek", "description": "Very large reasoning", "tags": ["reasoning", "very-large", "math"]},
+        {"name": "DeepSeek R1 70B", "shortcut": "deepseek-r1:70b", "size_gb": 45.0, "params": "70B", "quantization": "Q4_K_M", "distributor": "DeepSeek", "description": "Largest DeepSeek R1", "tags": ["reasoning", "largest", "math"]},
+        {"name": "DeepSeek Coder V2", "shortcut": "deepseek-coder-v2:16b", "size_gb": 10.0, "params": "16B", "quantization": "Q4_K_M", "distributor": "DeepSeek", "description": "Advanced coder", "tags": ["code", "programming", "powerful"]},
+        {"name": "CodeLlama 7B", "shortcut": "codellama:7b", "size_gb": 4.0, "params": "7B", "quantization": "Q4_K_M", "distributor": "Meta", "description": "Code-focused Llama", "tags": ["code", "programming", "balanced"]},
+        {"name": "CodeLlama 13B", "shortcut": "codellama:13b", "size_gb": 7.5, "params": "13B", "quantization": "Q4_K_M", "distributor": "Meta", "description": "Larger code model", "tags": ["code", "programming", "powerful"]},
+        {"name": "CodeLlama 34B", "shortcut": "codellama:34b", "size_gb": 20.0, "params": "34B", "quantization": "Q4_K_M", "distributor": "Meta", "description": "Best CodeLlama", "tags": ["code", "programming", "large"]},
+        {"name": "StarCoder 2 3B", "shortcut": "starcoder2:3b", "size_gb": 2.0, "params": "3B", "quantization": "Q4_K_M", "distributor": "BigCode", "description": "Compact coder", "tags": ["code", "fast", "programming"]},
+        {"name": "StarCoder 2 7B", "shortcut": "starcoder2:7b", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "BigCode", "description": "Balanced coder", "tags": ["code", "balanced", "programming"]},
+        {"name": "StarCoder 2 15B", "shortcut": "starcoder2:15b", "size_gb": 9.0, "params": "15B", "quantization": "Q4_K_M", "distributor": "BigCode", "description": "Large coder", "tags": ["code", "powerful", "programming"]},
+        {"name": "Granite 3 2B", "shortcut": "granite3-dense:2b", "size_gb": 1.5, "params": "2B", "quantization": "Q4_K_M", "distributor": "IBM", "description": "Compact enterprise", "tags": ["enterprise", "compact", "efficient"]},
+        {"name": "Granite 3 8B", "shortcut": "granite3-dense:8b", "size_gb": 5.0, "params": "8B", "quantization": "Q4_K_M", "distributor": "IBM", "description": "Enterprise model", "tags": ["enterprise", "balanced", "reasoning"]},
+        {"name": "Granite Code 3B", "shortcut": "granite-code:3b", "size_gb": 2.0, "params": "3B", "quantization": "Q4_K_M", "distributor": "IBM", "description": "Enterprise coder", "tags": ["code", "enterprise", "programming"]},
+        {"name": "Granite Code 8B", "shortcut": "granite-code:8b", "size_gb": 5.0, "params": "8B", "quantization": "Q4_K_M", "distributor": "IBM", "description": "Large enterprise coder", "tags": ["code", "enterprise", "powerful"]},
+        {"name": "Orca 2 7B", "shortcut": "orca2:7b", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "Microsoft", "description": "Reasoning focused", "tags": ["reasoning", "balanced", "efficient"]},
+        {"name": "Orca 2 13B", "shortcut": "orca2:13b", "size_gb": 7.5, "params": "13B", "quantization": "Q4_K_M", "distributor": "Microsoft", "description": "Large reasoning", "tags": ["reasoning", "large", "powerful"]},
+        {"name": "Neural Chat 7B", "shortcut": "neural-chat:7b", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "Intel", "description": "Optimized for Intel", "tags": ["chat", "intel", "optimized"]},
+        {"name": "Vicuna 7B", "shortcut": "vicuna:7b", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "LMSYS", "description": "Fine-tuned chatbot", "tags": ["chat", "fine-tuned", "balanced"]},
+        {"name": "Vicuna 13B", "shortcut": "vicuna:13b", "size_gb": 7.5, "params": "13B", "quantization": "Q4_K_M", "distributor": "LMSYS", "description": "Larger chatbot", "tags": ["chat", "fine-tuned", "powerful"]},
+        {"name": "OpenHermes 2.5 7B", "shortcut": "openhermes:7b", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "Teknium", "description": "General assistant", "tags": ["chat", "general", "balanced"]},
+        {"name": "Dolphin Mixtral 8x7B", "shortcut": "dolphin-mixtral:8x7b", "size_gb": 26.0, "params": "8x7B", "quantization": "Q4_K_M", "distributor": "Cognitive Computations", "description": "Uncensored MoE", "tags": ["uncensored", "moe", "powerful"]},
+        {"name": "Wizard Coder 15B", "shortcut": "wizard-coder:15b", "size_gb": 9.0, "params": "15B", "quantization": "Q4_K_M", "distributor": "WizardLM", "description": "Wizard coding model", "tags": ["code", "wizard", "powerful"]},
+        {"name": "MathStral 7B", "shortcut": "mathstral:7b", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "Mistral AI", "description": "Math specialized", "tags": ["math", "reasoning", "specialized"]},
+        {"name": "Nomic Embed Text", "shortcut": "nomic-embed-text", "size_gb": 0.3, "params": "137M", "quantization": "FP16", "distributor": "Nomic AI", "description": "Text embeddings", "tags": ["embeddings", "text", "rag"]},
+        {"name": "MxBAI Embed Large", "shortcut": "mxbai-embed-large", "size_gb": 0.7, "params": "334M", "quantization": "FP16", "distributor": "MixedBread", "description": "Large embeddings", "tags": ["embeddings", "large", "rag"]}
+      ]
+    },
+    "huggingface": {
+      "name": "HuggingFace",
+      "icon": "🤗",
+      "color": "#ffcc00",
+      "url": "https://huggingface.co/models",
+      "models": [
+        {"name": "Llama 3.2 1B Instruct", "shortcut": "bartowski/Llama-3.2-1B-Instruct-GGUF", "size_gb": 0.8, "params": "1B", "quantization": "Q4_K_M", "distributor": "Meta", "description": "Compact instruction model", "tags": ["lightweight", "instruct", "chat"]},
+        {"name": "Llama 3.2 3B Instruct", "shortcut": "bartowski/Llama-3.2-3B-Instruct-GGUF", "size_gb": 2.0, "params": "3B", "quantization": "Q4_K_M", "distributor": "Meta", "description": "Balanced instruction model", "tags": ["balanced", "instruct", "chat"]},
+        {"name": "Llama 3.1 8B Instruct", "shortcut": "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF", "size_gb": 5.0, "params": "8B", "quantization": "Q4_K_M", "distributor": "Meta", "description": "Flagship instruction model", "tags": ["powerful", "instruct", "chat"]},
+        {"name": "Qwen 2.5 7B Instruct", "shortcut": "Qwen/Qwen2.5-7B-Instruct-GGUF", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "Alibaba", "description": "Multilingual GGUF", "tags": ["multilingual", "instruct", "reasoning"]},
+        {"name": "Mistral 7B Instruct v0.3", "shortcut": "bartowski/Mistral-7B-Instruct-v0.3-GGUF", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "Mistral AI", "description": "Latest Mistral instruct", "tags": ["instruct", "fast", "efficient"]},
+        {"name": "Phi-3 Mini 4K Instruct", "shortcut": "microsoft/Phi-3-mini-4k-instruct-gguf", "size_gb": 2.5, "params": "3.8B", "quantization": "Q4", "distributor": "Microsoft", "description": "Compact reasoning", "tags": ["compact", "reasoning", "math"]},
+        {"name": "Phi-3.5 Mini Instruct", "shortcut": "bartowski/Phi-3.5-mini-instruct-GGUF", "size_gb": 2.5, "params": "3.8B", "quantization": "Q4_K_M", "distributor": "Microsoft", "description": "Latest Phi mini", "tags": ["compact", "fast", "reasoning"]},
+        {"name": "Gemma 2 9B IT", "shortcut": "bartowski/gemma-2-9b-it-GGUF", "size_gb": 5.5, "params": "9B", "quantization": "Q4_K_M", "distributor": "Google", "description": "Instruction tuned Gemma", "tags": ["instruct", "balanced", "chat"]},
+        {"name": "DeepSeek R1 Distill Qwen 7B", "shortcut": "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "DeepSeek", "description": "Distilled reasoning", "tags": ["reasoning", "distilled", "efficient"]},
+        {"name": "DeepSeek Coder V2 Lite", "shortcut": "bartowski/DeepSeek-Coder-V2-Lite-Instruct-GGUF", "size_gb": 10.0, "params": "16B", "quantization": "Q4_K_M", "distributor": "DeepSeek", "description": "Advanced coding", "tags": ["code", "programming", "powerful"]},
+        {"name": "CodeLlama 7B Instruct", "shortcut": "TheBloke/CodeLlama-7B-Instruct-GGUF", "size_gb": 4.0, "params": "7B", "quantization": "Q4_K_M", "distributor": "Meta", "description": "Code instruction", "tags": ["code", "instruct", "programming"]},
+        {"name": "StarCoder2 7B", "shortcut": "QuantFactory/starcoder2-7b-GGUF", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "BigCode", "description": "Open source coder", "tags": ["code", "open-source", "programming"]},
+        {"name": "Zephyr 7B Beta", "shortcut": "TheBloke/zephyr-7b-beta-GGUF", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "HuggingFace", "description": "Fine-tuned Mistral", "tags": ["chat", "fine-tuned", "helpful"]},
+        {"name": "OpenChat 3.5 7B", "shortcut": "TheBloke/openchat_3.5-GGUF", "size_gb": 4.5, "params": "7B", "quantization": "Q4_K_M", "distributor": "OpenChat", "description": "Top-tier chatbot", "tags": ["chat", "conversational", "helpful"]}
+      ]
+    },
+    "lmstudio": {
+      "name": "LM Studio",
+      "icon": "🎨",
+      "color": "#9966ff",
+      "url": "https://lmstudio.ai/models",
+      "models": [
+        {"name": "Llama 3.2 1B Instruct Q4", "shortcut": "lmstudio-community/Llama-3.2-1B-Instruct-GGUF", "size_gb": 0.7, "params": "1B", "quantization": "Q4_K_M", "distributor": "LM Studio", "description": "Optimized for LM Studio", "tags": ["gguf", "optimized", "lightweight"]},
+        {"name": "Llama 3.2 3B Instruct Q4", "shortcut": "lmstudio-community/Llama-3.2-3B-Instruct-GGUF", "size_gb": 1.8, "params": "3B", "quantization": "Q4_K_M", "distributor": "LM Studio", "description": "Balanced GGUF model", "tags": ["gguf", "balanced", "efficient"]},
+        {"name": "Llama 3.1 8B Instruct Q4", "shortcut": "lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF", "size_gb": 4.5, "params": "8B", "quantization": "Q4_K_M", "distributor": "LM Studio", "description": "Powerful GGUF model", "tags": ["gguf", "powerful", "versatile"]},
+        {"name": "Qwen 2.5 7B Instruct Q4", "shortcut": "lmstudio-community/Qwen2.5-7B-Instruct-GGUF", "size_gb": 4.3, "params": "7B", "quantization": "Q4_K_M", "distributor": "LM Studio", "description": "Multilingual GGUF", "tags": ["gguf", "multilingual", "reasoning"]},
+        {"name": "Qwen 2.5 14B Instruct Q4", "shortcut": "lmstudio-community/Qwen2.5-14B-Instruct-GGUF", "size_gb": 8.5, "params": "14B", "quantization": "Q4_K_M", "distributor": "LM Studio", "description": "Large Qwen GGUF", "tags": ["gguf", "large", "multilingual"]},
+        {"name": "Mistral 7B Instruct Q4", "shortcut": "lmstudio-community/Mistral-7B-Instruct-v0.3-GGUF", "size_gb": 4.1, "params": "7B", "quantization": "Q4_K_M", "distributor": "LM Studio", "description": "Fast Mistral GGUF", "tags": ["gguf", "fast", "efficient"]},
+        {"name": "Phi-3 Mini Q4", "shortcut": "lmstudio-community/Phi-3-mini-4k-instruct-GGUF", "size_gb": 2.2, "params": "3.8B", "quantization": "Q4_K_M", "distributor": "LM Studio", "description": "Compact Phi GGUF", "tags": ["gguf", "compact", "reasoning"]},
+        {"name": "Phi-3.5 Mini Q4", "shortcut": "lmstudio-community/Phi-3.5-mini-instruct-GGUF", "size_gb": 2.2, "params": "3.8B", "quantization": "Q4_K_M", "distributor": "LM Studio", "description": "Latest Phi GGUF", "tags": ["gguf", "compact", "fast"]},
+        {"name": "Gemma 2 9B IT Q4", "shortcut": "lmstudio-community/gemma-2-9b-it-GGUF", "size_gb": 5.2, "params": "9B", "quantization": "Q4_K_M", "distributor": "LM Studio", "description": "Gemma 2 GGUF", "tags": ["gguf", "balanced", "reasoning"]},
+        {"name": "DeepSeek R1 Distill 7B Q4", "shortcut": "lmstudio-community/DeepSeek-R1-Distill-Qwen-7B-GGUF", "size_gb": 4.3, "params": "7B", "quantization": "Q4_K_M", "distributor": "LM Studio", "description": "Reasoning GGUF", "tags": ["gguf", "reasoning", "distilled"]},
+        {"name": "CodeLlama 7B Instruct Q4", "shortcut": "lmstudio-community/CodeLlama-7b-Instruct-GGUF", "size_gb": 3.8, "params": "7B", "quantization": "Q4_K_M", "distributor": "LM Studio", "description": "Coding GGUF", "tags": ["gguf", "code", "programming"]},
+        {"name": "Qwen 2.5 Coder 7B Q4", "shortcut": "lmstudio-community/Qwen2.5-Coder-7B-Instruct-GGUF", "size_gb": 4.3, "params": "7B", "quantization": "Q4_K_M", "distributor": "LM Studio", "description": "Coding specialist GGUF", "tags": ["gguf", "code", "programming"]}
+      ]
+    }
+  }
+}
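
The catalog above is a plain JSON data file grouping downloadable models by source. A minimal sketch of how such a catalog could be read and filtered is shown below; the helper function and the hard-coded file path are illustrative assumptions, not part of the package's documented API.

```python
import json
from pathlib import Path

# Hypothetical helper: load the catalog and list models from one source
# that fit within a given download budget.
def models_under(catalog_path: str, source: str, max_size_gb: float) -> list[dict]:
    catalog = json.loads(Path(catalog_path).read_text(encoding="utf-8"))
    models = catalog["sources"][source]["models"]
    return [m for m in models if m["size_gb"] <= max_size_gb]

# Example: Ollama entries small enough for a modest machine.
for m in models_under("parishad/data/models.json", "ollama", max_size_gb=5.0):
    print(f'{m["shortcut"]:<25} {m["size_gb"]:>5} GB  {m["description"]}')
```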
parishad/memory/__init__.py
@@ -0,0 +1,79 @@
+"""
+Parishad Memory System.
+Provides vector storage and retrieval capabilities.
+"""
+
+from typing import Any, List, Dict, Optional
+
+class VectorStore:
+    """
+    VectorStore implementation using ChromaDB.
+    """
+
+    def __init__(self, collection_name: str = "parishad_memory", persist_dir: str = "./.parishad_data/vector_store"):
+        import chromadb
+        import os
+
+        # Ensure directory exists
+        os.makedirs(persist_dir, exist_ok=True)
+
+        self.client = chromadb.PersistentClient(path=persist_dir)
+        self.collection = self.client.get_or_create_collection(name=collection_name)
+
+    def add(self, documents: List[str], metadata: Optional[List[Dict[str, Any]]] = None, ids: Optional[List[str]] = None) -> None:
+        """Add documents to the store."""
+        if not documents:
+            return
+
+        # Generate IDs if not provided
+        if ids is None:
+            import uuid
+            ids = [str(uuid.uuid4()) for _ in documents]
+
+        # Ensure metadata is provided for all docs
+        if metadata is None:
+            metadata = [{} for _ in documents]
+
+        self.collection.add(
+            documents=documents,
+            metadatas=metadata,
+            ids=ids
+        )
+
+    def query(self, query_text: str, n_results: int = 5) -> List[Dict[str, Any]]:
+        """
+        Query the store.
+        Returns list of dicts with 'content', 'metadata', 'distance'.
+        """
+        try:
+            results = self.collection.query(
+                query_texts=[query_text],
+                n_results=n_results
+            )
+
+            # Chroma returns lists of lists (one per query)
+            if not results['documents'] or not results['documents'][0]:
+                return []
+
+            formatted_results = []
+
+            # Zip the first (and only) query result lists
+            doc_list = results['documents'][0]
+            meta_list = results['metadatas'][0]
+
+            # Handle distances if available (cosine distance usually)
+            dist_list = results['distances'][0] if results['distances'] else [0.0] * len(doc_list)
+
+            for doc, meta, dist in zip(doc_list, meta_list, dist_list):
+                formatted_results.append({
+                    'content': doc,
+                    'metadata': meta,
+                    'distance': dist
+                })
+
+            return formatted_results
+
+        except Exception as e:
+            # Fallback or empty on error
+            print(f"Vector search error: {e}")
+            return []
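
For orientation, the `VectorStore` wrapper above can be exercised roughly as follows, assuming `chromadb` is installed; the document texts and metadata are illustrative only.

```python
from parishad.memory import VectorStore

# Create (or reopen) a persistent collection on disk.
store = VectorStore(collection_name="notes", persist_dir="./.parishad_data/vector_store")

# Add documents; IDs are auto-generated and metadata defaults to {} when omitted.
store.add(
    documents=[
        "Parishad routes tasks through council roles.",
        "Backends wrap llama.cpp, Ollama, Transformers and more.",
    ],
    metadata=[{"topic": "architecture"}, {"topic": "backends"}],
)

# query() returns dicts with 'content', 'metadata' and 'distance' (lower is closer).
for hit in store.query("which model backends exist?", n_results=2):
    print(round(hit["distance"], 3), hit["content"])
```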
parishad/models/__init__.py
@@ -0,0 +1,181 @@
+"""Model abstraction layer for Parishad."""
+
+from .runner import (
+    ModelRunner,
+    ModelConfig,
+    SlotConfig,
+    Backend,
+    ModelRunnerError,
+    UnknownSlotError,
+    ModelBackendError,
+    BackendNotAvailableError,
+    TransformersBackend,
+    OpenAIBackend,
+)
+
+from .backends import (
+    BackendConfig,
+    BackendResult,
+    BackendError,
+    ModelBackend,
+    is_backend_available,
+    get_available_backends,
+)
+
+from .tokenization import (
+    estimate_tokens,
+    estimate_tokens_simple,
+    estimate_tokens_hybrid,
+    estimate_prompt_tokens,
+    count_tokens_tiktoken,
+    is_tiktoken_available,
+    get_tokenizer,
+    register_tokenizer,
+)
+
+from .costs import (
+    estimate_cost,
+    estimate_query_cost,
+    get_model_pricing,
+    ModelPricing,
+    CostMetrics,
+    estimate_flops,
+    get_model_size,
+)
+
+from .profiles import (
+    ProfileManager,
+    ProfileMode,
+    ProfileDefinition,
+    HardwareCapability,
+    EnvironmentInfo,
+    detect_environment,
+    get_default_profile,
+    get_profile_manager,
+    quick_runner,
+    BUILTIN_PROFILES,
+)
+
+# Task 7: Performance optimizations
+from .optimizations import (
+    ResponseCache,
+    PersistentCache,
+    RequestBatcher,
+    ConnectionPool,
+    RateLimiter,
+    OptimizedRunner,
+)
+
+# Task 8: Reliability
+from .reliability import (
+    RetryStrategy,
+    RetryPolicy,
+    TimeoutConfig,
+    TimeoutManager,
+    CircuitState,
+    CircuitBreakerConfig,
+    CircuitBreaker,
+    CircuitOpenError,
+    FallbackChain,
+    HealthStatus,
+    HealthChecker,
+    ResilientBackend,
+)
+
+# Model Download Manager
+from .downloader import (
+    ModelManager,
+    ModelRegistry,
+    ModelInfo,
+    ModelSource,
+    ModelFormat,
+    HuggingFaceDownloader,
+    OllamaManager,
+    LMStudioManager,
+    DownloadProgress,
+    DEFAULT_MODEL_DIR,
+)
+
+
+__all__ = [
+    # Main classes
+    "ModelRunner",
+    "ModelConfig",
+    "SlotConfig",
+    "Backend",
+    # Backend protocol
+    "ModelBackend",
+    "BackendConfig",
+    "BackendResult",
+    # Exceptions
+    "ModelRunnerError",
+    "UnknownSlotError",
+    "ModelBackendError",
+    "BackendNotAvailableError",
+    "BackendError",
+    # Backend implementations
+    "TransformersBackend",
+    "OpenAIBackend",
+    # Backend utilities
+    "is_backend_available",
+    "get_available_backends",
+    # Tokenization
+    "estimate_tokens",
+    "estimate_tokens_simple",
+    "estimate_tokens_hybrid",
+    "estimate_prompt_tokens",
+    "count_tokens_tiktoken",
+    "is_tiktoken_available",
+    "get_tokenizer",
+    "register_tokenizer",
+    # Cost estimation
+    "estimate_cost",
+    "estimate_query_cost",
+    "get_model_pricing",
+    "ModelPricing",
+    "CostMetrics",
+    "estimate_flops",
+    "get_model_size",
+    # Profile management
+    "ProfileManager",
+    "ProfileMode",
+    "ProfileDefinition",
+    "HardwareCapability",
+    "EnvironmentInfo",
+    "detect_environment",
+    "get_default_profile",
+    "get_profile_manager",
+    "quick_runner",
+    "BUILTIN_PROFILES",
+    # Performance optimizations (Task 7)
+    "ResponseCache",
+    "PersistentCache",
+    "RequestBatcher",
+    "ConnectionPool",
+    "RateLimiter",
+    "OptimizedRunner",
+    # Reliability (Task 8)
+    "RetryStrategy",
+    "RetryPolicy",
+    "TimeoutConfig",
+    "TimeoutManager",
+    "CircuitState",
+    "CircuitBreakerConfig",
+    "CircuitBreaker",
+    "CircuitOpenError",
+    "FallbackChain",
+    "HealthStatus",
+    "HealthChecker",
+    "ResilientBackend",
+    # Model Download Manager
+    "ModelManager",
+    "ModelRegistry",
+    "ModelInfo",
+    "ModelSource",
+    "ModelFormat",
+    "HuggingFaceDownloader",
+    "OllamaManager",
+    "LMStudioManager",
+    "DownloadProgress",
+    "DEFAULT_MODEL_DIR",
+]
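
Since `parishad/models/__init__.py` only re-exports symbols, the intended entry point is package-level imports. A small sketch using the two helpers whose definitions appear later in this diff; other re-exported classes are not exercised here because their constructors are not shown.

```python
from parishad.models import get_available_backends, is_backend_available

# Report which optional backend dependencies are importable in this environment.
for name in ("llama_cpp", "openai", "transformers", "ollama", "mlx"):
    status = "available" if is_backend_available(name) else "missing"
    print(f"{name:<14} {status}")

print("all available:", get_available_backends())
```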
parishad/models/backends/__init__.py
@@ -0,0 +1,247 @@
+"""
+Backend implementations for Parishad model runners.
+
+This package provides pluggable backend implementations for different LLM providers:
+- LlamaCppBackend: Local GGUF models via llama-cpp-python
+- OpenAIBackend: OpenAI API (and compatible endpoints)
+- OllamaBackend: Ollama local server (OpenAI-compatible)
+- OllamaNativeBackend: Ollama using native API
+- TransformersBackend: HuggingFace Transformers models
+- HuggingFaceBackend: HuggingFace Inference API (cloud)
+- HuggingFaceChatBackend: HuggingFace chat completion API
+- MlxBackend: Apple Silicon (M1/M2/M3/M4) via MLX
+"""
+
+from __future__ import annotations
+
+# Base classes and types - always available
+from .base import (
+    BackendError,
+    BackendConfig,
+    BackendResult,
+    ModelBackend,
+    BaseBackend,
+)
+
+# Conditional imports for optional backends
+_LLAMA_CPP_AVAILABLE = False
+_OPENAI_AVAILABLE = False
+_OLLAMA_AVAILABLE = False
+_OLLAMA_NATIVE_AVAILABLE = False
+_TRANSFORMERS_AVAILABLE = False
+_HUGGINGFACE_AVAILABLE = False
+_MLX_AVAILABLE = False
+
+
+# LlamaCpp
+try:
+    from .llama_cpp import LlamaCppBackend
+    _LLAMA_CPP_AVAILABLE = True
+except ImportError:
+    LlamaCppBackend = None  # type: ignore
+
+# OpenAI
+try:
+    from .openai_api import OpenAIBackend, OllamaBackend
+    _OPENAI_AVAILABLE = True
+    _OLLAMA_AVAILABLE = True
+except ImportError:
+    OpenAIBackend = None  # type: ignore
+    OllamaBackend = None  # type: ignore
+
+# Transformers
+try:
+    from .transformers_hf import TransformersBackend
+    _TRANSFORMERS_AVAILABLE = True
+except ImportError:
+    TransformersBackend = None  # type: ignore
+
+# HuggingFace Inference API
+try:
+    from .huggingface import HuggingFaceBackend, HuggingFaceChatBackend
+    _HUGGINGFACE_AVAILABLE = True
+except ImportError:
+    HuggingFaceBackend = None  # type: ignore
+    HuggingFaceChatBackend = None  # type: ignore
+
+# Ollama Native
+try:
+    from .ollama import OllamaNativeBackend
+    _OLLAMA_NATIVE_AVAILABLE = True
+except ImportError:
+    OllamaNativeBackend = None  # type: ignore
+
+# MLX
+try:
+    from .mlx_lm import MlxBackend
+    _MLX_AVAILABLE = True
+except ImportError:
+    MlxBackend = None  # type: ignore
+
+
+
+
+def is_backend_available(name: str) -> bool:
+    """Check if a backend's dependencies are installed."""
+    if name == "llama_cpp":
+        try:
+            import llama_cpp
+            return True
+        except ImportError:
+            return False
+    elif name == "openai":
+        try:
+            import openai
+            return True
+        except ImportError:
+            return False
+    elif name == "transformers":
+        try:
+            import transformers
+            import torch
+            return True
+        except ImportError:
+            return False
+    elif name == "ollama":
+        try:
+            import openai
+            return True
+        except ImportError:
+            return False
+    elif name == "mlx":
+        try:
+            import mlx_lm
+            return True
+        except ImportError:
+            return False
+
+    elif name == "ollama_native":
+        try:
+            import requests
+            return True
+        except ImportError:
+            return False
+    elif name in ("huggingface", "huggingface_chat"):
+        try:
+            import huggingface_hub
+            return True
+        except ImportError:
+            return False
+    return False
+
+
+def get_available_backends() -> list[str]:
+    """Get a list of currently available backends."""
+    backends = []
+
+    if is_backend_available("llama_cpp"):
+        backends.append("llama_cpp")
+    if is_backend_available("openai"):
+        backends.append("openai")
+    if is_backend_available("transformers"):
+        backends.append("transformers")
+    if is_backend_available("ollama"):
+        backends.append("ollama")
+    if is_backend_available("mlx"):
+        backends.append("mlx")
+    if is_backend_available("ollama_native"):
+        backends.append("ollama_native")
+    if is_backend_available("huggingface"):
+        backends.append("huggingface")
+        backends.append("huggingface_chat")
+
+    return backends
+
+
+def get_backend(backend_name: str) -> BaseBackend:
+    """
+    Factory function to get a backend instance by name.
+
+    Args:
+        backend_name: Name of the backend
+
+    Returns:
+        Backend instance (not loaded)
+
+    Raises:
+        ValueError: If backend unknown or deps not installed
+    """
+    backend_map = {
+        "llama_cpp": (LlamaCppBackend, _LLAMA_CPP_AVAILABLE),
+        "openai": (OpenAIBackend, _OPENAI_AVAILABLE),
+        "ollama": (OllamaBackend, _OLLAMA_AVAILABLE),
+        "ollama_native": (OllamaNativeBackend, _OLLAMA_NATIVE_AVAILABLE),
+        "transformers": (TransformersBackend, _TRANSFORMERS_AVAILABLE),
+        "huggingface": (HuggingFaceBackend, _HUGGINGFACE_AVAILABLE),
+        "huggingface_chat": (HuggingFaceChatBackend, _HUGGINGFACE_AVAILABLE),
+        "mlx": (MlxBackend, _MLX_AVAILABLE),
+    }
+
+    if backend_name not in backend_map:
+        available = ", ".join(backend_map.keys())
+        raise ValueError(
+            f"Unknown backend: '{backend_name}'. "
+            f"Available backends: {available}"
+        )
+
+    backend_class, is_available = backend_map[backend_name]
+
+    if not is_available or backend_class is None:
+        raise ValueError(
+            f"Backend '{backend_name}' is not available. "
+            f"Required dependencies are not installed. "
+            f"Available backends: {', '.join(get_available_backends())}"
+        )
+
+    return backend_class()
+
+
+# Stub backend for testing
+class StubBackend(BaseBackend):
+    """Simple stub backend for testing."""
+
+    _name = "stub"
+
+    def load(self, config: BackendConfig) -> None:
+        self._config = config
+        self._model_id = config.model_id
+        self._loaded = True
+
+    def generate(
+        self,
+        prompt: str,
+        max_tokens: int,
+        temperature: float,
+        top_p: float,
+        stop: list[str] | None = None,
+    ) -> BackendResult:
+        return BackendResult(
+            text="[STUB RESPONSE]",
+            tokens_in=self._estimate_tokens(prompt),
+            tokens_out=10,
+            model_id=self._model_id,
+        )
+
+
+__all__ = [
+    # Base classes
+    "BackendError",
+    "BackendConfig",
+    "BackendResult",
+    "ModelBackend",
+    "BaseBackend",
+    # Backend implementations
+    "LlamaCppBackend",
+    "OpenAIBackend",
+    "OllamaBackend",
+    "OllamaNativeBackend",
+    "TransformersBackend",
+    "HuggingFaceBackend",
+    "HuggingFaceChatBackend",
+    "MlxBackend",
+    "StubBackend",
+    # Utilities
+    "is_backend_available",
+    "get_available_backends",
+    "get_backend",
+]
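
A minimal sketch of the factory pattern above: pick the first backend whose dependencies are installed and instantiate it via `get_backend()`, which returns an unloaded instance and raises `ValueError` for unknown or unavailable names. The surrounding print statements are illustrative.

```python
from parishad.models.backends import get_available_backends, get_backend

names = get_available_backends()
if not names:
    print("No optional backend dependencies installed.")
else:
    try:
        # get_backend() only constructs the backend; loading a model is a
        # separate step (see StubBackend.load/generate above for the shape).
        backend = get_backend(names[0])
        print(f"Instantiated {type(backend).__name__}")
    except ValueError as err:
        print("Backend selection failed:", err)
```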