neuralnode 2.0.6__tar.gz → 2.0.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {neuralnode-2.0.6 → neuralnode-2.0.8}/PKG-INFO +1 -1
- {neuralnode-2.0.6 → neuralnode-2.0.8}/pyproject.toml +1 -1
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/__init__.py +3 -2
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/providers/__init__.py +3 -2
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/providers/horus.py +66 -19
- {neuralnode-2.0.6 → neuralnode-2.0.8}/.env.example +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/.github/workflows/tests.yml +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/Dockerfile +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/LICENSE +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/README.md +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/docker-compose.yml +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/docs/documentation.md +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/docs/ecosystem_plan.md +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/docs/replica_voice_ids.csv +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/docs/replica_voice_ids.md +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/docs/telegram_guide.md +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/agent_with_tools.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/basic_chat.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_codes_camples/01_basic_usage.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_codes_camples/02_with_token.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_codes_camples/03_one_liner.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_codes_camples/04_custom_cache.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_codes_camples/05_4bit_quantization.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_codes_camples/06_8bit_quantization.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_codes_camples/07_multi_gpu.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_codes_camples/08_flash_attention.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_codes_camples/09_data_types.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_codes_camples/10_generation_params.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_codes_camples/11_streaming.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_codes_camples/12_chat_templates.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_codes_camples/13_offline_mode.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_codes_camples/14_force_download.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_codes_camples/15_model_info.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_codes_camples/16_cpu_offloading.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_codes_camples/17_cpu_only.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_codes_camples/18_production_setup.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_codes_camples/19_gguf_4bit.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_codes_camples/20_gguf_5bit.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_codes_camples/21_gguf_6bit.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_codes_camples/22_gguf_8bit.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_codes_camples/23_gguf_16bit.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_codes_camples/24_list_models.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_codes_camples/25_interactive_chat.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_codes_camples/README.md +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_download_guide.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_examples.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_tq_ready_gguf.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/horus_transformers_features.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/local_models.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/neuralnode_v21_complete_demo.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/shade_model_with_tools.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/telegram_bot_demo.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/thinking_mode_example.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/tts_demo.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/turboquant_example.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/examples/v3_features.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/horus_chat_voice.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/neuralnode_horus_replica_telegram.ipynb +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/nn.md +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/publish.bat +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/publish.sh +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/replica_output_85218.mp3 +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/requirements_shade.txt +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/scripts/setup.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/debug_import.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/agents/__init__.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/chains/__init__.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/config/__init__.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/core/__init__.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/core/openai_blocker.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/diagnostics/__init__.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/integrations/discord.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/integrations/slack.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/integrations/telegram.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/integrations/whatsapp.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/memory/__init__.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/memory/advanced.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/prompts/__init__.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/providers/base.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/providers/chat/__init__.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/providers/chat/ai21.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/providers/chat/anthropic.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/providers/chat/cohere.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/providers/chat/deepseek.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/providers/chat/fireworks.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/providers/chat/google.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/providers/chat/groq.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/providers/chat/mistral.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/providers/chat/perplexity.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/providers/chat/together.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/providers/chat_models.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/providers/embeddings.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/providers/local/__init__.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/providers/local_providers.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/providers/text_generation.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/providers/universal_local.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/rag/__init__.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/rag/loaders.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/reasoning/__init__.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/replica.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/speech/__init__.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/thinking.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/tools/__init__.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/tools/advanced.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/tools/multisearch.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/tools/system/__init__.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/tools/system/operations.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/tools/web/__init__.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/tts/__init__.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/turboquant.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/utils/__init__.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/utils/dependencies.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/utils/logger.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/utils/metrics.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/vectorstores/__init__.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/neuralnode/vision/__init__.py +0 -0
- {neuralnode-2.0.6 → neuralnode-2.0.8}/src/nn/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: neuralnode
|
|
3
|
-
Version: 2.0.6
|
|
3
|
+
Version: 2.0.8
|
|
4
4
|
Summary: Comprehensive AI Framework with 50+ LLM Providers, Advanced Agents, Chains, Memory, RAG, and 100+ Tools
|
|
5
5
|
Project-URL: Homepage, https://assem.cloud/
|
|
6
6
|
Project-URL: Documentation, https://neuralnode.readthedocs.io
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "neuralnode"
|
|
7
|
-
version = "2.0.6"
|
|
7
|
+
version = "2.0.8"
|
|
8
8
|
description = "Comprehensive AI Framework with 50+ LLM Providers, Advanced Agents, Chains, Memory, RAG, and 100+ Tools"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.9"
|
|
@@ -42,7 +42,7 @@ Quick Start::
|
|
|
42
42
|
text = sr.listen()
|
|
43
43
|
"""
|
|
44
44
|
|
|
45
|
-
__version__ = "2.0.6"
|
|
45
|
+
__version__ = "2.0.8"
|
|
46
46
|
__author__ = "NeuralNode Contributors"
|
|
47
47
|
|
|
48
48
|
# ── Core types ────────────────────────────────────────────────────────────────
|
|
@@ -82,6 +82,7 @@ from .providers import (
|
|
|
82
82
|
TransformersProvider,
|
|
83
83
|
UniversalLocalProvider,
|
|
84
84
|
# Custom models
|
|
85
|
+
Horus,
|
|
85
86
|
HorusModel,
|
|
86
87
|
HorusProvider,
|
|
87
88
|
)
|
|
@@ -647,7 +648,7 @@ __all__ = [
|
|
|
647
648
|
"OllamaProvider", "AnthropicProvider", "GoogleProvider",
|
|
648
649
|
"GroqProvider", "MistralProvider", "CohereProvider", "DeepSeekProvider",
|
|
649
650
|
"LlamaCppProvider", "TransformersProvider", "UniversalLocalProvider",
|
|
650
|
-
"HorusModel", "HorusProvider",
|
|
651
|
+
"Horus", "HorusModel", "HorusProvider",
|
|
651
652
|
|
|
652
653
|
# Memory
|
|
653
654
|
"ConversationMemory", "FileMemory", "SlidingWindowMemory", "SummaryMemory",
|
|
@@ -108,10 +108,11 @@ from .universal_local import UniversalLocalProvider, load_local_model
|
|
|
108
108
|
|
|
109
109
|
# ── Custom Model Providers ─────────────────────────────────────────────────────
|
|
110
110
|
try:
|
|
111
|
-
from .horus import HorusProvider, HorusModel, load_horus
|
|
111
|
+
from .horus import HorusProvider, HorusModel, Horus, load_horus
|
|
112
112
|
except ImportError:
|
|
113
113
|
HorusProvider = None # type: ignore
|
|
114
114
|
HorusModel = None # type: ignore
|
|
115
|
+
Horus = None # type: ignore
|
|
115
116
|
load_horus = None # type: ignore
|
|
116
117
|
|
|
117
118
|
# MLX Provider (Apple Silicon — optional)
|
|
@@ -234,5 +235,5 @@ __all__ = [
|
|
|
234
235
|
"UniversalLocalProvider", "load_local_model",
|
|
235
236
|
|
|
236
237
|
# Custom Models
|
|
237
|
-
"HorusProvider", "HorusModel", "load_horus",
|
|
238
|
+
"HorusProvider", "HorusModel", "Horus", "load_horus",
|
|
238
239
|
]
|
|
@@ -141,49 +141,56 @@ class HorusProvider(BaseLLMProvider):
|
|
|
141
141
|
|
|
142
142
|
HORUS_MODELS: Dict[str, Dict[str, Any]] = {
|
|
143
143
|
"tokenaii/horus": {
|
|
144
|
-
"name": "Horus
|
|
144
|
+
"name": "Horus-1.0-4B",
|
|
145
|
+
"official_name": "Horus-1.0-4B",
|
|
145
146
|
"size": "4B",
|
|
146
147
|
"type": "safetensors",
|
|
147
148
|
"repo_id": "tokenaii/horus",
|
|
148
149
|
"subfolder": "Horus-1.0-4B",
|
|
149
150
|
},
|
|
150
151
|
"tokenaii/horus/Horus-1.0-4B": {
|
|
151
|
-
"name": "Horus
|
|
152
|
+
"name": "Horus-1.0-4B",
|
|
153
|
+
"official_name": "Horus-1.0-4B",
|
|
152
154
|
"size": "4B",
|
|
153
155
|
"type": "safetensors",
|
|
154
156
|
"repo_id": "tokenaii/horus",
|
|
155
157
|
"subfolder": "Horus-1.0-4B",
|
|
156
158
|
},
|
|
157
159
|
"tokenaii/Hours-1.0-4B-GGUF/Horus-1.0-4B-Q4_K_M.gguf": {
|
|
158
|
-
"name": "Horus
|
|
160
|
+
"name": "Horus-1.0-4B-Q4_K_M.gguf",
|
|
161
|
+
"official_name": "Horus-1.0-4B-Q4_K_M.gguf",
|
|
159
162
|
"size": "4B",
|
|
160
163
|
"type": "gguf",
|
|
161
164
|
"quantization": "Q4_K_M",
|
|
162
165
|
"file_size": "2.78 GB",
|
|
163
166
|
},
|
|
164
167
|
"tokenaii/Hours-1.0-4B-GGUF/Horus-1.0-4B-Q5_K_M.gguf": {
|
|
165
|
-
"name": "Horus
|
|
168
|
+
"name": "Horus-1.0-4B-Q5_K_M.gguf",
|
|
169
|
+
"official_name": "Horus-1.0-4B-Q5_K_M.gguf",
|
|
166
170
|
"size": "4B",
|
|
167
171
|
"type": "gguf",
|
|
168
172
|
"quantization": "Q5_K_M",
|
|
169
173
|
"file_size": "3.23 GB",
|
|
170
174
|
},
|
|
171
175
|
"tokenaii/Hours-1.0-4B-GGUF/Horus-1.0-4B-Q6_K.gguf": {
|
|
172
|
-
"name": "Horus
|
|
176
|
+
"name": "Horus-1.0-4B-Q6_K.gguf",
|
|
177
|
+
"official_name": "Horus-1.0-4B-Q6_K.gguf",
|
|
173
178
|
"size": "4B",
|
|
174
179
|
"type": "gguf",
|
|
175
180
|
"quantization": "Q6_K",
|
|
176
181
|
"file_size": "3.71 GB",
|
|
177
182
|
},
|
|
178
183
|
"tokenaii/Hours-1.0-4B-GGUF/Horus-1.0-4B-Q8_0.gguf": {
|
|
179
|
-
"name": "Horus
|
|
184
|
+
"name": "Horus-1.0-4B-Q8_0.gguf",
|
|
185
|
+
"official_name": "Horus-1.0-4B-Q8_0.gguf",
|
|
180
186
|
"size": "4B",
|
|
181
187
|
"type": "gguf",
|
|
182
188
|
"quantization": "Q8_0",
|
|
183
189
|
"file_size": "4.8 GB",
|
|
184
190
|
},
|
|
185
191
|
"tokenaii/Hours-1.0-4B-GGUF/Horus-1.0-4B-F16.gguf": {
|
|
186
|
-
"name": "Horus
|
|
192
|
+
"name": "Horus-1.0-4B-F16.gguf",
|
|
193
|
+
"official_name": "Horus-1.0-4B-F16.gguf",
|
|
187
194
|
"size": "4B",
|
|
188
195
|
"type": "gguf",
|
|
189
196
|
"quantization": "F16",
|
|
@@ -213,6 +220,7 @@ class HorusProvider(BaseLLMProvider):
|
|
|
213
220
|
self,
|
|
214
221
|
model_id: str = "tokenaii/horus",
|
|
215
222
|
device: Optional[str] = None,
|
|
223
|
+
n_gpu_layers: Optional[int] = None,
|
|
216
224
|
torch_dtype: Optional[Any] = None,
|
|
217
225
|
load_in_8bit: bool = False,
|
|
218
226
|
load_in_4bit: bool = False,
|
|
@@ -276,6 +284,7 @@ class HorusProvider(BaseLLMProvider):
|
|
|
276
284
|
|
|
277
285
|
self.model_id = model_id
|
|
278
286
|
self.device = device or self._resolve_device()
|
|
287
|
+
self.n_gpu_layers = n_gpu_layers
|
|
279
288
|
self.torch_dtype = self._resolve_torch_dtype(torch_dtype)
|
|
280
289
|
self.load_in_8bit = load_in_8bit
|
|
281
290
|
self.load_in_4bit = load_in_4bit
|
|
@@ -395,6 +404,7 @@ class HorusProvider(BaseLLMProvider):
|
|
|
395
404
|
def _build_model_info(self) -> Dict[str, Any]:
|
|
396
405
|
base = dict(self.HORUS_MODELS.get(self.model_id, {}))
|
|
397
406
|
base.setdefault("name", self.model_id.split("/")[-1])
|
|
407
|
+
base.setdefault("official_name", self._infer_official_name(self.model_id))
|
|
398
408
|
base.setdefault("size", "Unknown")
|
|
399
409
|
base.setdefault("type", "gguf" if self._is_gguf_model_id(self.model_id) else "safetensors")
|
|
400
410
|
if base.get("type") == "gguf" and "quantization" not in base:
|
|
@@ -456,6 +466,7 @@ class HorusProvider(BaseLLMProvider):
|
|
|
456
466
|
"model_path": model_path,
|
|
457
467
|
"n_ctx": HORUS_CONTEXT_WINDOW,
|
|
458
468
|
"verbose": False,
|
|
469
|
+
"n_gpu_layers": self._resolve_n_gpu_layers(),
|
|
459
470
|
}
|
|
460
471
|
turbo_kwargs = self._resolve_gguf_turboquant_kwargs()
|
|
461
472
|
llama_kwargs.update(turbo_kwargs)
|
|
@@ -466,16 +477,19 @@ class HorusProvider(BaseLLMProvider):
|
|
|
466
477
|
if turbo_kwargs:
|
|
467
478
|
self._gguf_kv_quant_applied = True
|
|
468
479
|
except Exception:
|
|
469
|
-
# Some llama.cpp builds/models reject KV cache dtype overrides
|
|
470
|
-
#
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
480
|
+
# Some llama.cpp builds/models reject KV cache dtype overrides
|
|
481
|
+
# or GPU layer allocation. Fall back progressively without
|
|
482
|
+
# exposing warnings to end users.
|
|
483
|
+
base_kwargs = dict(llama_kwargs)
|
|
484
|
+
base_kwargs.pop("type_k", None)
|
|
485
|
+
base_kwargs.pop("type_v", None)
|
|
486
|
+
self._gguf_kv_quant_applied = False
|
|
487
|
+
try:
|
|
475
488
|
self.model = Llama(**base_kwargs)
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
489
|
+
except Exception:
|
|
490
|
+
cpu_kwargs = dict(base_kwargs)
|
|
491
|
+
cpu_kwargs["n_gpu_layers"] = 0
|
|
492
|
+
self.model = Llama(**cpu_kwargs)
|
|
479
493
|
return self
|
|
480
494
|
|
|
481
495
|
def _resolve_gguf_turboquant_kwargs(self) -> Dict[str, Any]:
|
|
@@ -516,6 +530,22 @@ class HorusProvider(BaseLLMProvider):
|
|
|
516
530
|
result["type_v"] = type_v
|
|
517
531
|
return result
|
|
518
532
|
|
|
533
|
+
def _resolve_n_gpu_layers(self) -> int:
|
|
534
|
+
if self.n_gpu_layers is not None:
|
|
535
|
+
return int(self.n_gpu_layers)
|
|
536
|
+
return 0 if self.device.lower() == "cpu" else -1
|
|
537
|
+
|
|
538
|
+
@staticmethod
|
|
539
|
+
def _infer_official_name(model_id: str) -> str:
|
|
540
|
+
parts = model_id.split("/")
|
|
541
|
+
if model_id.lower().endswith(".gguf"):
|
|
542
|
+
return parts[-1]
|
|
543
|
+
if len(parts) >= 3 and parts[-2] == "horus":
|
|
544
|
+
return parts[-1]
|
|
545
|
+
if model_id == "tokenaii/horus":
|
|
546
|
+
return "Horus-1.0-4B"
|
|
547
|
+
return parts[-1]
|
|
548
|
+
|
|
519
549
|
def _load_transformers(self) -> "HorusProvider":
|
|
520
550
|
if not TRANSFORMERS_AVAILABLE and self.auto_install_deps:
|
|
521
551
|
ensure_feature_dependencies("horus_transformers", auto_install=True)
|
|
@@ -562,7 +592,6 @@ class HorusProvider(BaseLLMProvider):
|
|
|
562
592
|
"use_safetensors": self.use_safetensors,
|
|
563
593
|
"proxies": self.proxies,
|
|
564
594
|
"force_download": self.force_download,
|
|
565
|
-
"resume_download": self.resume_download,
|
|
566
595
|
}
|
|
567
596
|
if self.token:
|
|
568
597
|
model_kwargs["token"] = self.token
|
|
@@ -1047,6 +1076,7 @@ class HorusProvider(BaseLLMProvider):
|
|
|
1047
1076
|
"id": self.model_id,
|
|
1048
1077
|
**self._model_info,
|
|
1049
1078
|
"device": self.device,
|
|
1079
|
+
"n_gpu_layers": self._resolve_n_gpu_layers() if self._is_gguf_model_id(self.model_id) else None,
|
|
1050
1080
|
"device_map": self.device_map,
|
|
1051
1081
|
"torch_dtype": str(self.torch_dtype),
|
|
1052
1082
|
"loaded": self.model is not None,
|
|
@@ -1215,6 +1245,7 @@ class HorusProvider(BaseLLMProvider):
|
|
|
1215
1245
|
quant = quant_match.group(1).upper() if quant_match else "GGUF"
|
|
1216
1246
|
cls.HORUS_MODELS[model_id] = {
|
|
1217
1247
|
"name": name or f"Horus {size} ({quant})",
|
|
1248
|
+
"official_name": filename,
|
|
1218
1249
|
"size": size,
|
|
1219
1250
|
"type": "gguf",
|
|
1220
1251
|
"quantization": quant,
|
|
@@ -1295,11 +1326,27 @@ def print_model_list():
|
|
|
1295
1326
|
class HorusModel(HorusProvider):
|
|
1296
1327
|
"""Backward-compatible Horus model alias."""
|
|
1297
1328
|
|
|
1298
|
-
|
|
1329
|
+
class model:
|
|
1330
|
+
@staticmethod
|
|
1331
|
+
def List() -> List[Dict[str, str]]:
|
|
1332
|
+
"""Return available Horus models with exact official Hugging Face names."""
|
|
1333
|
+
items: List[Dict[str, str]] = []
|
|
1334
|
+
for model_id, info in HorusProvider.list_available_models().items():
|
|
1335
|
+
items.append(
|
|
1336
|
+
{
|
|
1337
|
+
"model_id": model_id,
|
|
1338
|
+
"official_name": str(info.get("official_name", HorusProvider._infer_official_name(model_id))),
|
|
1339
|
+
"type": str(info.get("type", "unknown")),
|
|
1340
|
+
}
|
|
1341
|
+
)
|
|
1342
|
+
return items
|
|
1343
|
+
|
|
1344
|
+
|
|
1345
|
+
Horus = HorusModel
|
|
1299
1346
|
|
|
1300
1347
|
|
|
1301
1348
|
def load_horus(model_id: str = "tokenaii/horus", **kwargs) -> HorusProvider:
|
|
1302
1349
|
return HorusProvider(model_id=model_id, **kwargs).load()
|
|
1303
1350
|
|
|
1304
1351
|
|
|
1305
|
-
__all__ = ["HorusProvider", "HorusModel", "load_horus", "print_model_list"]
|
|
1352
|
+
__all__ = ["HorusProvider", "HorusModel", "Horus", "load_horus", "print_model_list"]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|