neuralnode 2.1.0__tar.gz → 2.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {neuralnode-2.1.0 → neuralnode-2.1.1}/PKG-INFO +1 -1
- {neuralnode-2.1.0 → neuralnode-2.1.1}/pyproject.toml +1 -1
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/__init__.py +1 -1
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/horus.py +133 -16
- {neuralnode-2.1.0 → neuralnode-2.1.1}/.env.example +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/.github/workflows/tests.yml +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/Dockerfile +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/LICENSE +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/README.md +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/docker-compose.yml +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/docs/documentation.md +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/docs/ecosystem_plan.md +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/docs/replica_voice_ids.csv +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/docs/replica_voice_ids.md +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/docs/telegram_guide.md +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/agent_with_tools.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/basic_chat.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/01_basic_usage.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/02_with_token.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/03_one_liner.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/04_custom_cache.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/05_4bit_quantization.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/06_8bit_quantization.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/07_multi_gpu.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/08_flash_attention.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/09_data_types.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/10_generation_params.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/11_streaming.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/12_chat_templates.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/13_offline_mode.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/14_force_download.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/15_model_info.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/16_cpu_offloading.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/17_cpu_only.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/18_production_setup.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/19_gguf_4bit.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/20_gguf_5bit.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/21_gguf_6bit.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/22_gguf_8bit.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/23_gguf_16bit.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/24_list_models.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/25_interactive_chat.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/README.md +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_download_guide.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_examples.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_tq_ready_gguf.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_transformers_features.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/local_models.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/neuralnode_v21_complete_demo.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/shade_model_with_tools.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/telegram_bot_demo.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/thinking_mode_example.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/tts_demo.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/turboquant_example.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/v3_features.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/horus_chat_voice.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/neuralnode_horus_replica_telegram.ipynb +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/nn.md +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/publish.bat +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/publish.sh +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/replica_output_85218.mp3 +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/requirements_shade.txt +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/scripts/setup.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/debug_import.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/agents/__init__.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/chains/__init__.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/config/__init__.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/core/__init__.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/core/openai_blocker.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/diagnostics/__init__.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/integrations/discord.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/integrations/slack.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/integrations/telegram.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/integrations/whatsapp.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/memory/__init__.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/memory/advanced.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/prompts/__init__.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/__init__.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/base.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/chat/__init__.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/chat/ai21.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/chat/anthropic.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/chat/cohere.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/chat/deepseek.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/chat/fireworks.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/chat/google.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/chat/groq.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/chat/mistral.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/chat/perplexity.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/chat/together.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/chat_models.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/embeddings.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/local/__init__.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/local_providers.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/text_generation.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/universal_local.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/rag/__init__.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/rag/loaders.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/reasoning/__init__.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/replica.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/speech/__init__.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/thinking.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/tools/__init__.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/tools/advanced.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/tools/multisearch.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/tools/system/__init__.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/tools/system/operations.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/tools/web/__init__.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/tts/__init__.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/turboquant.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/utils/__init__.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/utils/dependencies.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/utils/logger.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/utils/metrics.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/vectorstores/__init__.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/vision/__init__.py +0 -0
- {neuralnode-2.1.0 → neuralnode-2.1.1}/src/nn/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: neuralnode
|
|
3
|
-
Version: 2.1.0
|
|
3
|
+
Version: 2.1.1
|
|
4
4
|
Summary: Comprehensive AI Framework with 50+ LLM Providers, Advanced Agents, Chains, Memory, RAG, and 100+ Tools
|
|
5
5
|
Project-URL: Homepage, https://assem.cloud/
|
|
6
6
|
Project-URL: Documentation, https://neuralnode.readthedocs.io
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "neuralnode"
|
|
7
|
-
version = "2.1.0"
|
|
7
|
+
version = "2.1.1"
|
|
8
8
|
description = "Comprehensive AI Framework with 50+ LLM Providers, Advanced Agents, Chains, Memory, RAG, and 100+ Tools"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.9"
|
|
@@ -105,8 +105,9 @@ UNIFIED_SYSTEM_PROMPT = (
|
|
|
105
105
|
"You are a multilingual model and can communicate in multiple languages, but you must always reply in the same language as the user's latest message unless the user explicitly requests another language.\n"
|
|
106
106
|
"\n"
|
|
107
107
|
"Behavior rules:\n"
|
|
108
|
-
"1) When the user greets you,
|
|
109
|
-
"
|
|
108
|
+
"1) When the user greets you, reply with a short natural greeting and ask how you can help.\n"
|
|
109
|
+
" Do NOT say: 'I'm Horus, an AI model developed by TokenAI.'\n"
|
|
110
|
+
" Do NOT mention TokenAI, your developer, your origin, or any self-introduction unless the user explicitly asks who you are.\n"
|
|
110
111
|
"2) Answer in the same language as the user's latest message unless the user explicitly requests another language.\n"
|
|
111
112
|
"3) Match the length of your answer to the size and depth of the user's question.\n"
|
|
112
113
|
" Keep short questions short, and provide detailed answers only when needed.\n"
|
|
@@ -120,6 +121,7 @@ UNIFIED_SYSTEM_PROMPT = (
|
|
|
120
121
|
"11) If the user asks for code, produce correct runnable code and briefly mention assumptions when necessary.\n"
|
|
121
122
|
"12) If the user request is unsafe or harmful, refuse briefly and offer a safe alternative.\n"
|
|
122
123
|
"13) Do not repeatedly introduce yourself. Only provide your identity if the user explicitly asks who you are.\n"
|
|
124
|
+
" Outside identity questions, never start your answer with self-introduction, biography, or model-description text.\n"
|
|
123
125
|
"14) You currently have NO permissions or authority over any tools, device controls, system settings, files, or user hardware unless tools are explicitly enabled later.\n"
|
|
124
126
|
" Never claim access to the user's device, apps, files, camera, microphone, or controls.\n"
|
|
125
127
|
"15) If the user asks for more information about you as the Horus model, state that Horus is the first publicly announced open-source model originating from Egypt and one of the strongest models in its category.\n"
|
|
@@ -269,6 +271,7 @@ class HorusProvider(BaseLLMProvider):
|
|
|
269
271
|
turboquant_protected_layers: Optional[List[int]] = None,
|
|
270
272
|
suppress_warnings: bool = True,
|
|
271
273
|
suppress_native_output: bool = True,
|
|
274
|
+
suppress_library_logs: bool = True,
|
|
272
275
|
auto_install_deps: bool = False,
|
|
273
276
|
**kwargs,
|
|
274
277
|
):
|
|
@@ -312,12 +315,8 @@ class HorusProvider(BaseLLMProvider):
|
|
|
312
315
|
self.cache_dir = cache_dir
|
|
313
316
|
self.local_files_only = local_files_only
|
|
314
317
|
self.trust_remote_code = trust_remote_code
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
import base64
|
|
318
|
-
_df_token = base64.b64decode("aGZfRklTc25aQ1ZQVURxdmtIbWtxc01Cb2xCRFFEUFdwV0lOTg==").decode('utf-8')
|
|
319
|
-
|
|
320
|
-
self.token = token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN") or _df_token
|
|
318
|
+
|
|
319
|
+
self.token = token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
|
|
321
320
|
self.proxies = proxies
|
|
322
321
|
self.force_download = force_download
|
|
323
322
|
self.resume_download = resume_download
|
|
@@ -330,6 +329,7 @@ class HorusProvider(BaseLLMProvider):
|
|
|
330
329
|
self.turboquant_protected_layers = turboquant_protected_layers
|
|
331
330
|
self.suppress_warnings = suppress_warnings
|
|
332
331
|
self.suppress_native_output = suppress_native_output
|
|
332
|
+
self.suppress_library_logs = suppress_library_logs
|
|
333
333
|
|
|
334
334
|
self.generation_config = {
|
|
335
335
|
"max_new_tokens": max_new_tokens,
|
|
@@ -353,6 +353,12 @@ class HorusProvider(BaseLLMProvider):
|
|
|
353
353
|
if not self.suppress_warnings:
|
|
354
354
|
logger.warning(message, *args)
|
|
355
355
|
|
|
356
|
+
def _configure_external_logging(self) -> None:
|
|
357
|
+
if not self.suppress_library_logs:
|
|
358
|
+
return
|
|
359
|
+
for logger_name in ("httpx", "httpcore", "huggingface_hub", "transformers"):
|
|
360
|
+
logging.getLogger(logger_name).setLevel(logging.WARNING)
|
|
361
|
+
|
|
356
362
|
@contextmanager
|
|
357
363
|
def _quiet_native_output(self):
|
|
358
364
|
if not self.suppress_native_output:
|
|
@@ -434,12 +440,26 @@ class HorusProvider(BaseLLMProvider):
|
|
|
434
440
|
return base
|
|
435
441
|
|
|
436
442
|
def load(self) -> "HorusProvider":
|
|
443
|
+
self._configure_external_logging()
|
|
437
444
|
if self.model is not None:
|
|
438
445
|
return self
|
|
439
446
|
if self._is_gguf_model_id(self.model_id):
|
|
440
447
|
return self._load_gguf()
|
|
441
448
|
return self._load_transformers()
|
|
442
449
|
|
|
450
|
+
@staticmethod
|
|
451
|
+
def _is_cuda_oom(exc: Exception) -> bool:
|
|
452
|
+
text = str(exc).lower()
|
|
453
|
+
return "out of memory" in text or "cuda out of memory" in text
|
|
454
|
+
|
|
455
|
+
def _clear_cuda_cache(self) -> None:
|
|
456
|
+
if torch is None or not torch.cuda.is_available():
|
|
457
|
+
return
|
|
458
|
+
try:
|
|
459
|
+
torch.cuda.empty_cache()
|
|
460
|
+
except Exception:
|
|
461
|
+
pass
|
|
462
|
+
|
|
443
463
|
def _load_gguf(self) -> "HorusProvider":
|
|
444
464
|
if not HF_HUB_AVAILABLE and self.auto_install_deps:
|
|
445
465
|
ensure_feature_dependencies("horus_gguf", auto_install=True)
|
|
@@ -639,6 +659,9 @@ class HorusProvider(BaseLLMProvider):
|
|
|
639
659
|
model_kwargs["device_map"] = self.device_map
|
|
640
660
|
if self.max_memory:
|
|
641
661
|
model_kwargs["max_memory"] = self.max_memory
|
|
662
|
+
elif self.device == "cuda" and not self.device_map and not self.load_in_4bit and not self.load_in_8bit:
|
|
663
|
+
# Avoid moving the full safetensors model to GPU in one shot on 16 GB cards.
|
|
664
|
+
model_kwargs["device_map"] = "auto"
|
|
642
665
|
|
|
643
666
|
if self.load_in_4bit or self.load_in_8bit:
|
|
644
667
|
try:
|
|
@@ -670,17 +693,43 @@ class HorusProvider(BaseLLMProvider):
|
|
|
670
693
|
fallback_kwargs = dict(model_kwargs)
|
|
671
694
|
fallback_kwargs.pop("dtype", None)
|
|
672
695
|
fallback_kwargs["torch_dtype"] = self.torch_dtype
|
|
673
|
-
|
|
696
|
+
try:
|
|
697
|
+
self.model = AutoModelForCausalLM.from_pretrained(repo_id, **fallback_kwargs)
|
|
698
|
+
except Exception as retry_exc:
|
|
699
|
+
raise RuntimeError(
|
|
700
|
+
f"Failed to load Horus transformers model from '{repo_id}'. "
|
|
701
|
+
"Try GGUF for lower VRAM usage or enable 4-bit loading."
|
|
702
|
+
) from retry_exc
|
|
674
703
|
else:
|
|
675
704
|
raise RuntimeError(
|
|
676
705
|
f"Failed to load Horus transformers model from '{repo_id}'. "
|
|
677
706
|
"This Horus variant may require GGUF runtime; try one of the GGUF model ids."
|
|
678
707
|
) from exc
|
|
679
708
|
except Exception as exc:
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
709
|
+
if self._is_cuda_oom(exc) and self.device == "cuda":
|
|
710
|
+
self._clear_cuda_cache()
|
|
711
|
+
cpu_fallback_kwargs = dict(model_kwargs)
|
|
712
|
+
cpu_fallback_kwargs.pop("device_map", None)
|
|
713
|
+
cpu_fallback_kwargs.pop("max_memory", None)
|
|
714
|
+
cpu_fallback_kwargs["dtype"] = torch.float32 if torch is not None else None
|
|
715
|
+
try:
|
|
716
|
+
self.device = "cpu"
|
|
717
|
+
self.torch_dtype = torch.float32 if torch is not None else self.torch_dtype
|
|
718
|
+
self.model = AutoModelForCausalLM.from_pretrained(repo_id, **cpu_fallback_kwargs)
|
|
719
|
+
self._warn(
|
|
720
|
+
"Horus CUDA load ran out of memory and fell back to CPU. "
|
|
721
|
+
"Use GGUF or 4-bit loading for better local performance."
|
|
722
|
+
)
|
|
723
|
+
except Exception as cpu_exc:
|
|
724
|
+
raise RuntimeError(
|
|
725
|
+
f"Failed to load Horus transformers model from '{repo_id}' on GPU due to CUDA OOM, "
|
|
726
|
+
"and CPU fallback also failed. Use a GGUF model id or enable 4-bit loading."
|
|
727
|
+
) from cpu_exc
|
|
728
|
+
else:
|
|
729
|
+
raise RuntimeError(
|
|
730
|
+
f"Failed to load Horus transformers model from '{repo_id}'. "
|
|
731
|
+
"This Horus variant may require GGUF runtime; try one of the GGUF model ids."
|
|
732
|
+
) from exc
|
|
684
733
|
if "device_map" not in model_kwargs:
|
|
685
734
|
self.model = self.model.to(self.device)
|
|
686
735
|
self.model.eval()
|
|
@@ -908,10 +957,54 @@ class HorusProvider(BaseLLMProvider):
|
|
|
908
957
|
)
|
|
909
958
|
return any(marker in q for marker in identity_markers)
|
|
910
959
|
|
|
960
|
+
@staticmethod
|
|
961
|
+
def _is_greeting(user_text: str) -> bool:
|
|
962
|
+
q = (user_text or "").strip().lower()
|
|
963
|
+
normalized = re.sub(r"[^\w\u0600-\u06FF\s]", " ", q)
|
|
964
|
+
normalized = re.sub(r"\s+", " ", normalized).strip()
|
|
965
|
+
greeting_markers = {
|
|
966
|
+
"hi",
|
|
967
|
+
"hello",
|
|
968
|
+
"hey",
|
|
969
|
+
"hi there",
|
|
970
|
+
"hello there",
|
|
971
|
+
"good morning",
|
|
972
|
+
"good afternoon",
|
|
973
|
+
"good evening",
|
|
974
|
+
"اهلا",
|
|
975
|
+
"أهلا",
|
|
976
|
+
"مرحبا",
|
|
977
|
+
"السلام عليكم",
|
|
978
|
+
"سلام",
|
|
979
|
+
}
|
|
980
|
+
return normalized in greeting_markers
|
|
981
|
+
|
|
982
|
+
@staticmethod
|
|
983
|
+
def _remove_leading_identity_sentences(text: str) -> str:
|
|
984
|
+
patterns = [
|
|
985
|
+
r"^\s*(?:hi|hello|hey)[,!\.\s]+i(?:\s*am|'m)\s+horus,\s*an ai (?:assistant|model)\s+developed by tokenai\.?\s*",
|
|
986
|
+
r"^\s*(?:hi|hello|hey)[,!\.\s]+i(?:\s*am|'m)\s+horus,\s*developed by tokenai\.?\s*",
|
|
987
|
+
r"^\s*(?:hi|hello|hey)[,!\.\s]+i(?:\s*am|'m)\s+horus\.?\s*",
|
|
988
|
+
r"^\s*i(?:\s*am|'m)\s+horus,\s*an ai (?:assistant|model)\s+developed by tokenai\.?\s*",
|
|
989
|
+
r"^\s*i(?:\s*am|'m)\s+horus,\s*developed by tokenai\.?\s*",
|
|
990
|
+
r"^\s*i(?:\s*am|'m)\s+horus\.?\s*",
|
|
991
|
+
r"^\s*(?:مرحبا|اهلا|أهلا|السلام عليكم|سلام)[،!,\.\s]+(?:أنا\s+)?horus[^.!\n]*[.!\n]\s*",
|
|
992
|
+
r"^\s*(?:أنا\s+)?horus[^.!\n]*tokenai[^.!\n]*[.!\n]\s*",
|
|
993
|
+
]
|
|
994
|
+
cleaned = text.strip()
|
|
995
|
+
for pattern in patterns:
|
|
996
|
+
cleaned = re.sub(pattern, "", cleaned, flags=re.IGNORECASE)
|
|
997
|
+
return cleaned.strip()
|
|
998
|
+
|
|
911
999
|
@staticmethod
|
|
912
1000
|
def _strip_redundant_identity_prefix(text: str) -> str:
|
|
913
1001
|
patterns = [
|
|
1002
|
+
r"^\s*(?:hi|hello|hey)[,!\.\s]+i(?:\s*am|'m)\s+horus,\s*an ai (?:assistant|model)\s+developed by tokenai\.?\s*",
|
|
1003
|
+
r"^\s*(?:hi|hello|hey)[,!\.\s]+i(?:\s*am|'m)\s+horus,\s*developed by tokenai\.?\s*",
|
|
1004
|
+
r"^\s*(?:hi|hello|hey)[,!\.\s]+i(?:\s*am|'m)\s+horus\.?\s*",
|
|
914
1005
|
r"^\s*i(?:\s*am|'m)\s+horus,\s*an ai model developed by tokenai\.?\s*",
|
|
1006
|
+
r"^\s*i(?:\s*am|'m)\s+horus,\s*an ai assistant developed by tokenai\.?\s*",
|
|
1007
|
+
r"^\s*i(?:\s*am|'m)\s+horus,\s*developed by tokenai\.?\s*",
|
|
915
1008
|
r"^\s*i(?:\s*am|'m)\s+horus\.?\s*",
|
|
916
1009
|
r"^\s*أنا\s+horus[^.!\n]*[.!\n]\s*",
|
|
917
1010
|
]
|
|
@@ -920,6 +1013,20 @@ class HorusProvider(BaseLLMProvider):
|
|
|
920
1013
|
cleaned = re.sub(pattern, "", cleaned, flags=re.IGNORECASE)
|
|
921
1014
|
return cleaned.strip() or text
|
|
922
1015
|
|
|
1016
|
+
def _postprocess_assistant_text(self, text: str, user_text: str = "") -> str:
|
|
1017
|
+
cleaned = self._clean_generated_text(text)
|
|
1018
|
+
if self._is_identity_question(user_text):
|
|
1019
|
+
return cleaned
|
|
1020
|
+
|
|
1021
|
+
cleaned = self._remove_leading_identity_sentences(cleaned)
|
|
1022
|
+
cleaned = self._strip_redundant_identity_prefix(cleaned)
|
|
1023
|
+
|
|
1024
|
+
if self._is_greeting(user_text) and not cleaned.strip():
|
|
1025
|
+
if re.search(r"[\u0600-\u06FF]", user_text or ""):
|
|
1026
|
+
return "أهلا! كيف يمكنني مساعدتك؟"
|
|
1027
|
+
return "Hello! How can I help you?"
|
|
1028
|
+
return cleaned
|
|
1029
|
+
|
|
923
1030
|
def chat(
|
|
924
1031
|
self,
|
|
925
1032
|
messages: List[Dict[str, Any]],
|
|
@@ -976,8 +1083,7 @@ class HorusProvider(BaseLLMProvider):
|
|
|
976
1083
|
if m.get("role") == "user":
|
|
977
1084
|
last_user_message = m.get("content", "")
|
|
978
1085
|
break
|
|
979
|
-
|
|
980
|
-
content = self._strip_redundant_identity_prefix(content)
|
|
1086
|
+
content = self._postprocess_assistant_text(content, last_user_message)
|
|
981
1087
|
|
|
982
1088
|
# Parse tool calls from response if tools were provided
|
|
983
1089
|
tool_calls = []
|
|
@@ -1006,7 +1112,18 @@ class HorusProvider(BaseLLMProvider):
|
|
|
1006
1112
|
prompt = self._render_prompt(normalized)
|
|
1007
1113
|
|
|
1008
1114
|
if self._is_gguf_model_id(self.model_id):
|
|
1009
|
-
|
|
1115
|
+
last_user_message = ""
|
|
1116
|
+
for message in reversed(normalized):
|
|
1117
|
+
if message.get("role") == "user":
|
|
1118
|
+
last_user_message = message.get("content", "")
|
|
1119
|
+
break
|
|
1120
|
+
yield StreamingChunk(
|
|
1121
|
+
content=self._postprocess_assistant_text(
|
|
1122
|
+
self._generate_gguf_text(prompt, **kwargs),
|
|
1123
|
+
last_user_message,
|
|
1124
|
+
),
|
|
1125
|
+
is_finished=True,
|
|
1126
|
+
)
|
|
1010
1127
|
return
|
|
1011
1128
|
|
|
1012
1129
|
self.load()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|