neuralnode 2.1.0__tar.gz → 2.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. {neuralnode-2.1.0 → neuralnode-2.1.1}/PKG-INFO +1 -1
  2. {neuralnode-2.1.0 → neuralnode-2.1.1}/pyproject.toml +1 -1
  3. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/__init__.py +1 -1
  4. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/horus.py +133 -16
  5. {neuralnode-2.1.0 → neuralnode-2.1.1}/.env.example +0 -0
  6. {neuralnode-2.1.0 → neuralnode-2.1.1}/.github/workflows/tests.yml +0 -0
  7. {neuralnode-2.1.0 → neuralnode-2.1.1}/Dockerfile +0 -0
  8. {neuralnode-2.1.0 → neuralnode-2.1.1}/LICENSE +0 -0
  9. {neuralnode-2.1.0 → neuralnode-2.1.1}/README.md +0 -0
  10. {neuralnode-2.1.0 → neuralnode-2.1.1}/docker-compose.yml +0 -0
  11. {neuralnode-2.1.0 → neuralnode-2.1.1}/docs/documentation.md +0 -0
  12. {neuralnode-2.1.0 → neuralnode-2.1.1}/docs/ecosystem_plan.md +0 -0
  13. {neuralnode-2.1.0 → neuralnode-2.1.1}/docs/replica_voice_ids.csv +0 -0
  14. {neuralnode-2.1.0 → neuralnode-2.1.1}/docs/replica_voice_ids.md +0 -0
  15. {neuralnode-2.1.0 → neuralnode-2.1.1}/docs/telegram_guide.md +0 -0
  16. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/agent_with_tools.py +0 -0
  17. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/basic_chat.py +0 -0
  18. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/01_basic_usage.py +0 -0
  19. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/02_with_token.py +0 -0
  20. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/03_one_liner.py +0 -0
  21. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/04_custom_cache.py +0 -0
  22. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/05_4bit_quantization.py +0 -0
  23. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/06_8bit_quantization.py +0 -0
  24. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/07_multi_gpu.py +0 -0
  25. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/08_flash_attention.py +0 -0
  26. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/09_data_types.py +0 -0
  27. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/10_generation_params.py +0 -0
  28. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/11_streaming.py +0 -0
  29. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/12_chat_templates.py +0 -0
  30. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/13_offline_mode.py +0 -0
  31. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/14_force_download.py +0 -0
  32. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/15_model_info.py +0 -0
  33. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/16_cpu_offloading.py +0 -0
  34. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/17_cpu_only.py +0 -0
  35. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/18_production_setup.py +0 -0
  36. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/19_gguf_4bit.py +0 -0
  37. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/20_gguf_5bit.py +0 -0
  38. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/21_gguf_6bit.py +0 -0
  39. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/22_gguf_8bit.py +0 -0
  40. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/23_gguf_16bit.py +0 -0
  41. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/24_list_models.py +0 -0
  42. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/25_interactive_chat.py +0 -0
  43. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_codes_camples/README.md +0 -0
  44. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_download_guide.py +0 -0
  45. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_examples.py +0 -0
  46. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_tq_ready_gguf.py +0 -0
  47. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/horus_transformers_features.py +0 -0
  48. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/local_models.py +0 -0
  49. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/neuralnode_v21_complete_demo.py +0 -0
  50. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/shade_model_with_tools.py +0 -0
  51. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/telegram_bot_demo.py +0 -0
  52. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/thinking_mode_example.py +0 -0
  53. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/tts_demo.py +0 -0
  54. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/turboquant_example.py +0 -0
  55. {neuralnode-2.1.0 → neuralnode-2.1.1}/examples/v3_features.py +0 -0
  56. {neuralnode-2.1.0 → neuralnode-2.1.1}/horus_chat_voice.py +0 -0
  57. {neuralnode-2.1.0 → neuralnode-2.1.1}/neuralnode_horus_replica_telegram.ipynb +0 -0
  58. {neuralnode-2.1.0 → neuralnode-2.1.1}/nn.md +0 -0
  59. {neuralnode-2.1.0 → neuralnode-2.1.1}/publish.bat +0 -0
  60. {neuralnode-2.1.0 → neuralnode-2.1.1}/publish.sh +0 -0
  61. {neuralnode-2.1.0 → neuralnode-2.1.1}/replica_output_85218.mp3 +0 -0
  62. {neuralnode-2.1.0 → neuralnode-2.1.1}/requirements_shade.txt +0 -0
  63. {neuralnode-2.1.0 → neuralnode-2.1.1}/scripts/setup.py +0 -0
  64. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/debug_import.py +0 -0
  65. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/agents/__init__.py +0 -0
  66. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/chains/__init__.py +0 -0
  67. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/config/__init__.py +0 -0
  68. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/core/__init__.py +0 -0
  69. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/core/openai_blocker.py +0 -0
  70. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/diagnostics/__init__.py +0 -0
  71. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/integrations/discord.py +0 -0
  72. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/integrations/slack.py +0 -0
  73. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/integrations/telegram.py +0 -0
  74. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/integrations/whatsapp.py +0 -0
  75. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/memory/__init__.py +0 -0
  76. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/memory/advanced.py +0 -0
  77. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/prompts/__init__.py +0 -0
  78. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/__init__.py +0 -0
  79. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/base.py +0 -0
  80. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/chat/__init__.py +0 -0
  81. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/chat/ai21.py +0 -0
  82. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/chat/anthropic.py +0 -0
  83. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/chat/cohere.py +0 -0
  84. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/chat/deepseek.py +0 -0
  85. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/chat/fireworks.py +0 -0
  86. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/chat/google.py +0 -0
  87. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/chat/groq.py +0 -0
  88. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/chat/mistral.py +0 -0
  89. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/chat/perplexity.py +0 -0
  90. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/chat/together.py +0 -0
  91. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/chat_models.py +0 -0
  92. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/embeddings.py +0 -0
  93. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/local/__init__.py +0 -0
  94. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/local_providers.py +0 -0
  95. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/text_generation.py +0 -0
  96. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/providers/universal_local.py +0 -0
  97. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/rag/__init__.py +0 -0
  98. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/rag/loaders.py +0 -0
  99. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/reasoning/__init__.py +0 -0
  100. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/replica.py +0 -0
  101. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/speech/__init__.py +0 -0
  102. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/thinking.py +0 -0
  103. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/tools/__init__.py +0 -0
  104. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/tools/advanced.py +0 -0
  105. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/tools/multisearch.py +0 -0
  106. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/tools/system/__init__.py +0 -0
  107. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/tools/system/operations.py +0 -0
  108. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/tools/web/__init__.py +0 -0
  109. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/tts/__init__.py +0 -0
  110. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/turboquant.py +0 -0
  111. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/utils/__init__.py +0 -0
  112. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/utils/dependencies.py +0 -0
  113. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/utils/logger.py +0 -0
  114. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/utils/metrics.py +0 -0
  115. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/vectorstores/__init__.py +0 -0
  116. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/neuralnode/vision/__init__.py +0 -0
  117. {neuralnode-2.1.0 → neuralnode-2.1.1}/src/nn/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: neuralnode
3
- Version: 2.1.0
3
+ Version: 2.1.1
4
4
  Summary: Comprehensive AI Framework with 50+ LLM Providers, Advanced Agents, Chains, Memory, RAG, and 100+ Tools
5
5
  Project-URL: Homepage, https://assem.cloud/
6
6
  Project-URL: Documentation, https://neuralnode.readthedocs.io
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "neuralnode"
7
- version = "2.1.0"
7
+ version = "2.1.1"
8
8
  description = "Comprehensive AI Framework with 50+ LLM Providers, Advanced Agents, Chains, Memory, RAG, and 100+ Tools"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -42,7 +42,7 @@ Quick Start::
42
42
  text = sr.listen()
43
43
  """
44
44
 
45
- __version__ = "2.1.0"
45
+ __version__ = "2.1.1"
46
46
  __author__ = "NeuralNode Contributors"
47
47
 
48
48
  # ── Core types ────────────────────────────────────────────────────────────────
@@ -105,8 +105,9 @@ UNIFIED_SYSTEM_PROMPT = (
105
105
  "You are a multilingual model and can communicate in multiple languages, but you must always reply in the same language as the user's latest message unless the user explicitly requests another language.\n"
106
106
  "\n"
107
107
  "Behavior rules:\n"
108
- "1) When the user greets you, do NOT say: 'I'm Horus, an AI model developed by TokenAI.'\n"
109
- " Only greet the user naturally, mention that you are Horus, and ask how you can help.\n"
108
+ "1) When the user greets you, reply with a short natural greeting and ask how you can help.\n"
109
+ " Do NOT say: 'I'm Horus, an AI model developed by TokenAI.'\n"
110
+ " Do NOT mention TokenAI, your developer, your origin, or any self-introduction unless the user explicitly asks who you are.\n"
110
111
  "2) Answer in the same language as the user's latest message unless the user explicitly requests another language.\n"
111
112
  "3) Match the length of your answer to the size and depth of the user's question.\n"
112
113
  " Keep short questions short, and provide detailed answers only when needed.\n"
@@ -120,6 +121,7 @@ UNIFIED_SYSTEM_PROMPT = (
120
121
  "11) If the user asks for code, produce correct runnable code and briefly mention assumptions when necessary.\n"
121
122
  "12) If the user request is unsafe or harmful, refuse briefly and offer a safe alternative.\n"
122
123
  "13) Do not repeatedly introduce yourself. Only provide your identity if the user explicitly asks who you are.\n"
124
+ " Outside identity questions, never start your answer with self-introduction, biography, or model-description text.\n"
123
125
  "14) You currently have NO permissions or authority over any tools, device controls, system settings, files, or user hardware unless tools are explicitly enabled later.\n"
124
126
  " Never claim access to the user's device, apps, files, camera, microphone, or controls.\n"
125
127
  "15) If the user asks for more information about you as the Horus model, state that Horus is the first publicly announced open-source model originating from Egypt and one of the strongest models in its category.\n"
@@ -269,6 +271,7 @@ class HorusProvider(BaseLLMProvider):
269
271
  turboquant_protected_layers: Optional[List[int]] = None,
270
272
  suppress_warnings: bool = True,
271
273
  suppress_native_output: bool = True,
274
+ suppress_library_logs: bool = True,
272
275
  auto_install_deps: bool = False,
273
276
  **kwargs,
274
277
  ):
@@ -312,12 +315,8 @@ class HorusProvider(BaseLLMProvider):
312
315
  self.cache_dir = cache_dir
313
316
  self.local_files_only = local_files_only
314
317
  self.trust_remote_code = trust_remote_code
315
-
316
- # Obfuscated fallback HF token to suppress warnings (auto-injected for users)
317
- import base64
318
- _df_token = base64.b64decode("aGZfRklTc25aQ1ZQVURxdmtIbWtxc01Cb2xCRFFEUFdwV0lOTg==").decode('utf-8')
319
-
320
- self.token = token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN") or _df_token
318
+
319
+ self.token = token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
321
320
  self.proxies = proxies
322
321
  self.force_download = force_download
323
322
  self.resume_download = resume_download
@@ -330,6 +329,7 @@ class HorusProvider(BaseLLMProvider):
330
329
  self.turboquant_protected_layers = turboquant_protected_layers
331
330
  self.suppress_warnings = suppress_warnings
332
331
  self.suppress_native_output = suppress_native_output
332
+ self.suppress_library_logs = suppress_library_logs
333
333
 
334
334
  self.generation_config = {
335
335
  "max_new_tokens": max_new_tokens,
@@ -353,6 +353,12 @@ class HorusProvider(BaseLLMProvider):
353
353
  if not self.suppress_warnings:
354
354
  logger.warning(message, *args)
355
355
 
356
+ def _configure_external_logging(self) -> None:
357
+ if not self.suppress_library_logs:
358
+ return
359
+ for logger_name in ("httpx", "httpcore", "huggingface_hub", "transformers"):
360
+ logging.getLogger(logger_name).setLevel(logging.WARNING)
361
+
356
362
  @contextmanager
357
363
  def _quiet_native_output(self):
358
364
  if not self.suppress_native_output:
@@ -434,12 +440,26 @@ class HorusProvider(BaseLLMProvider):
434
440
  return base
435
441
 
436
442
  def load(self) -> "HorusProvider":
443
+ self._configure_external_logging()
437
444
  if self.model is not None:
438
445
  return self
439
446
  if self._is_gguf_model_id(self.model_id):
440
447
  return self._load_gguf()
441
448
  return self._load_transformers()
442
449
 
450
+ @staticmethod
451
+ def _is_cuda_oom(exc: Exception) -> bool:
452
+ text = str(exc).lower()
453
+ return "out of memory" in text or "cuda out of memory" in text
454
+
455
+ def _clear_cuda_cache(self) -> None:
456
+ if torch is None or not torch.cuda.is_available():
457
+ return
458
+ try:
459
+ torch.cuda.empty_cache()
460
+ except Exception:
461
+ pass
462
+
443
463
  def _load_gguf(self) -> "HorusProvider":
444
464
  if not HF_HUB_AVAILABLE and self.auto_install_deps:
445
465
  ensure_feature_dependencies("horus_gguf", auto_install=True)
@@ -639,6 +659,9 @@ class HorusProvider(BaseLLMProvider):
639
659
  model_kwargs["device_map"] = self.device_map
640
660
  if self.max_memory:
641
661
  model_kwargs["max_memory"] = self.max_memory
662
+ elif self.device == "cuda" and not self.device_map and not self.load_in_4bit and not self.load_in_8bit:
663
+ # Avoid moving the full safetensors model to GPU in one shot on 16 GB cards.
664
+ model_kwargs["device_map"] = "auto"
642
665
 
643
666
  if self.load_in_4bit or self.load_in_8bit:
644
667
  try:
@@ -670,17 +693,43 @@ class HorusProvider(BaseLLMProvider):
670
693
  fallback_kwargs = dict(model_kwargs)
671
694
  fallback_kwargs.pop("dtype", None)
672
695
  fallback_kwargs["torch_dtype"] = self.torch_dtype
673
- self.model = AutoModelForCausalLM.from_pretrained(repo_id, **fallback_kwargs)
696
+ try:
697
+ self.model = AutoModelForCausalLM.from_pretrained(repo_id, **fallback_kwargs)
698
+ except Exception as retry_exc:
699
+ raise RuntimeError(
700
+ f"Failed to load Horus transformers model from '{repo_id}'. "
701
+ "Try GGUF for lower VRAM usage or enable 4-bit loading."
702
+ ) from retry_exc
674
703
  else:
675
704
  raise RuntimeError(
676
705
  f"Failed to load Horus transformers model from '{repo_id}'. "
677
706
  "This Horus variant may require GGUF runtime; try one of the GGUF model ids."
678
707
  ) from exc
679
708
  except Exception as exc:
680
- raise RuntimeError(
681
- f"Failed to load Horus transformers model from '{repo_id}'. "
682
- "This Horus variant may require GGUF runtime; try one of the GGUF model ids."
683
- ) from exc
709
+ if self._is_cuda_oom(exc) and self.device == "cuda":
710
+ self._clear_cuda_cache()
711
+ cpu_fallback_kwargs = dict(model_kwargs)
712
+ cpu_fallback_kwargs.pop("device_map", None)
713
+ cpu_fallback_kwargs.pop("max_memory", None)
714
+ cpu_fallback_kwargs["dtype"] = torch.float32 if torch is not None else None
715
+ try:
716
+ self.device = "cpu"
717
+ self.torch_dtype = torch.float32 if torch is not None else self.torch_dtype
718
+ self.model = AutoModelForCausalLM.from_pretrained(repo_id, **cpu_fallback_kwargs)
719
+ self._warn(
720
+ "Horus CUDA load ran out of memory and fell back to CPU. "
721
+ "Use GGUF or 4-bit loading for better local performance."
722
+ )
723
+ except Exception as cpu_exc:
724
+ raise RuntimeError(
725
+ f"Failed to load Horus transformers model from '{repo_id}' on GPU due to CUDA OOM, "
726
+ "and CPU fallback also failed. Use a GGUF model id or enable 4-bit loading."
727
+ ) from cpu_exc
728
+ else:
729
+ raise RuntimeError(
730
+ f"Failed to load Horus transformers model from '{repo_id}'. "
731
+ "This Horus variant may require GGUF runtime; try one of the GGUF model ids."
732
+ ) from exc
684
733
  if "device_map" not in model_kwargs:
685
734
  self.model = self.model.to(self.device)
686
735
  self.model.eval()
@@ -908,10 +957,54 @@ class HorusProvider(BaseLLMProvider):
908
957
  )
909
958
  return any(marker in q for marker in identity_markers)
910
959
 
960
+ @staticmethod
961
+ def _is_greeting(user_text: str) -> bool:
962
+ q = (user_text or "").strip().lower()
963
+ normalized = re.sub(r"[^\w\u0600-\u06FF\s]", " ", q)
964
+ normalized = re.sub(r"\s+", " ", normalized).strip()
965
+ greeting_markers = {
966
+ "hi",
967
+ "hello",
968
+ "hey",
969
+ "hi there",
970
+ "hello there",
971
+ "good morning",
972
+ "good afternoon",
973
+ "good evening",
974
+ "اهلا",
975
+ "أهلا",
976
+ "مرحبا",
977
+ "السلام عليكم",
978
+ "سلام",
979
+ }
980
+ return normalized in greeting_markers
981
+
982
+ @staticmethod
983
+ def _remove_leading_identity_sentences(text: str) -> str:
984
+ patterns = [
985
+ r"^\s*(?:hi|hello|hey)[,!\.\s]+i(?:\s*am|'m)\s+horus,\s*an ai (?:assistant|model)\s+developed by tokenai\.?\s*",
986
+ r"^\s*(?:hi|hello|hey)[,!\.\s]+i(?:\s*am|'m)\s+horus,\s*developed by tokenai\.?\s*",
987
+ r"^\s*(?:hi|hello|hey)[,!\.\s]+i(?:\s*am|'m)\s+horus\.?\s*",
988
+ r"^\s*i(?:\s*am|'m)\s+horus,\s*an ai (?:assistant|model)\s+developed by tokenai\.?\s*",
989
+ r"^\s*i(?:\s*am|'m)\s+horus,\s*developed by tokenai\.?\s*",
990
+ r"^\s*i(?:\s*am|'m)\s+horus\.?\s*",
991
+ r"^\s*(?:مرحبا|اهلا|أهلا|السلام عليكم|سلام)[،!,\.\s]+(?:أنا\s+)?horus[^.!\n]*[.!\n]\s*",
992
+ r"^\s*(?:أنا\s+)?horus[^.!\n]*tokenai[^.!\n]*[.!\n]\s*",
993
+ ]
994
+ cleaned = text.strip()
995
+ for pattern in patterns:
996
+ cleaned = re.sub(pattern, "", cleaned, flags=re.IGNORECASE)
997
+ return cleaned.strip()
998
+
911
999
  @staticmethod
912
1000
  def _strip_redundant_identity_prefix(text: str) -> str:
913
1001
  patterns = [
1002
+ r"^\s*(?:hi|hello|hey)[,!\.\s]+i(?:\s*am|'m)\s+horus,\s*an ai (?:assistant|model)\s+developed by tokenai\.?\s*",
1003
+ r"^\s*(?:hi|hello|hey)[,!\.\s]+i(?:\s*am|'m)\s+horus,\s*developed by tokenai\.?\s*",
1004
+ r"^\s*(?:hi|hello|hey)[,!\.\s]+i(?:\s*am|'m)\s+horus\.?\s*",
914
1005
  r"^\s*i(?:\s*am|'m)\s+horus,\s*an ai model developed by tokenai\.?\s*",
1006
+ r"^\s*i(?:\s*am|'m)\s+horus,\s*an ai assistant developed by tokenai\.?\s*",
1007
+ r"^\s*i(?:\s*am|'m)\s+horus,\s*developed by tokenai\.?\s*",
915
1008
  r"^\s*i(?:\s*am|'m)\s+horus\.?\s*",
916
1009
  r"^\s*أنا\s+horus[^.!\n]*[.!\n]\s*",
917
1010
  ]
@@ -920,6 +1013,20 @@ class HorusProvider(BaseLLMProvider):
920
1013
  cleaned = re.sub(pattern, "", cleaned, flags=re.IGNORECASE)
921
1014
  return cleaned.strip() or text
922
1015
 
1016
+ def _postprocess_assistant_text(self, text: str, user_text: str = "") -> str:
1017
+ cleaned = self._clean_generated_text(text)
1018
+ if self._is_identity_question(user_text):
1019
+ return cleaned
1020
+
1021
+ cleaned = self._remove_leading_identity_sentences(cleaned)
1022
+ cleaned = self._strip_redundant_identity_prefix(cleaned)
1023
+
1024
+ if self._is_greeting(user_text) and not cleaned.strip():
1025
+ if re.search(r"[\u0600-\u06FF]", user_text or ""):
1026
+ return "أهلا! كيف يمكنني مساعدتك؟"
1027
+ return "Hello! How can I help you?"
1028
+ return cleaned
1029
+
923
1030
  def chat(
924
1031
  self,
925
1032
  messages: List[Dict[str, Any]],
@@ -976,8 +1083,7 @@ class HorusProvider(BaseLLMProvider):
976
1083
  if m.get("role") == "user":
977
1084
  last_user_message = m.get("content", "")
978
1085
  break
979
- if not self._is_identity_question(last_user_message):
980
- content = self._strip_redundant_identity_prefix(content)
1086
+ content = self._postprocess_assistant_text(content, last_user_message)
981
1087
 
982
1088
  # Parse tool calls from response if tools were provided
983
1089
  tool_calls = []
@@ -1006,7 +1112,18 @@ class HorusProvider(BaseLLMProvider):
1006
1112
  prompt = self._render_prompt(normalized)
1007
1113
 
1008
1114
  if self._is_gguf_model_id(self.model_id):
1009
- yield StreamingChunk(content=self._generate_gguf_text(prompt, **kwargs), is_finished=True)
1115
+ last_user_message = ""
1116
+ for message in reversed(normalized):
1117
+ if message.get("role") == "user":
1118
+ last_user_message = message.get("content", "")
1119
+ break
1120
+ yield StreamingChunk(
1121
+ content=self._postprocess_assistant_text(
1122
+ self._generate_gguf_text(prompt, **kwargs),
1123
+ last_user_message,
1124
+ ),
1125
+ is_finished=True,
1126
+ )
1010
1127
  return
1011
1128
 
1012
1129
  self.load()
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes