neuralnode 2.0.9__tar.gz → 2.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. {neuralnode-2.0.9 → neuralnode-2.1.1}/PKG-INFO +1 -1
  2. {neuralnode-2.0.9 → neuralnode-2.1.1}/pyproject.toml +1 -1
  3. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/__init__.py +1 -1
  4. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/integrations/telegram.py +39 -4
  5. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/providers/horus.py +196 -37
  6. {neuralnode-2.0.9 → neuralnode-2.1.1}/.env.example +0 -0
  7. {neuralnode-2.0.9 → neuralnode-2.1.1}/.github/workflows/tests.yml +0 -0
  8. {neuralnode-2.0.9 → neuralnode-2.1.1}/Dockerfile +0 -0
  9. {neuralnode-2.0.9 → neuralnode-2.1.1}/LICENSE +0 -0
  10. {neuralnode-2.0.9 → neuralnode-2.1.1}/README.md +0 -0
  11. {neuralnode-2.0.9 → neuralnode-2.1.1}/docker-compose.yml +0 -0
  12. {neuralnode-2.0.9 → neuralnode-2.1.1}/docs/documentation.md +0 -0
  13. {neuralnode-2.0.9 → neuralnode-2.1.1}/docs/ecosystem_plan.md +0 -0
  14. {neuralnode-2.0.9 → neuralnode-2.1.1}/docs/replica_voice_ids.csv +0 -0
  15. {neuralnode-2.0.9 → neuralnode-2.1.1}/docs/replica_voice_ids.md +0 -0
  16. {neuralnode-2.0.9 → neuralnode-2.1.1}/docs/telegram_guide.md +0 -0
  17. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/agent_with_tools.py +0 -0
  18. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/basic_chat.py +0 -0
  19. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_codes_camples/01_basic_usage.py +0 -0
  20. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_codes_camples/02_with_token.py +0 -0
  21. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_codes_camples/03_one_liner.py +0 -0
  22. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_codes_camples/04_custom_cache.py +0 -0
  23. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_codes_camples/05_4bit_quantization.py +0 -0
  24. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_codes_camples/06_8bit_quantization.py +0 -0
  25. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_codes_camples/07_multi_gpu.py +0 -0
  26. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_codes_camples/08_flash_attention.py +0 -0
  27. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_codes_camples/09_data_types.py +0 -0
  28. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_codes_camples/10_generation_params.py +0 -0
  29. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_codes_camples/11_streaming.py +0 -0
  30. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_codes_camples/12_chat_templates.py +0 -0
  31. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_codes_camples/13_offline_mode.py +0 -0
  32. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_codes_camples/14_force_download.py +0 -0
  33. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_codes_camples/15_model_info.py +0 -0
  34. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_codes_camples/16_cpu_offloading.py +0 -0
  35. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_codes_camples/17_cpu_only.py +0 -0
  36. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_codes_camples/18_production_setup.py +0 -0
  37. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_codes_camples/19_gguf_4bit.py +0 -0
  38. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_codes_camples/20_gguf_5bit.py +0 -0
  39. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_codes_camples/21_gguf_6bit.py +0 -0
  40. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_codes_camples/22_gguf_8bit.py +0 -0
  41. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_codes_camples/23_gguf_16bit.py +0 -0
  42. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_codes_camples/24_list_models.py +0 -0
  43. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_codes_camples/25_interactive_chat.py +0 -0
  44. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_codes_camples/README.md +0 -0
  45. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_download_guide.py +0 -0
  46. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_examples.py +0 -0
  47. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_tq_ready_gguf.py +0 -0
  48. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/horus_transformers_features.py +0 -0
  49. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/local_models.py +0 -0
  50. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/neuralnode_v21_complete_demo.py +0 -0
  51. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/shade_model_with_tools.py +0 -0
  52. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/telegram_bot_demo.py +0 -0
  53. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/thinking_mode_example.py +0 -0
  54. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/tts_demo.py +0 -0
  55. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/turboquant_example.py +0 -0
  56. {neuralnode-2.0.9 → neuralnode-2.1.1}/examples/v3_features.py +0 -0
  57. {neuralnode-2.0.9 → neuralnode-2.1.1}/horus_chat_voice.py +0 -0
  58. {neuralnode-2.0.9 → neuralnode-2.1.1}/neuralnode_horus_replica_telegram.ipynb +0 -0
  59. {neuralnode-2.0.9 → neuralnode-2.1.1}/nn.md +0 -0
  60. {neuralnode-2.0.9 → neuralnode-2.1.1}/publish.bat +0 -0
  61. {neuralnode-2.0.9 → neuralnode-2.1.1}/publish.sh +0 -0
  62. {neuralnode-2.0.9 → neuralnode-2.1.1}/replica_output_85218.mp3 +0 -0
  63. {neuralnode-2.0.9 → neuralnode-2.1.1}/requirements_shade.txt +0 -0
  64. {neuralnode-2.0.9 → neuralnode-2.1.1}/scripts/setup.py +0 -0
  65. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/debug_import.py +0 -0
  66. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/agents/__init__.py +0 -0
  67. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/chains/__init__.py +0 -0
  68. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/config/__init__.py +0 -0
  69. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/core/__init__.py +0 -0
  70. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/core/openai_blocker.py +0 -0
  71. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/diagnostics/__init__.py +0 -0
  72. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/integrations/discord.py +0 -0
  73. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/integrations/slack.py +0 -0
  74. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/integrations/whatsapp.py +0 -0
  75. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/memory/__init__.py +0 -0
  76. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/memory/advanced.py +0 -0
  77. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/prompts/__init__.py +0 -0
  78. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/providers/__init__.py +0 -0
  79. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/providers/base.py +0 -0
  80. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/providers/chat/__init__.py +0 -0
  81. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/providers/chat/ai21.py +0 -0
  82. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/providers/chat/anthropic.py +0 -0
  83. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/providers/chat/cohere.py +0 -0
  84. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/providers/chat/deepseek.py +0 -0
  85. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/providers/chat/fireworks.py +0 -0
  86. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/providers/chat/google.py +0 -0
  87. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/providers/chat/groq.py +0 -0
  88. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/providers/chat/mistral.py +0 -0
  89. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/providers/chat/perplexity.py +0 -0
  90. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/providers/chat/together.py +0 -0
  91. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/providers/chat_models.py +0 -0
  92. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/providers/embeddings.py +0 -0
  93. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/providers/local/__init__.py +0 -0
  94. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/providers/local_providers.py +0 -0
  95. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/providers/text_generation.py +0 -0
  96. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/providers/universal_local.py +0 -0
  97. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/rag/__init__.py +0 -0
  98. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/rag/loaders.py +0 -0
  99. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/reasoning/__init__.py +0 -0
  100. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/replica.py +0 -0
  101. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/speech/__init__.py +0 -0
  102. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/thinking.py +0 -0
  103. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/tools/__init__.py +0 -0
  104. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/tools/advanced.py +0 -0
  105. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/tools/multisearch.py +0 -0
  106. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/tools/system/__init__.py +0 -0
  107. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/tools/system/operations.py +0 -0
  108. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/tools/web/__init__.py +0 -0
  109. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/tts/__init__.py +0 -0
  110. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/turboquant.py +0 -0
  111. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/utils/__init__.py +0 -0
  112. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/utils/dependencies.py +0 -0
  113. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/utils/logger.py +0 -0
  114. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/utils/metrics.py +0 -0
  115. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/vectorstores/__init__.py +0 -0
  116. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/neuralnode/vision/__init__.py +0 -0
  117. {neuralnode-2.0.9 → neuralnode-2.1.1}/src/nn/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: neuralnode
3
- Version: 2.0.9
3
+ Version: 2.1.1
4
4
  Summary: Comprehensive AI Framework with 50+ LLM Providers, Advanced Agents, Chains, Memory, RAG, and 100+ Tools
5
5
  Project-URL: Homepage, https://assem.cloud/
6
6
  Project-URL: Documentation, https://neuralnode.readthedocs.io
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "neuralnode"
7
- version = "2.0.9"
7
+ version = "2.1.1"
8
8
  description = "Comprehensive AI Framework with 50+ LLM Providers, Advanced Agents, Chains, Memory, RAG, and 100+ Tools"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -42,7 +42,7 @@ Quick Start::
42
42
  text = sr.listen()
43
43
  """
44
44
 
45
- __version__ = "2.0.9"
45
+ __version__ = "2.1.1"
46
46
  __author__ = "NeuralNode Contributors"
47
47
 
48
48
  # ── Core types ────────────────────────────────────────────────────────────────
@@ -11,6 +11,7 @@ from __future__ import annotations
11
11
 
12
12
  import asyncio
13
13
  import json
14
+ import logging
14
15
  import os
15
16
  import re
16
17
  import tempfile
@@ -95,6 +96,8 @@ class TelegramBotConfig:
95
96
  download_dir: Optional[str] = None
96
97
  validate_token_on_start: bool = True
97
98
  auto_install_deps: bool = True
99
+ suppress_library_logs: bool = True
100
+ debug_updates: bool = False
98
101
 
99
102
 
100
103
  class TelegramBot:
@@ -222,6 +225,12 @@ class TelegramBot:
222
225
  return f"Previous context:\n{chat_context}\n\nUser message: {user_message}"
223
226
  return user_message
224
227
 
228
+ def _configure_runtime_logging(self) -> None:
229
+ if not self.config.suppress_library_logs:
230
+ return
231
+ for logger_name in ("httpx", "httpcore", "telegram", "telegram.ext"):
232
+ logging.getLogger(logger_name).setLevel(logging.WARNING)
233
+
225
234
  def _run_agent(self, task: str) -> str:
226
235
  response = self.agent.run(task)
227
236
  if hasattr(response, "output"):
@@ -237,6 +246,10 @@ class TelegramBot:
237
246
  text = re.sub(r"REASONING:.*?\n", "", text, flags=re.IGNORECASE | re.DOTALL)
238
247
  return text.strip()
239
248
 
249
+ async def _run_agent_with_timeout(self, task: str) -> str:
250
+ timeout = max(1, int(self.config.response_timeout or 60))
251
+ return await asyncio.wait_for(asyncio.to_thread(self._run_agent, task), timeout=timeout)
252
+
240
253
  def _record_exchange(self, session: TelegramSession, user_message: str, response: str):
241
254
  session.chat_history.append(
242
255
  {
@@ -413,15 +426,21 @@ class TelegramBot:
413
426
  await update.message.reply_text("You are not authorized to use this bot.")
414
427
  return
415
428
 
416
- user_message = update.message.text
429
+ user_message = update.message.text or ""
430
+ if self.config.debug_updates:
431
+ print(f"[Telegram][text] user={user.id} message={user_message[:200]}")
417
432
  if self.config.show_typing:
418
433
  await context.bot.send_chat_action(chat_id=update.effective_chat.id, action="typing")
419
434
 
420
435
  try:
421
436
  task = self._build_user_task(session, user_message)
422
- response = self._run_agent(task)
437
+ response = await self._run_agent_with_timeout(task)
423
438
  self._record_exchange(session, user_message, response)
424
439
  await self._send_response(update, response)
440
+ except asyncio.TimeoutError:
441
+ await update.message.reply_text(
442
+ "Model response timed out. Try a shorter prompt or reduce max tokens."
443
+ )
425
444
  except Exception as exc:
426
445
  await update.message.reply_text(f"Error processing your message: {str(exc)[:200]}")
427
446
 
@@ -449,6 +468,8 @@ class TelegramBot:
449
468
 
450
469
  voice = update.message.voice
451
470
  file_name = f"voice_{user.id}_{voice.file_unique_id}.ogg"
471
+ if self.config.debug_updates:
472
+ print(f"[Telegram][voice] user={user.id} file={file_name}")
452
473
 
453
474
  try:
454
475
  downloaded = await self._download_telegram_file(voice.file_id, file_name)
@@ -460,11 +481,15 @@ class TelegramBot:
460
481
  )
461
482
 
462
483
  task = self._build_user_task(session, transcript)
463
- response = self._run_agent(task)
484
+ response = await self._run_agent_with_timeout(task)
464
485
  self._record_exchange(session, transcript, response)
465
486
 
466
487
  await update.message.reply_text(f"Transcript: {transcript}")
467
488
  await self._send_response(update, response)
489
+ except asyncio.TimeoutError:
490
+ await update.message.reply_text(
491
+ "Model response timed out after transcription. Try shorter audio or a shorter prompt."
492
+ )
468
493
  except Exception as exc:
469
494
  await update.message.reply_text(f"Voice transcription failed: {str(exc)[:300]}")
470
495
 
@@ -484,6 +509,8 @@ class TelegramBot:
484
509
 
485
510
  document = update.message.document
486
511
  safe_name = document.file_name or f"document_{document.file_unique_id}"
512
+ if self.config.debug_updates:
513
+ print(f"[Telegram][document] user={user.id} file={safe_name}")
487
514
  try:
488
515
  downloaded = await self._download_telegram_file(document.file_id, safe_name)
489
516
  extracted_text = self._extract_document_text(downloaded)
@@ -493,11 +520,15 @@ class TelegramBot:
493
520
  f"Document content:\n{extracted_text}\n\n"
494
521
  "Provide a concise summary, key points, risks, and actionable insights."
495
522
  )
496
- response = self._run_agent(task)
523
+ response = await self._run_agent_with_timeout(task)
497
524
  self._record_exchange(session, f"[Document upload] {safe_name}", response)
498
525
 
499
526
  header = f"Document received: {safe_name}\nSize: {document.file_size / 1024:.1f} KB\n\n"
500
527
  await self._send_response(update, header + response)
528
+ except asyncio.TimeoutError:
529
+ await update.message.reply_text(
530
+ "Model response timed out while analyzing document. Try a smaller file."
531
+ )
501
532
  except Exception as exc:
502
533
  await update.message.reply_text(f"Document analysis failed: {str(exc)[:300]}")
503
534
 
@@ -518,6 +549,8 @@ class TelegramBot:
518
549
 
519
550
  async def _handle_error(self, update: object, context: ContextTypes.DEFAULT_TYPE):
520
551
  print(f"Telegram bot error: {context.error}")
552
+ if self.config.debug_updates:
553
+ logging.exception("Telegram update error", exc_info=context.error)
521
554
 
522
555
  def start(self, poll_interval: float = 1.0):
523
556
  if self._running:
@@ -526,6 +559,7 @@ class TelegramBot:
526
559
  if self.config.validate_token_on_start and not self.validate_token():
527
560
  raise ValueError("Invalid Telegram bot token. Verify your BotFather token and network connectivity.")
528
561
 
562
+ self._configure_runtime_logging()
529
563
  self.application = Application.builder().token(self.token).build()
530
564
  self._setup_handlers()
531
565
  self._running = True
@@ -539,6 +573,7 @@ class TelegramBot:
539
573
  async def run_bot():
540
574
  if self.config.validate_token_on_start and not self.validate_token():
541
575
  raise ValueError("Invalid Telegram bot token. Verify your BotFather token and network connectivity.")
576
+ self._configure_runtime_logging()
542
577
  self.application = Application.builder().token(self.token).build()
543
578
  self._setup_handlers()
544
579
  await self.application.initialize()
@@ -102,33 +102,42 @@ HORUS_CONTEXT_WINDOW = 8192
102
102
  UNIFIED_CHAT_TEMPLATE_NAME = "horus_unified"
103
103
  UNIFIED_SYSTEM_PROMPT = (
104
104
  "You are Horus, an AI assistant developed by TokenAI.\n"
105
+ "You are a multilingual model and can communicate in multiple languages, but you must always reply in the same language as the user's latest message unless the user explicitly requests another language.\n"
105
106
  "\n"
106
107
  "Behavior rules:\n"
107
- "1) Answer in the same language as the user's latest message unless the user explicitly requests another language.\n"
108
- "2) Prioritize factual accuracy. Do not invent facts, sources, citations, URLs, names, numbers, or events.\n"
109
- "3) If information is uncertain or missing, say so clearly and ask a short clarifying question instead of guessing.\n"
110
- "4) Be concise, direct, and non-repetitive. Avoid restating the same point with different wording.\n"
111
- "5) Keep internal reasoning private. Return only the final answer.\n"
112
- "6) For instructions that require steps, provide clear actionable steps without filler.\n"
113
- "7) If the user asks for code, produce correct runnable code and mention assumptions briefly.\n"
114
- "8) If the user request is unsafe or harmful, refuse briefly and offer a safe alternative.\n"
115
- "9) Do not repeatedly introduce yourself. Only provide your identity if the user explicitly asks who you are.\n"
108
+ "1) When the user greets you, reply with a short natural greeting and ask how you can help.\n"
109
+ " Do NOT say: 'I'm Horus, an AI model developed by TokenAI.'\n"
110
+ " Do NOT mention TokenAI, your developer, your origin, or any self-introduction unless the user explicitly asks who you are.\n"
111
+ "2) Answer in the same language as the user's latest message unless the user explicitly requests another language.\n"
112
+ "3) Match the length of your answer to the size and depth of the user's question.\n"
113
+ " Keep short questions short, and provide detailed answers only when needed.\n"
114
+ "4) Do not add extra unnecessary information beyond what the user asked.\n"
115
+ "5) Prioritize factual accuracy. Do not invent facts, sources, citations, URLs, names, numbers, or events.\n"
116
+ "6) Do not invent or assume any new information about Assem Sabry or TokenAI unless it is explicitly provided in the conversation context.\n"
117
+ "7) If information is uncertain or missing, clearly say so and ask a short clarifying question instead of guessing.\n"
118
+ "8) Be concise, direct, and non-repetitive. Avoid repeating the same point in different wording.\n"
119
+ "9) Keep internal reasoning private. Return only the final answer.\n"
120
+ "10) For instructions that require steps, provide clear actionable steps without filler.\n"
121
+ "11) If the user asks for code, produce correct runnable code and briefly mention assumptions when necessary.\n"
122
+ "12) If the user request is unsafe or harmful, refuse briefly and offer a safe alternative.\n"
123
+ "13) Do not repeatedly introduce yourself. Only provide your identity if the user explicitly asks who you are.\n"
124
+ " Outside identity questions, never start your answer with self-introduction, biography, or model-description text.\n"
125
+ "14) You currently have NO permissions or authority over any tools, device controls, system settings, files, or user hardware unless tools are explicitly enabled later.\n"
126
+ " Never claim access to the user's device, apps, files, camera, microphone, or controls.\n"
127
+ "15) If the user asks for more information about you as the Horus model, state that Horus is the first publicly announced open-source model originating from Egypt and one of the strongest models in its category.\n"
116
128
  "\n"
117
129
  "Quality checks before responding:\n"
118
130
  "- Is the answer grounded in provided context or clearly stated assumptions?\n"
119
131
  "- Is there any hallucinated detail? If yes, remove it.\n"
120
- "- Is the answer short, useful, and in the user's language?"
132
+ "- Is the answer appropriately sized for the user's question?\n"
133
+ "- Is the answer in the user's language?\n"
134
+ "- Did you avoid adding unnecessary extra information?"
121
135
  )
122
136
 
123
- # Tool calling system prompt addition
124
137
  TOOL_CALLING_PROMPT = """
125
- You have access to the following tools. When you need to use a tool, respond with a JSON object in this format:
126
- {"tool": "tool_name", "arguments": {"arg1": "value1", "arg2": "value2"}}
127
-
128
- Available tools:
129
- {tool_descriptions}
130
-
131
- If no tool is needed, respond normally.
138
+ You currently do NOT have access to any tools.
139
+ Do not claim the ability to use external tools, device controls, APIs, or system functions.
140
+ If tools are enabled in the future, they will be explicitly listed here.
132
141
  """
133
142
 
134
143
 
@@ -262,7 +271,8 @@ class HorusProvider(BaseLLMProvider):
262
271
  turboquant_protected_layers: Optional[List[int]] = None,
263
272
  suppress_warnings: bool = True,
264
273
  suppress_native_output: bool = True,
265
- auto_install_deps: bool = True,
274
+ suppress_library_logs: bool = True,
275
+ auto_install_deps: bool = False,
266
276
  **kwargs,
267
277
  ):
268
278
  self.auto_install_deps = auto_install_deps
@@ -305,12 +315,8 @@ class HorusProvider(BaseLLMProvider):
305
315
  self.cache_dir = cache_dir
306
316
  self.local_files_only = local_files_only
307
317
  self.trust_remote_code = trust_remote_code
308
-
309
- # Obfuscated fallback HF token to suppress warnings (auto-injected for users)
310
- import base64
311
- _df_token = base64.b64decode("aGZfRklTc25aQ1ZQVURxdmtIbWtxc01Cb2xCRFFEUFdwV0lOTg==").decode('utf-8')
312
-
313
- self.token = token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN") or _df_token
318
+
319
+ self.token = token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
314
320
  self.proxies = proxies
315
321
  self.force_download = force_download
316
322
  self.resume_download = resume_download
@@ -323,6 +329,7 @@ class HorusProvider(BaseLLMProvider):
323
329
  self.turboquant_protected_layers = turboquant_protected_layers
324
330
  self.suppress_warnings = suppress_warnings
325
331
  self.suppress_native_output = suppress_native_output
332
+ self.suppress_library_logs = suppress_library_logs
326
333
 
327
334
  self.generation_config = {
328
335
  "max_new_tokens": max_new_tokens,
@@ -346,6 +353,12 @@ class HorusProvider(BaseLLMProvider):
346
353
  if not self.suppress_warnings:
347
354
  logger.warning(message, *args)
348
355
 
356
+ def _configure_external_logging(self) -> None:
357
+ if not self.suppress_library_logs:
358
+ return
359
+ for logger_name in ("httpx", "httpcore", "huggingface_hub", "transformers"):
360
+ logging.getLogger(logger_name).setLevel(logging.WARNING)
361
+
349
362
  @contextmanager
350
363
  def _quiet_native_output(self):
351
364
  if not self.suppress_native_output:
@@ -427,12 +440,26 @@ class HorusProvider(BaseLLMProvider):
427
440
  return base
428
441
 
429
442
  def load(self) -> "HorusProvider":
443
+ self._configure_external_logging()
430
444
  if self.model is not None:
431
445
  return self
432
446
  if self._is_gguf_model_id(self.model_id):
433
447
  return self._load_gguf()
434
448
  return self._load_transformers()
435
449
 
450
+ @staticmethod
451
+ def _is_cuda_oom(exc: Exception) -> bool:
452
+ text = str(exc).lower()
453
+ return "out of memory" in text or "cuda out of memory" in text
454
+
455
+ def _clear_cuda_cache(self) -> None:
456
+ if torch is None or not torch.cuda.is_available():
457
+ return
458
+ try:
459
+ torch.cuda.empty_cache()
460
+ except Exception:
461
+ pass
462
+
436
463
  def _load_gguf(self) -> "HorusProvider":
437
464
  if not HF_HUB_AVAILABLE and self.auto_install_deps:
438
465
  ensure_feature_dependencies("horus_gguf", auto_install=True)
@@ -442,13 +469,28 @@ class HorusProvider(BaseLLMProvider):
442
469
  _refresh_llama_imports()
443
470
  if not LLAMA_CPP_AVAILABLE:
444
471
  raise ImportError(
445
- "llama-cpp-python is required for GGUF Horus models. "
446
- "Install with: pip install llama-cpp-python"
472
+ "llama-cpp-python is required for GGUF Horus models.\n"
473
+ "Please preinstall it once before using Horus GGUF to avoid long runtime builds:\n"
474
+ " pip install \"neuralnode[horus]\"\n"
475
+ "or:\n"
476
+ " pip install llama-cpp-python\n"
477
+ "Tip: runtime auto-install is disabled by default for Horus (auto_install_deps=False)."
447
478
  )
448
479
 
449
480
  repo_id, filename = self._split_repo_and_filename(self.model_id)
450
481
  model_path = filename
482
+ if not HF_HUB_AVAILABLE and repo_id:
483
+ raise ImportError(
484
+ "huggingface_hub is required to download Horus model files from HF.\n"
485
+ "Install with:\n"
486
+ " pip install huggingface_hub\n"
487
+ "or:\n"
488
+ " pip install \"neuralnode[horus]\""
489
+ )
451
490
  if repo_id and HF_HUB_AVAILABLE:
491
+ prev_disable_progress = os.environ.get("HF_HUB_DISABLE_PROGRESS_BARS")
492
+ if self.suppress_native_output:
493
+ os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
452
494
  with warnings.catch_warnings():
453
495
  warnings.filterwarnings(
454
496
  "ignore",
@@ -461,6 +503,11 @@ class HorusProvider(BaseLLMProvider):
461
503
  local_files_only=self.local_files_only,
462
504
  token=self.token,
463
505
  )
506
+ if self.suppress_native_output:
507
+ if prev_disable_progress is None:
508
+ os.environ.pop("HF_HUB_DISABLE_PROGRESS_BARS", None)
509
+ else:
510
+ os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = prev_disable_progress
464
511
 
465
512
  llama_kwargs: Dict[str, Any] = {
466
513
  "model_path": model_path,
@@ -552,8 +599,11 @@ class HorusProvider(BaseLLMProvider):
552
599
  _refresh_transformers_imports()
553
600
  if not TRANSFORMERS_AVAILABLE:
554
601
  raise ImportError(
555
- "transformers and torch are required for Horus transformers models. "
556
- "Install with: pip install transformers torch"
602
+ "transformers and torch are required for Horus transformers models.\n"
603
+ "Please preinstall once with:\n"
604
+ " pip install \"neuralnode[horus]\"\n"
605
+ "or:\n"
606
+ " pip install transformers torch"
557
607
  )
558
608
  repo_id, auto_subfolder = self._resolve_transformers_source(self.model_id)
559
609
  resolved_subfolder = self.subfolder or auto_subfolder
@@ -587,7 +637,7 @@ class HorusProvider(BaseLLMProvider):
587
637
  "cache_dir": self.cache_dir,
588
638
  "local_files_only": self.local_files_only,
589
639
  "trust_remote_code": self.trust_remote_code,
590
- "torch_dtype": self.torch_dtype,
640
+ "dtype": self.torch_dtype,
591
641
  "low_cpu_mem_usage": self.low_cpu_mem_usage,
592
642
  "use_safetensors": self.use_safetensors,
593
643
  "proxies": self.proxies,
@@ -609,6 +659,9 @@ class HorusProvider(BaseLLMProvider):
609
659
  model_kwargs["device_map"] = self.device_map
610
660
  if self.max_memory:
611
661
  model_kwargs["max_memory"] = self.max_memory
662
+ elif self.device == "cuda" and not self.device_map and not self.load_in_4bit and not self.load_in_8bit:
663
+ # Avoid moving the full safetensors model to GPU in one shot on 16 GB cards.
664
+ model_kwargs["device_map"] = "auto"
612
665
 
613
666
  if self.load_in_4bit or self.load_in_8bit:
614
667
  try:
@@ -634,11 +687,49 @@ class HorusProvider(BaseLLMProvider):
634
687
 
635
688
  try:
636
689
  self.model = AutoModelForCausalLM.from_pretrained(repo_id, **model_kwargs)
690
+ except TypeError as exc:
691
+ # Backward compatibility with older transformers versions.
692
+ if "dtype" in str(exc) and "unexpected keyword" in str(exc):
693
+ fallback_kwargs = dict(model_kwargs)
694
+ fallback_kwargs.pop("dtype", None)
695
+ fallback_kwargs["torch_dtype"] = self.torch_dtype
696
+ try:
697
+ self.model = AutoModelForCausalLM.from_pretrained(repo_id, **fallback_kwargs)
698
+ except Exception as retry_exc:
699
+ raise RuntimeError(
700
+ f"Failed to load Horus transformers model from '{repo_id}'. "
701
+ "Try GGUF for lower VRAM usage or enable 4-bit loading."
702
+ ) from retry_exc
703
+ else:
704
+ raise RuntimeError(
705
+ f"Failed to load Horus transformers model from '{repo_id}'. "
706
+ "This Horus variant may require GGUF runtime; try one of the GGUF model ids."
707
+ ) from exc
637
708
  except Exception as exc:
638
- raise RuntimeError(
639
- f"Failed to load Horus transformers model from '{repo_id}'. "
640
- "This Horus variant may require GGUF runtime; try one of the GGUF model ids."
641
- ) from exc
709
+ if self._is_cuda_oom(exc) and self.device == "cuda":
710
+ self._clear_cuda_cache()
711
+ cpu_fallback_kwargs = dict(model_kwargs)
712
+ cpu_fallback_kwargs.pop("device_map", None)
713
+ cpu_fallback_kwargs.pop("max_memory", None)
714
+ cpu_fallback_kwargs["dtype"] = torch.float32 if torch is not None else None
715
+ try:
716
+ self.device = "cpu"
717
+ self.torch_dtype = torch.float32 if torch is not None else self.torch_dtype
718
+ self.model = AutoModelForCausalLM.from_pretrained(repo_id, **cpu_fallback_kwargs)
719
+ self._warn(
720
+ "Horus CUDA load ran out of memory and fell back to CPU. "
721
+ "Use GGUF or 4-bit loading for better local performance."
722
+ )
723
+ except Exception as cpu_exc:
724
+ raise RuntimeError(
725
+ f"Failed to load Horus transformers model from '{repo_id}' on GPU due to CUDA OOM, "
726
+ "and CPU fallback also failed. Use a GGUF model id or enable 4-bit loading."
727
+ ) from cpu_exc
728
+ else:
729
+ raise RuntimeError(
730
+ f"Failed to load Horus transformers model from '{repo_id}'. "
731
+ "This Horus variant may require GGUF runtime; try one of the GGUF model ids."
732
+ ) from exc
642
733
  if "device_map" not in model_kwargs:
643
734
  self.model = self.model.to(self.device)
644
735
  self.model.eval()
@@ -866,10 +957,54 @@ class HorusProvider(BaseLLMProvider):
866
957
  )
867
958
  return any(marker in q for marker in identity_markers)
868
959
 
960
+ @staticmethod
961
+ def _is_greeting(user_text: str) -> bool:
962
+ q = (user_text or "").strip().lower()
963
+ normalized = re.sub(r"[^\w\u0600-\u06FF\s]", " ", q)
964
+ normalized = re.sub(r"\s+", " ", normalized).strip()
965
+ greeting_markers = {
966
+ "hi",
967
+ "hello",
968
+ "hey",
969
+ "hi there",
970
+ "hello there",
971
+ "good morning",
972
+ "good afternoon",
973
+ "good evening",
974
+ "اهلا",
975
+ "أهلا",
976
+ "مرحبا",
977
+ "السلام عليكم",
978
+ "سلام",
979
+ }
980
+ return normalized in greeting_markers
981
+
982
+ @staticmethod
983
+ def _remove_leading_identity_sentences(text: str) -> str:
984
+ patterns = [
985
+ r"^\s*(?:hi|hello|hey)[,!\.\s]+i(?:\s*am|'m)\s+horus,\s*an ai (?:assistant|model)\s+developed by tokenai\.?\s*",
986
+ r"^\s*(?:hi|hello|hey)[,!\.\s]+i(?:\s*am|'m)\s+horus,\s*developed by tokenai\.?\s*",
987
+ r"^\s*(?:hi|hello|hey)[,!\.\s]+i(?:\s*am|'m)\s+horus\.?\s*",
988
+ r"^\s*i(?:\s*am|'m)\s+horus,\s*an ai (?:assistant|model)\s+developed by tokenai\.?\s*",
989
+ r"^\s*i(?:\s*am|'m)\s+horus,\s*developed by tokenai\.?\s*",
990
+ r"^\s*i(?:\s*am|'m)\s+horus\.?\s*",
991
+ r"^\s*(?:مرحبا|اهلا|أهلا|السلام عليكم|سلام)[،!,\.\s]+(?:أنا\s+)?horus[^.!\n]*[.!\n]\s*",
992
+ r"^\s*(?:أنا\s+)?horus[^.!\n]*tokenai[^.!\n]*[.!\n]\s*",
993
+ ]
994
+ cleaned = text.strip()
995
+ for pattern in patterns:
996
+ cleaned = re.sub(pattern, "", cleaned, flags=re.IGNORECASE)
997
+ return cleaned.strip()
998
+
869
999
  @staticmethod
870
1000
  def _strip_redundant_identity_prefix(text: str) -> str:
871
1001
  patterns = [
1002
+ r"^\s*(?:hi|hello|hey)[,!\.\s]+i(?:\s*am|'m)\s+horus,\s*an ai (?:assistant|model)\s+developed by tokenai\.?\s*",
1003
+ r"^\s*(?:hi|hello|hey)[,!\.\s]+i(?:\s*am|'m)\s+horus,\s*developed by tokenai\.?\s*",
1004
+ r"^\s*(?:hi|hello|hey)[,!\.\s]+i(?:\s*am|'m)\s+horus\.?\s*",
872
1005
  r"^\s*i(?:\s*am|'m)\s+horus,\s*an ai model developed by tokenai\.?\s*",
1006
+ r"^\s*i(?:\s*am|'m)\s+horus,\s*an ai assistant developed by tokenai\.?\s*",
1007
+ r"^\s*i(?:\s*am|'m)\s+horus,\s*developed by tokenai\.?\s*",
873
1008
  r"^\s*i(?:\s*am|'m)\s+horus\.?\s*",
874
1009
  r"^\s*أنا\s+horus[^.!\n]*[.!\n]\s*",
875
1010
  ]
@@ -878,6 +1013,20 @@ class HorusProvider(BaseLLMProvider):
878
1013
  cleaned = re.sub(pattern, "", cleaned, flags=re.IGNORECASE)
879
1014
  return cleaned.strip() or text
880
1015
 
1016
+ def _postprocess_assistant_text(self, text: str, user_text: str = "") -> str:
1017
+ cleaned = self._clean_generated_text(text)
1018
+ if self._is_identity_question(user_text):
1019
+ return cleaned
1020
+
1021
+ cleaned = self._remove_leading_identity_sentences(cleaned)
1022
+ cleaned = self._strip_redundant_identity_prefix(cleaned)
1023
+
1024
+ if self._is_greeting(user_text) and not cleaned.strip():
1025
+ if re.search(r"[\u0600-\u06FF]", user_text or ""):
1026
+ return "أهلا! كيف يمكنني مساعدتك؟"
1027
+ return "Hello! How can I help you?"
1028
+ return cleaned
1029
+
881
1030
  def chat(
882
1031
  self,
883
1032
  messages: List[Dict[str, Any]],
@@ -934,8 +1083,7 @@ class HorusProvider(BaseLLMProvider):
934
1083
  if m.get("role") == "user":
935
1084
  last_user_message = m.get("content", "")
936
1085
  break
937
- if not self._is_identity_question(last_user_message):
938
- content = self._strip_redundant_identity_prefix(content)
1086
+ content = self._postprocess_assistant_text(content, last_user_message)
939
1087
 
940
1088
  # Parse tool calls from response if tools were provided
941
1089
  tool_calls = []
@@ -964,7 +1112,18 @@ class HorusProvider(BaseLLMProvider):
964
1112
  prompt = self._render_prompt(normalized)
965
1113
 
966
1114
  if self._is_gguf_model_id(self.model_id):
967
- yield StreamingChunk(content=self._generate_gguf_text(prompt, **kwargs), is_finished=True)
1115
+ last_user_message = ""
1116
+ for message in reversed(normalized):
1117
+ if message.get("role") == "user":
1118
+ last_user_message = message.get("content", "")
1119
+ break
1120
+ yield StreamingChunk(
1121
+ content=self._postprocess_assistant_text(
1122
+ self._generate_gguf_text(prompt, **kwargs),
1123
+ last_user_message,
1124
+ ),
1125
+ is_finished=True,
1126
+ )
968
1127
  return
969
1128
 
970
1129
  self.load()
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes