quantalogic 0.31.1__tar.gz → 0.33.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. {quantalogic-0.31.1 → quantalogic-0.33.0}/PKG-INFO +14 -1
  2. {quantalogic-0.31.1 → quantalogic-0.33.0}/README.md +13 -0
  3. {quantalogic-0.31.1 → quantalogic-0.33.0}/pyproject.toml +1 -1
  4. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/agent.py +73 -53
  5. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/generative_model.py +7 -3
  6. quantalogic-0.33.0/quantalogic/get_model_info.py +83 -0
  7. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/llm.py +47 -9
  8. quantalogic-0.33.0/quantalogic/model_info.py +12 -0
  9. quantalogic-0.33.0/quantalogic/model_info_list.py +60 -0
  10. quantalogic-0.33.0/quantalogic/model_info_litellm.py +70 -0
  11. quantalogic-0.33.0/quantalogic/prompts.py +116 -0
  12. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/task_runner.py +11 -0
  13. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/replace_in_file_tool.py +1 -0
  14. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/utils/__init__.py +2 -0
  15. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/utils/get_all_models.py +1 -1
  16. quantalogic-0.33.0/quantalogic/utils/lm_studio_model_info.py +48 -0
  17. quantalogic-0.31.1/quantalogic/get_model_info.py +0 -44
  18. quantalogic-0.31.1/quantalogic/prompts.py +0 -119
  19. {quantalogic-0.31.1 → quantalogic-0.33.0}/LICENSE +0 -0
  20. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/__init__.py +0 -0
  21. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/agent_config.py +0 -0
  22. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/agent_factory.py +0 -0
  23. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/coding_agent.py +0 -0
  24. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/config.py +0 -0
  25. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/console_print_events.py +0 -0
  26. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/console_print_token.py +0 -0
  27. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/docs_cli.py +0 -0
  28. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/event_emitter.py +0 -0
  29. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/interactive_text_editor.py +0 -0
  30. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/main.py +0 -0
  31. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/memory.py +0 -0
  32. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/model_names.py +0 -0
  33. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/search_agent.py +0 -0
  34. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/server/__init__.py +0 -0
  35. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/server/agent_server.py +0 -0
  36. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/server/models.py +0 -0
  37. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/server/routes.py +0 -0
  38. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/server/state.py +0 -0
  39. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/server/static/js/event_visualizer.js +0 -0
  40. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/server/static/js/quantalogic.js +0 -0
  41. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/server/templates/index.html +0 -0
  42. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/task_file_reader.py +0 -0
  43. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tool_manager.py +0 -0
  44. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/__init__.py +0 -0
  45. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/agent_tool.py +0 -0
  46. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/dalle_e.py +0 -0
  47. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/download_http_file_tool.py +0 -0
  48. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/duckduckgo_search_tool.py +0 -0
  49. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/edit_whole_content_tool.py +0 -0
  50. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/elixir_tool.py +0 -0
  51. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/execute_bash_command_tool.py +0 -0
  52. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/generate_database_report_tool.py +0 -0
  53. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/grep_app_tool.py +0 -0
  54. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/input_question_tool.py +0 -0
  55. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/jinja_tool.py +0 -0
  56. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/language_handlers/__init__.py +0 -0
  57. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/language_handlers/c_handler.py +0 -0
  58. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/language_handlers/cpp_handler.py +0 -0
  59. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/language_handlers/go_handler.py +0 -0
  60. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/language_handlers/java_handler.py +0 -0
  61. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/language_handlers/javascript_handler.py +0 -0
  62. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/language_handlers/python_handler.py +0 -0
  63. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/language_handlers/rust_handler.py +0 -0
  64. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/language_handlers/scala_handler.py +0 -0
  65. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/language_handlers/typescript_handler.py +0 -0
  66. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/list_directory_tool.py +0 -0
  67. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/llm_tool.py +0 -0
  68. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/llm_vision_tool.py +0 -0
  69. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/markitdown_tool.py +0 -0
  70. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/nodejs_tool.py +0 -0
  71. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/python_tool.py +0 -0
  72. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/read_file_block_tool.py +0 -0
  73. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/read_file_tool.py +0 -0
  74. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/read_html_tool.py +0 -0
  75. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/ripgrep_tool.py +0 -0
  76. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/search_definition_names.py +0 -0
  77. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/serpapi_search_tool.py +0 -0
  78. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/sql_query_tool.py +0 -0
  79. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/task_complete_tool.py +0 -0
  80. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/tool.py +0 -0
  81. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/unified_diff_tool.py +0 -0
  82. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/utils/__init__.py +0 -0
  83. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/utils/create_sample_database.py +0 -0
  84. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/utils/generate_database_report.py +0 -0
  85. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/wikipedia_search_tool.py +0 -0
  86. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/write_file_tool.py +0 -0
  87. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/utils/ask_user_validation.py +0 -0
  88. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/utils/check_version.py +0 -0
  89. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/utils/download_http_file.py +0 -0
  90. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/utils/get_coding_environment.py +0 -0
  91. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/utils/get_environment.py +0 -0
  92. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/utils/get_quantalogic_rules_content.py +0 -0
  93. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/utils/git_ls.py +0 -0
  94. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/utils/read_file.py +0 -0
  95. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/utils/read_http_text_content.py +0 -0
  96. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/version.py +0 -0
  97. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/version_check.py +0 -0
  98. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/welcome_message.py +0 -0
  99. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/xml_parser.py +0 -0
  100. {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/xml_tool_parser.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: quantalogic
3
- Version: 0.31.1
3
+ Version: 0.33.0
4
4
  Summary: QuantaLogic ReAct Agents
5
5
  Author: Raphaël MANSUY
6
6
  Author-email: raphael.mansuy@gmail.com
@@ -184,12 +184,25 @@ See our [Release Notes](RELEASE_NOTES.MD) for detailed version history and chang
184
184
  | openrouter/openai/gpt-4o | OPENROUTER_API_KEY | OpenAI's GPT-4o model accessible through OpenRouter platform. |
185
185
  | openrouter/mistralai/mistral-large-2411 | OPENROUTER_API_KEY | Mistral's large model optimized for complex reasoning tasks, available through OpenRouter with enhanced multilingual capabilities. |
186
186
  | mistral/mistral-large-2407 | MISTRAL_API_KEY | Mistral's high-performance model designed for enterprise-grade applications, offering advanced reasoning and multilingual support. |
187
+ | nvidia/deepseek-ai/deepseek-r1 | NVIDIA_API_KEY | NVIDIA's DeepSeek R1 model optimized for high-performance AI tasks and advanced reasoning capabilities. |
188
+ | lm_studio/mistral-small-24b-instruct-2501 | LM_STUDIO_API_KEY | LM Studio's Mistral Small model optimized for local inference with advanced reasoning capabilities. |
187
189
  | dashscope/qwen-max | DASHSCOPE_API_KEY | Alibaba's Qwen-Max model optimized for maximum performance and extensive reasoning capabilities. |
188
190
  | dashscope/qwen-plus | DASHSCOPE_API_KEY | Alibaba's Qwen-Plus model offering balanced performance and cost-efficiency for a variety of tasks. |
189
191
  | dashscope/qwen-turbo | DASHSCOPE_API_KEY | Alibaba's Qwen-Turbo model designed for fast and efficient responses, ideal for high-throughput scenarios. |
190
192
 
191
193
  To configure the environment API key for Quantalogic using LiteLLM, set the required environment variable for your chosen provider and any optional variables like `OPENAI_API_BASE` or `OPENROUTER_REFERRER`. Use a `.env` file or a secrets manager to securely store these keys, and load them in your code using `python-dotenv`. For advanced configurations, refer to the [LiteLLM documentation](https://docs.litellm.ai/docs/).
192
194
 
195
+ ### LM Studio Local Setup
196
+
197
+ To use LM Studio with the Mistral model locally, set the following environment variables:
198
+
199
+ ```bash
200
+ export LM_STUDIO_API_BASE="http://localhost:1234/v1"
201
+ export LM_STUDIO_API_KEY="your-api-key-here"
202
+ ```
203
+
204
+ Replace `http://localhost:1234/v1` with your LM Studio server URL and `your-api-key-here` with your actual API key.
205
+
193
206
 
194
207
  ## 📦 Installation
195
208
 
@@ -124,12 +124,25 @@ See our [Release Notes](RELEASE_NOTES.MD) for detailed version history and chang
124
124
  | openrouter/openai/gpt-4o | OPENROUTER_API_KEY | OpenAI's GPT-4o model accessible through OpenRouter platform. |
125
125
  | openrouter/mistralai/mistral-large-2411 | OPENROUTER_API_KEY | Mistral's large model optimized for complex reasoning tasks, available through OpenRouter with enhanced multilingual capabilities. |
126
126
  | mistral/mistral-large-2407 | MISTRAL_API_KEY | Mistral's high-performance model designed for enterprise-grade applications, offering advanced reasoning and multilingual support. |
127
+ | nvidia/deepseek-ai/deepseek-r1 | NVIDIA_API_KEY | NVIDIA's DeepSeek R1 model optimized for high-performance AI tasks and advanced reasoning capabilities. |
128
+ | lm_studio/mistral-small-24b-instruct-2501 | LM_STUDIO_API_KEY | LM Studio's Mistral Small model optimized for local inference with advanced reasoning capabilities. |
127
129
  | dashscope/qwen-max | DASHSCOPE_API_KEY | Alibaba's Qwen-Max model optimized for maximum performance and extensive reasoning capabilities. |
128
130
  | dashscope/qwen-plus | DASHSCOPE_API_KEY | Alibaba's Qwen-Plus model offering balanced performance and cost-efficiency for a variety of tasks. |
129
131
  | dashscope/qwen-turbo | DASHSCOPE_API_KEY | Alibaba's Qwen-Turbo model designed for fast and efficient responses, ideal for high-throughput scenarios. |
130
132
 
131
133
  To configure the environment API key for Quantalogic using LiteLLM, set the required environment variable for your chosen provider and any optional variables like `OPENAI_API_BASE` or `OPENROUTER_REFERRER`. Use a `.env` file or a secrets manager to securely store these keys, and load them in your code using `python-dotenv`. For advanced configurations, refer to the [LiteLLM documentation](https://docs.litellm.ai/docs/).
132
134
 
135
+ ### LM Studio Local Setup
136
+
137
+ To use LM Studio with the Mistral model locally, set the following environment variables:
138
+
139
+ ```bash
140
+ export LM_STUDIO_API_BASE="http://localhost:1234/v1"
141
+ export LM_STUDIO_API_KEY="your-api-key-here"
142
+ ```
143
+
144
+ Replace `http://localhost:1234/v1` with your LM Studio server URL and `your-api-key-here` with your actual API key.
145
+
133
146
 
134
147
  ## 📦 Installation
135
148
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "quantalogic"
3
- version = "0.31.1"
3
+ version = "0.33.0"
4
4
  description = "QuantaLogic ReAct Agents"
5
5
  authors = ["Raphaël MANSUY <raphael.mansuy@gmail.com>"]
6
6
  readme = "README.md"
@@ -52,11 +52,7 @@ class ObserveResponseResult(BaseModel):
52
52
  class Agent(BaseModel):
53
53
  """Enhanced QuantaLogic agent implementing ReAct framework."""
54
54
 
55
- model_config = ConfigDict(
56
- arbitrary_types_allowed=True,
57
- validate_assignment=True,
58
- extra="forbid"
59
- )
55
+ model_config = ConfigDict(arbitrary_types_allowed=True, validate_assignment=True, extra="forbid")
60
56
 
61
57
  specific_expertise: str
62
58
  model: GenerativeModel
@@ -95,7 +91,7 @@ class Agent(BaseModel):
95
91
  """Initialize the agent with model, memory, tools, and configurations."""
96
92
  try:
97
93
  logger.debug("Initializing agent...")
98
-
94
+
99
95
  # Create event emitter
100
96
  event_emitter = EventEmitter()
101
97
 
@@ -142,9 +138,9 @@ class Agent(BaseModel):
142
138
  compact_every_n_iterations=compact_every_n_iterations or 30,
143
139
  max_tokens_working_memory=max_tokens_working_memory,
144
140
  )
145
-
141
+
146
142
  self._model_name = model_name
147
-
143
+
148
144
  logger.debug(f"Memory will be compacted every {self.compact_every_n_iterations} iterations")
149
145
  logger.debug(f"Max tokens for working memory set to: {self.max_tokens_working_memory}")
150
146
  logger.debug("Agent initialized successfully.")
@@ -168,7 +164,9 @@ class Agent(BaseModel):
168
164
  """Clear the memory and reset the session."""
169
165
  self._reset_session(clear_memory=True)
170
166
 
171
- def solve_task(self, task: str, max_iterations: int = 30, streaming: bool = False, clear_memory: bool = True) -> str:
167
+ def solve_task(
168
+ self, task: str, max_iterations: int = 30, streaming: bool = False, clear_memory: bool = True
169
+ ) -> str:
172
170
  """Solve the given task using the ReAct framework.
173
171
 
174
172
  Args:
@@ -182,7 +180,7 @@ class Agent(BaseModel):
182
180
  str: The final response after task completion.
183
181
  """
184
182
  logger.debug(f"Solving task... {task}")
185
- self._reset_session(task_to_solve=task, max_iterations=max_iterations,clear_memory=clear_memory)
183
+ self._reset_session(task_to_solve=task, max_iterations=max_iterations, clear_memory=clear_memory)
186
184
 
187
185
  # Generate task summary
188
186
  self.task_to_solve_summary = self._generate_task_summary(task)
@@ -228,7 +226,9 @@ class Agent(BaseModel):
228
226
  # For streaming, collect the response chunks
229
227
  content = ""
230
228
  for chunk in self.model.generate_with_history(
231
- messages_history=self.memory.memory, prompt=current_prompt, streaming=True
229
+ messages_history=self.memory.memory,
230
+ prompt=current_prompt,
231
+ streaming=True,
232
232
  ):
233
233
  content += chunk
234
234
 
@@ -245,7 +245,8 @@ class Agent(BaseModel):
245
245
  )
246
246
  else:
247
247
  result = self.model.generate_with_history(
248
- messages_history=self.memory.memory, prompt=current_prompt, streaming=False
248
+ messages_history=self.memory.memory, prompt=current_prompt, streaming=False,
249
+ stop_words=["thinking"]
249
250
  )
250
251
 
251
252
  content = result.response
@@ -296,7 +297,7 @@ class Agent(BaseModel):
296
297
 
297
298
  return answer
298
299
 
299
- def _reset_session(self, task_to_solve: str = "", max_iterations: int = 30,clear_memory: bool = True):
300
+ def _reset_session(self, task_to_solve: str = "", max_iterations: int = 30, clear_memory: bool = True):
300
301
  """Reset the agent's session."""
301
302
  logger.debug("Resetting session...")
302
303
  self.task_to_solve = task_to_solve
@@ -316,29 +317,30 @@ class Agent(BaseModel):
316
317
  def _compact_memory_if_needed(self, current_prompt: str = ""):
317
318
  """Compacts the memory if it exceeds the maximum occupancy or token limit."""
318
319
  ratio_occupied = self._calculate_context_occupancy()
319
-
320
+
320
321
  # Compact memory if any of these conditions are met:
321
322
  # 1. Memory occupancy exceeds MAX_OCCUPANCY, or
322
323
  # 2. Current iteration is a multiple of compact_every_n_iterations, or
323
324
  # 3. Working memory exceeds max_tokens_working_memory (if set)
324
325
  should_compact_by_occupancy = ratio_occupied >= MAX_OCCUPANCY
325
326
  should_compact_by_iteration = (
326
- self.compact_every_n_iterations is not None and
327
- self.current_iteration > 0 and
328
- self.current_iteration % self.compact_every_n_iterations == 0
327
+ self.compact_every_n_iterations is not None
328
+ and self.current_iteration > 0
329
+ and self.current_iteration % self.compact_every_n_iterations == 0
329
330
  )
330
331
  should_compact_by_token_limit = (
331
- self.max_tokens_working_memory is not None and
332
- self.total_tokens > self.max_tokens_working_memory
332
+ self.max_tokens_working_memory is not None and self.total_tokens > self.max_tokens_working_memory
333
333
  )
334
-
334
+
335
335
  if should_compact_by_occupancy or should_compact_by_iteration or should_compact_by_token_limit:
336
336
  if should_compact_by_occupancy:
337
337
  logger.debug(f"Memory compaction triggered: Occupancy {ratio_occupied}% exceeds {MAX_OCCUPANCY}%")
338
-
338
+
339
339
  if should_compact_by_iteration:
340
- logger.debug(f"Memory compaction triggered: Iteration {self.current_iteration} is a multiple of {self.compact_every_n_iterations}")
341
-
340
+ logger.debug(
341
+ f"Memory compaction triggered: Iteration {self.current_iteration} is a multiple of {self.compact_every_n_iterations}"
342
+ )
343
+
342
344
  self._emit_event("memory_full")
343
345
  self.memory.compact()
344
346
  self.total_tokens = self.model.token_counter_with_history(self.memory.memory, current_prompt)
@@ -399,7 +401,7 @@ class Agent(BaseModel):
399
401
  return self._handle_tool_execution_failure(response)
400
402
 
401
403
  variable_name = self.variable_store.add(response)
402
- new_prompt = self._format_observation_response(response, variable_name, iteration)
404
+ new_prompt = self._format_observation_response(response, executed_tool, variable_name, iteration)
403
405
 
404
406
  return ObserveResponseResult(
405
407
  next_prompt=new_prompt,
@@ -414,7 +416,7 @@ class Agent(BaseModel):
414
416
  """Extract tool usage from the response content."""
415
417
  if not content or not isinstance(content, str):
416
418
  return {}
417
-
419
+
418
420
  xml_parser = ToleranceXMLParser()
419
421
  tool_names = self.tools.tool_names()
420
422
  return xml_parser.extract_elements(text=content, element_names=tool_names)
@@ -461,7 +463,7 @@ class Agent(BaseModel):
461
463
  answer=None,
462
464
  )
463
465
 
464
- def _handle_repeated_tool_call(self, tool_name: str, arguments_with_values: dict) -> (str,str):
466
+ def _handle_repeated_tool_call(self, tool_name: str, arguments_with_values: dict) -> (str, str):
465
467
  """Handle the case where a tool call is repeated."""
466
468
  repeat_count = self.last_tool_call.get("count", 0)
467
469
  error_message = (
@@ -494,7 +496,9 @@ class Agent(BaseModel):
494
496
  answer=None,
495
497
  )
496
498
 
497
- def _format_observation_response(self, response: str, variable_name: str, iteration: int) -> str:
499
+ def _format_observation_response(
500
+ self, response: str, last_exectured_tool: str, variable_name: str, iteration: int
501
+ ) -> str:
498
502
  """Format the observation response with the given response, variable name, and iteration."""
499
503
  response_display = response
500
504
  if len(response) > MAX_RESPONSE_LENGTH:
@@ -504,29 +508,45 @@ class Agent(BaseModel):
504
508
  )
505
509
 
506
510
  # Format the response message
507
- formatted_response = (
508
- f"Your next step: you Must now plan the next tool call to complete the based on this new observation\n"
509
- f"\n--- Observations for iteration {iteration} / max {self.max_iterations} ---\n"
510
- f"\n--- Tool execution result in ${variable_name}$ ---\n"
511
- f"<{variable_name}>\n{response_display}\n</{variable_name}>\n\n"
512
- f"--- Tools ---\n{self._get_tools_names_prompt()}\n"
513
- f"--- Variables ---\n{self._get_variable_prompt()}\n"
514
- "Analyze this response to determine the next steps. If the step failed, reconsider your approach.\n"
515
- f"--- Task to solve summary ---\n{self.task_to_solve_summary}\n"
516
- "--- Format ---\n"
517
- "Respond only with two XML blocks in markdown as specified in system prompt.\n"
518
- "No extra comments must be added.\n"
511
+ formatted_response = formatted_response = (
512
+ "# Analysis and Next Action Decision Point\n\n"
513
+ f"📊 Progress: Iteration {iteration}/{self.max_iterations}\n\n"
514
+ "## Current Context\n"
515
+ f"```\n{self.task_to_solve_summary}```\n\n"
516
+ f"## Latest Tool {last_exectured_tool} Execution Result:\n"
517
+ f"Variable: ${variable_name}$\n"
518
+ f"```\n{response_display}```\n\n"
519
+ "## Available Resources\n"
520
+ f"🛠️ Tools:\n{self._get_tools_names_prompt()}\n\n"
521
+ f"📦 Variables:\n{self._get_variable_prompt()}\n\n"
522
+ "## Your Task\n"
523
+ "1. Analyze the execution result and progress, formalize if the current step is solved according to the task.\n"
524
+ "2. Determine the most effective next step\n"
525
+ "3. Select exactly ONE tool from the available list\n"
526
+ "4. Utilize variable interpolation where needed\n"
527
+ "## Response Requirements\n"
528
+ "Provide TWO markdown-formatted XML blocks:\n"
529
+ "1. Your analysis of the progression resulting from the execution of the tool in <thinking> tags, don't include <context_analysis/>\n"
530
+ "2. Your tool execution plan in <tool_name> tags\n\n"
531
+ "## Response Format\n"
519
532
  "```xml\n"
520
533
  "<thinking>\n"
521
- "...\n"
534
+ "[Detailed analysis of progress, and reasoning for next step]\n"
522
535
  "</thinking>\n"
523
536
  "```\n"
524
537
  "```xml\n"
525
- "< ...tool_name... >\n"
526
- "...\n"
527
- "</ ...tool_name... >\n"
528
- "```"
529
- )
538
+ "<action>\n"
539
+ "<selected_tool_name>\n"
540
+ "[Precise instruction for tool execution]\n"
541
+ "</selected_tool_name>\n"
542
+ "</action>\n"
543
+ "```\n\n"
544
+ "⚠️ Important:\n"
545
+ "- Respond ONLY with the two XML blocks\n"
546
+ "- No additional commentary\n"
547
+ "- If previous step failed, revise approach\n"
548
+ "- Ensure variable interpolation syntax is correct\n"
549
+ "- Utilize the <task_complete> tool to indicate task completion, display the result or if the task is deemed unfeasible.")
530
550
 
531
551
  return formatted_response
532
552
 
@@ -589,10 +609,10 @@ class Agent(BaseModel):
589
609
  arguments_with_values_interpolated = {
590
610
  key: self._interpolate_variables(value) for key, value in arguments_with_values.items()
591
611
  }
592
-
612
+
593
613
  arguments_with_values_interpolated = arguments_with_values_interpolated
594
614
 
595
- # test if tool need variables in context
615
+ # test if tool need variables in context
596
616
  if tool.need_variables:
597
617
  # Inject variables into the tool if needed
598
618
  arguments_with_values_interpolated["variables"] = self.variable_store
@@ -603,8 +623,7 @@ class Agent(BaseModel):
603
623
  try:
604
624
  # Convert arguments to proper types
605
625
  converted_args = self.tools.validate_and_convert_arguments(
606
- tool_name,
607
- arguments_with_values_interpolated
626
+ tool_name, arguments_with_values_interpolated
608
627
  )
609
628
  except ValueError as e:
610
629
  return "", f"Argument Error: {str(e)}"
@@ -637,9 +656,10 @@ class Agent(BaseModel):
637
656
  """Interpolate variables using $var$ syntax in the given text."""
638
657
  try:
639
658
  import re
659
+
640
660
  for var in self.variable_store.keys():
641
661
  # Escape the variable name for regex, but use raw value for replacement
642
- pattern = rf'\${re.escape(var)}\$'
662
+ pattern = rf"\${re.escape(var)}\$"
643
663
  replacement = self.variable_store[var]
644
664
  text = re.sub(pattern, replacement, text)
645
665
  return text
@@ -729,9 +749,7 @@ class Agent(BaseModel):
729
749
  # Remove the last assistant / user message
730
750
  user_message = memory_copy.pop()
731
751
  assistant_message = memory_copy.pop()
732
- summary = self.model.generate_with_history(
733
- messages_history=memory_copy, prompt=prompt_summary
734
- )
752
+ summary = self.model.generate_with_history(messages_history=memory_copy, prompt=prompt_summary)
735
753
  # Remove user message
736
754
  memory_copy.pop()
737
755
  # Replace by summary
@@ -751,6 +769,8 @@ class Agent(BaseModel):
751
769
  str: Generated task summary
752
770
  """
753
771
  try:
772
+ if len(content) < 200:
773
+ return content
754
774
  prompt = (
755
775
  "Create an ultra-concise task summary that captures ONLY: \n"
756
776
  "1. Primary objective/purpose\n"
@@ -123,7 +123,8 @@ class GenerativeModel:
123
123
 
124
124
  # Generate a response with conversation history and optional streaming
125
125
  def generate_with_history(
126
- self, messages_history: list[Message], prompt: str, image_url: str | None = None, streaming: bool = False
126
+ self, messages_history: list[Message], prompt: str, image_url: str | None = None, streaming: bool = False,
127
+ stop_words: list[str] | None = None
127
128
  ) -> ResponseStats:
128
129
  """Generate a response with conversation history and optional image.
129
130
 
@@ -132,6 +133,7 @@ class GenerativeModel:
132
133
  prompt: Current user prompt.
133
134
  image_url: Optional image URL for visual queries.
134
135
  streaming: Whether to stream the response.
136
+ stop_words: Optional list of stop words for streaming
135
137
 
136
138
  Returns:
137
139
  Detailed response statistics or a generator in streaming mode.
@@ -163,6 +165,7 @@ class GenerativeModel:
163
165
  model=self.model,
164
166
  messages=messages,
165
167
  num_retries=MIN_RETRIES,
168
+ stop=stop_words,
166
169
  )
167
170
 
168
171
  token_usage = TokenUsage(
@@ -181,7 +184,7 @@ class GenerativeModel:
181
184
  except Exception as e:
182
185
  self._handle_generation_exception(e)
183
186
 
184
- def _stream_response(self, messages):
187
+ def _stream_response(self, messages, stop_words: list[str] | None = None):
185
188
  """Private method to handle streaming responses."""
186
189
  try:
187
190
  for chunk in generate_completion(
@@ -189,7 +192,8 @@ class GenerativeModel:
189
192
  model=self.model,
190
193
  messages=messages,
191
194
  num_retries=MIN_RETRIES,
192
- stream=True, # Enable streaming
195
+ stream=True, # Enable streaming,
196
+ stop=stop_words,
193
197
  ):
194
198
  if chunk.choices[0].delta.content is not None:
195
199
  self.event_emitter.emit("stream_chunk", chunk.choices[0].delta.content)
@@ -0,0 +1,83 @@
1
+ import loguru
2
+
3
+ from quantalogic.model_info_list import model_info
4
+ from quantalogic.model_info_litellm import litellm_get_model_max_input_tokens, litellm_get_model_max_output_tokens
5
+ from quantalogic.utils.lm_studio_model_info import ModelInfo, get_model_list
6
+
7
+ DEFAULT_MAX_OUTPUT_TOKENS = 4 * 1024 # Reasonable default for most models
8
+ DEFAULT_MAX_INPUT_TOKENS = 32 * 1024 # Reasonable default for most models
9
+
10
+
11
+ def validate_model_name(model_name: str) -> None:
12
+ if not isinstance(model_name, str) or not model_name.strip():
13
+ raise ValueError(f"Invalid model name: {model_name}")
14
+
15
+
16
+ def print_model_info():
17
+ for info in model_info.values():
18
+ print(f"\n{info.model_name}:")
19
+ print(f" Max Input Tokens: {info.max_input_tokens:,}")
20
+ print(f" Max Output Tokens: {info.max_output_tokens:,}")
21
+
22
+
23
+ def get_max_output_tokens(model_name: str) -> int:
24
+ """Get max output tokens with safe fallback"""
25
+ validate_model_name(model_name)
26
+
27
+ if model_name.startswith('lm_studio/'):
28
+ try:
29
+ models = get_model_list()
30
+ for model in models.data:
31
+ if model.id == model_name[len('lm_studio/'):]:
32
+ return model.max_context_length
33
+ except Exception:
34
+ loguru.logger.warning(f"Could not fetch LM Studio model info for {model_name}, using default")
35
+
36
+ if model_name in model_info:
37
+ return model_info[model_name].max_output_tokens
38
+
39
+ try:
40
+ return litellm_get_model_max_output_tokens(model_name)
41
+ except Exception as e:
42
+ loguru.logger.warning(f"Model {model_name} not found in LiteLLM registry, using default")
43
+ return DEFAULT_MAX_OUTPUT_TOKENS
44
+
45
+
46
+ def get_max_input_tokens(model_name: str) -> int:
47
+ """Get max input tokens with safe fallback"""
48
+ validate_model_name(model_name)
49
+
50
+ if model_name.startswith('lm_studio/'):
51
+ try:
52
+ models = get_model_list()
53
+ for model in models.data:
54
+ if model.id == model_name[len('lm_studio/'):]:
55
+ return model.max_context_length
56
+ except Exception:
57
+ loguru.logger.warning(f"Could not fetch LM Studio model info for {model_name}, using default")
58
+
59
+ if model_name in model_info:
60
+ return model_info[model_name].max_input_tokens
61
+
62
+ try:
63
+ return litellm_get_model_max_input_tokens(model_name)
64
+ except Exception:
65
+ loguru.logger.warning(f"Model {model_name} not found in LiteLLM registry, using default")
66
+ return DEFAULT_MAX_INPUT_TOKENS
67
+
68
+
69
+ def get_max_tokens(model_name: str) -> int:
70
+ """Get total maximum tokens (input + output)"""
71
+ validate_model_name(model_name)
72
+
73
+ # Get input and output tokens separately
74
+ input_tokens = get_max_input_tokens(model_name)
75
+ output_tokens = get_max_output_tokens(model_name)
76
+
77
+ return input_tokens + output_tokens
78
+
79
+
80
+ if __name__ == "__main__":
81
+ print_model_info()
82
+ print(get_max_input_tokens("gpt-4o-mini"))
83
+ print(get_max_output_tokens("openrouter/openai/gpt-4o-mini"))
@@ -30,18 +30,56 @@ def get_model_info(model_name: str) -> dict | None:
30
30
  return model_info.get(model_name, None)
31
31
 
32
32
 
33
class ModelProviderConfig:
    """Connection settings for an OpenAI-compatible model provider.

    Maps a model-name prefix (e.g. ``"dashscope/"``) to the provider's
    base URL and the environment variable that must hold its API key.
    """

    def __init__(self, prefix: str, provider: str, base_url: str, env_var: str):
        self.prefix = prefix        # model-name prefix that selects this provider
        self.provider = provider    # litellm custom_llm_provider value
        self.base_url = base_url    # OpenAI-compatible endpoint URL
        self.env_var = env_var      # environment variable holding the API key

    def configure(self, model: str, kwargs: Dict[str, Any]) -> None:
        """Populate *kwargs* in place for a litellm call routed to this provider.

        Args:
            model: Fully-prefixed model name (e.g. ``"nvidia/deepseek-ai/deepseek-r1"``).
            kwargs: The litellm call kwargs, mutated in place.

        Raises:
            ValueError: If the provider's API-key env var is unset; *kwargs*
                is left untouched in that case.
        """
        # Validate the credential first so a failure never leaves kwargs half-mutated.
        api_key = os.getenv(self.env_var)
        if not api_key:
            raise ValueError(f"{self.env_var} is not set in the environment variables.")
        # Strip only the LEADING prefix; str.replace would also remove any
        # later occurrence of the prefix inside the model name.
        kwargs["model"] = model.removeprefix(self.prefix)
        kwargs["custom_llm_provider"] = self.provider
        kwargs["base_url"] = self.base_url
        kwargs["api_key"] = api_key
48
+
49
+
50
# Registry of OpenAI-compatible providers, keyed by short name; each is
# selected at call time by its "<name>/" model-name prefix.
PROVIDERS = {
    name: ModelProviderConfig(
        prefix=f"{name}/",
        provider="openai",
        base_url=url,
        env_var=env_var,
    )
    for name, url, env_var in (
        ("dashscope", "https://dashscope-intl.aliyuncs.com/compatible-mode/v1", "DASHSCOPE_API_KEY"),
        ("nvidia", "https://integrate.api.nvidia.com/v1", "NVIDIA_API_KEY"),
        ("ovh", "https://deepseek-r1-distill-llama-70b.endpoints.kepler.ai.cloud.ovh.net/api/openai_compat/v1", "OVH_API_KEY"),
    )
}
71
+
72
+
33
73
def generate_completion(**kwargs: Any) -> Any:
    """Call litellm's ``completion`` after applying any provider routing.

    If the ``model`` keyword starts with a known provider prefix (see
    ``PROVIDERS``), the kwargs are rewritten in place with that provider's
    endpoint, credentials and stripped model name before delegating.

    Note: ``**kwargs`` is annotated as ``Any`` because the annotation
    describes each keyword VALUE, not the kwargs mapping itself.
    """
    model = kwargs.get("model", "")

    # Route to the first provider whose prefix matches the model name.
    for provider_config in PROVIDERS.values():
        if model.startswith(provider_config.prefix):
            provider_config.configure(model, kwargs)
            break

    return completion(**kwargs)
46
84
 
47
85
 
@@ -0,0 +1,12 @@
1
+ from pydantic import BaseModel
2
+
3
+
4
class ModelInfo(BaseModel):
    """Static token-limit metadata for a single LLM."""

    model_name: str
    max_input_tokens: int
    max_output_tokens: int
    # Chain-of-thought budget; only set for reasoning models that expose one.
    max_cot_tokens: int | None = None
9
+
10
+
11
class ModelNotFoundError(Exception):
    """Raised when a model is not found in local registry"""
@@ -0,0 +1,60 @@
1
+ from quantalogic.model_info import ModelInfo
2
+
3
# Local registry of hard-coded token limits, keyed by fully-qualified model
# name (provider prefix included where one is used). Consulted before the
# LiteLLM registry by the token-limit helpers.
model_info = {
    "dashscope/qwen-max": ModelInfo(
        model_name="dashscope/qwen-max",
        max_input_tokens=32 * 1024,
        max_output_tokens=8 * 1024,
    ),
    "dashscope/qwen-plus": ModelInfo(
        model_name="dashscope/qwen-plus",
        max_input_tokens=128 * 1024,
        max_output_tokens=8 * 1024,
    ),
    "dashscope/qwen-turbo": ModelInfo(
        model_name="dashscope/qwen-turbo",
        max_input_tokens=1_000_000,
        max_output_tokens=8 * 1024,
    ),
    "deepseek-reasoner": ModelInfo(
        model_name="deepseek-reasoner",
        max_input_tokens=128 * 1024,
        max_output_tokens=8 * 1024,
    ),
    "openrouter/deepseek/deepseek-r1": ModelInfo(
        model_name="openrouter/deepseek/deepseek-r1",
        max_input_tokens=128 * 1024,
        max_output_tokens=8 * 1024,
    ),
    # NOTE(review): a 128K *output* limit looks unusually large for
    # mistral-large-2411 — confirm against the provider's published limits.
    "openrouter/mistralai/mistral-large-2411": ModelInfo(
        model_name="openrouter/mistralai/mistral-large-2411",
        max_input_tokens=128 * 1024,
        max_output_tokens=128 * 1024,
    ),
    "mistralai/mistral-large-2411": ModelInfo(
        model_name="mistralai/mistral-large-2411",
        max_input_tokens=128 * 1024,
        max_output_tokens=128 * 1024,
    ),
    "deepseek/deepseek-chat": ModelInfo(
        model_name="deepseek/deepseek-chat",
        max_input_tokens=64 * 1024,
        max_output_tokens=8 * 1024,
    ),
    "deepseek/deepseek-reasoner": ModelInfo(
        model_name="deepseek/deepseek-reasoner",
        max_input_tokens=64 * 1024,
        max_output_tokens=8 * 1024,
        max_cot_tokens=32 * 1024,
    ),
    "nvidia/deepseek-ai/deepseek-r1": ModelInfo(
        model_name="nvidia/deepseek-ai/deepseek-r1",
        max_input_tokens=64 * 1024,
        max_output_tokens=8 * 1024,
    ),
    "ovh/DeepSeek-R1-Distill-Llama-70B": ModelInfo(
        model_name="ovh/DeepSeek-R1-Distill-Llama-70B",
        max_input_tokens=64 * 1024,
        max_output_tokens=8 * 1024,
    ),
}