quantalogic 0.31.1__tar.gz → 0.33.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {quantalogic-0.31.1 → quantalogic-0.33.0}/PKG-INFO +14 -1
- {quantalogic-0.31.1 → quantalogic-0.33.0}/README.md +13 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/pyproject.toml +1 -1
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/agent.py +73 -53
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/generative_model.py +7 -3
- quantalogic-0.33.0/quantalogic/get_model_info.py +83 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/llm.py +47 -9
- quantalogic-0.33.0/quantalogic/model_info.py +12 -0
- quantalogic-0.33.0/quantalogic/model_info_list.py +60 -0
- quantalogic-0.33.0/quantalogic/model_info_litellm.py +70 -0
- quantalogic-0.33.0/quantalogic/prompts.py +116 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/task_runner.py +11 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/replace_in_file_tool.py +1 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/utils/__init__.py +2 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/utils/get_all_models.py +1 -1
- quantalogic-0.33.0/quantalogic/utils/lm_studio_model_info.py +48 -0
- quantalogic-0.31.1/quantalogic/get_model_info.py +0 -44
- quantalogic-0.31.1/quantalogic/prompts.py +0 -119
- {quantalogic-0.31.1 → quantalogic-0.33.0}/LICENSE +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/__init__.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/agent_config.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/agent_factory.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/coding_agent.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/config.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/console_print_events.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/console_print_token.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/docs_cli.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/event_emitter.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/interactive_text_editor.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/main.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/memory.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/model_names.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/search_agent.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/server/__init__.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/server/agent_server.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/server/models.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/server/routes.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/server/state.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/server/static/js/event_visualizer.js +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/server/static/js/quantalogic.js +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/server/templates/index.html +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/task_file_reader.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tool_manager.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/__init__.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/agent_tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/dalle_e.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/download_http_file_tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/duckduckgo_search_tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/edit_whole_content_tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/elixir_tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/execute_bash_command_tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/generate_database_report_tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/grep_app_tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/input_question_tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/jinja_tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/language_handlers/__init__.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/language_handlers/c_handler.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/language_handlers/cpp_handler.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/language_handlers/go_handler.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/language_handlers/java_handler.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/language_handlers/javascript_handler.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/language_handlers/python_handler.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/language_handlers/rust_handler.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/language_handlers/scala_handler.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/language_handlers/typescript_handler.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/list_directory_tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/llm_tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/llm_vision_tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/markitdown_tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/nodejs_tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/python_tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/read_file_block_tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/read_file_tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/read_html_tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/ripgrep_tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/search_definition_names.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/serpapi_search_tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/sql_query_tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/task_complete_tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/unified_diff_tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/utils/__init__.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/utils/create_sample_database.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/utils/generate_database_report.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/wikipedia_search_tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/tools/write_file_tool.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/utils/ask_user_validation.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/utils/check_version.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/utils/download_http_file.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/utils/get_coding_environment.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/utils/get_environment.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/utils/get_quantalogic_rules_content.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/utils/git_ls.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/utils/read_file.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/utils/read_http_text_content.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/version.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/version_check.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/welcome_message.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/xml_parser.py +0 -0
- {quantalogic-0.31.1 → quantalogic-0.33.0}/quantalogic/xml_tool_parser.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: quantalogic
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.33.0
|
4
4
|
Summary: QuantaLogic ReAct Agents
|
5
5
|
Author: Raphaël MANSUY
|
6
6
|
Author-email: raphael.mansuy@gmail.com
|
@@ -184,12 +184,25 @@ See our [Release Notes](RELEASE_NOTES.MD) for detailed version history and chang
|
|
184
184
|
| openrouter/openai/gpt-4o | OPENROUTER_API_KEY | OpenAI's GPT-4o model accessible through OpenRouter platform. |
|
185
185
|
| openrouter/mistralai/mistral-large-2411 | OPENROUTER_API_KEY | Mistral's large model optimized for complex reasoning tasks, available through OpenRouter with enhanced multilingual capabilities. |
|
186
186
|
| mistral/mistral-large-2407 | MISTRAL_API_KEY | Mistral's high-performance model designed for enterprise-grade applications, offering advanced reasoning and multilingual support. |
|
187
|
+
| nvidia/deepseek-ai/deepseek-r1 | NVIDIA_API_KEY | NVIDIA's DeepSeek R1 model optimized for high-performance AI tasks and advanced reasoning capabilities. |
|
188
|
+
| lm_studio/mistral-small-24b-instruct-2501 | LM_STUDIO_API_KEY | LM Studio's Mistral Small model optimized for local inference with advanced reasoning capabilities. |
|
187
189
|
| dashscope/qwen-max | DASHSCOPE_API_KEY | Alibaba's Qwen-Max model optimized for maximum performance and extensive reasoning capabilities. |
|
188
190
|
| dashscope/qwen-plus | DASHSCOPE_API_KEY | Alibaba's Qwen-Plus model offering balanced performance and cost-efficiency for a variety of tasks. |
|
189
191
|
| dashscope/qwen-turbo | DASHSCOPE_API_KEY | Alibaba's Qwen-Turbo model designed for fast and efficient responses, ideal for high-throughput scenarios. |
|
190
192
|
|
191
193
|
To configure the environment API key for Quantalogic using LiteLLM, set the required environment variable for your chosen provider and any optional variables like `OPENAI_API_BASE` or `OPENROUTER_REFERRER`. Use a `.env` file or a secrets manager to securely store these keys, and load them in your code using `python-dotenv`. For advanced configurations, refer to the [LiteLLM documentation](https://docs.litellm.ai/docs/).
|
192
194
|
|
195
|
+
### LM Studio Local Setup
|
196
|
+
|
197
|
+
To use LM Studio with the Mistral model locally, set the following environment variables:
|
198
|
+
|
199
|
+
```bash
|
200
|
+
export LM_STUDIO_API_BASE="http://localhost:1234/v1"
|
201
|
+
export LM_STUDIO_API_KEY="your-api-key-here"
|
202
|
+
```
|
203
|
+
|
204
|
+
Replace `http://localhost:1234/v1` with your LM Studio server URL and `your-api-key-here` with your actual API key.
|
205
|
+
|
193
206
|
|
194
207
|
## 📦 Installation
|
195
208
|
|
@@ -124,12 +124,25 @@ See our [Release Notes](RELEASE_NOTES.MD) for detailed version history and chang
|
|
124
124
|
| openrouter/openai/gpt-4o | OPENROUTER_API_KEY | OpenAI's GPT-4o model accessible through OpenRouter platform. |
|
125
125
|
| openrouter/mistralai/mistral-large-2411 | OPENROUTER_API_KEY | Mistral's large model optimized for complex reasoning tasks, available through OpenRouter with enhanced multilingual capabilities. |
|
126
126
|
| mistral/mistral-large-2407 | MISTRAL_API_KEY | Mistral's high-performance model designed for enterprise-grade applications, offering advanced reasoning and multilingual support. |
|
127
|
+
| nvidia/deepseek-ai/deepseek-r1 | NVIDIA_API_KEY | NVIDIA's DeepSeek R1 model optimized for high-performance AI tasks and advanced reasoning capabilities. |
|
128
|
+
| lm_studio/mistral-small-24b-instruct-2501 | LM_STUDIO_API_KEY | LM Studio's Mistral Small model optimized for local inference with advanced reasoning capabilities. |
|
127
129
|
| dashscope/qwen-max | DASHSCOPE_API_KEY | Alibaba's Qwen-Max model optimized for maximum performance and extensive reasoning capabilities. |
|
128
130
|
| dashscope/qwen-plus | DASHSCOPE_API_KEY | Alibaba's Qwen-Plus model offering balanced performance and cost-efficiency for a variety of tasks. |
|
129
131
|
| dashscope/qwen-turbo | DASHSCOPE_API_KEY | Alibaba's Qwen-Turbo model designed for fast and efficient responses, ideal for high-throughput scenarios. |
|
130
132
|
|
131
133
|
To configure the environment API key for Quantalogic using LiteLLM, set the required environment variable for your chosen provider and any optional variables like `OPENAI_API_BASE` or `OPENROUTER_REFERRER`. Use a `.env` file or a secrets manager to securely store these keys, and load them in your code using `python-dotenv`. For advanced configurations, refer to the [LiteLLM documentation](https://docs.litellm.ai/docs/).
|
132
134
|
|
135
|
+
### LM Studio Local Setup
|
136
|
+
|
137
|
+
To use LM Studio with the Mistral model locally, set the following environment variables:
|
138
|
+
|
139
|
+
```bash
|
140
|
+
export LM_STUDIO_API_BASE="http://localhost:1234/v1"
|
141
|
+
export LM_STUDIO_API_KEY="your-api-key-here"
|
142
|
+
```
|
143
|
+
|
144
|
+
Replace `http://localhost:1234/v1` with your LM Studio server URL and `your-api-key-here` with your actual API key.
|
145
|
+
|
133
146
|
|
134
147
|
## 📦 Installation
|
135
148
|
|
@@ -52,11 +52,7 @@ class ObserveResponseResult(BaseModel):
|
|
52
52
|
class Agent(BaseModel):
|
53
53
|
"""Enhanced QuantaLogic agent implementing ReAct framework."""
|
54
54
|
|
55
|
-
model_config = ConfigDict(
|
56
|
-
arbitrary_types_allowed=True,
|
57
|
-
validate_assignment=True,
|
58
|
-
extra="forbid"
|
59
|
-
)
|
55
|
+
model_config = ConfigDict(arbitrary_types_allowed=True, validate_assignment=True, extra="forbid")
|
60
56
|
|
61
57
|
specific_expertise: str
|
62
58
|
model: GenerativeModel
|
@@ -95,7 +91,7 @@ class Agent(BaseModel):
|
|
95
91
|
"""Initialize the agent with model, memory, tools, and configurations."""
|
96
92
|
try:
|
97
93
|
logger.debug("Initializing agent...")
|
98
|
-
|
94
|
+
|
99
95
|
# Create event emitter
|
100
96
|
event_emitter = EventEmitter()
|
101
97
|
|
@@ -142,9 +138,9 @@ class Agent(BaseModel):
|
|
142
138
|
compact_every_n_iterations=compact_every_n_iterations or 30,
|
143
139
|
max_tokens_working_memory=max_tokens_working_memory,
|
144
140
|
)
|
145
|
-
|
141
|
+
|
146
142
|
self._model_name = model_name
|
147
|
-
|
143
|
+
|
148
144
|
logger.debug(f"Memory will be compacted every {self.compact_every_n_iterations} iterations")
|
149
145
|
logger.debug(f"Max tokens for working memory set to: {self.max_tokens_working_memory}")
|
150
146
|
logger.debug("Agent initialized successfully.")
|
@@ -168,7 +164,9 @@ class Agent(BaseModel):
|
|
168
164
|
"""Clear the memory and reset the session."""
|
169
165
|
self._reset_session(clear_memory=True)
|
170
166
|
|
171
|
-
def solve_task(
|
167
|
+
def solve_task(
|
168
|
+
self, task: str, max_iterations: int = 30, streaming: bool = False, clear_memory: bool = True
|
169
|
+
) -> str:
|
172
170
|
"""Solve the given task using the ReAct framework.
|
173
171
|
|
174
172
|
Args:
|
@@ -182,7 +180,7 @@ class Agent(BaseModel):
|
|
182
180
|
str: The final response after task completion.
|
183
181
|
"""
|
184
182
|
logger.debug(f"Solving task... {task}")
|
185
|
-
self._reset_session(task_to_solve=task, max_iterations=max_iterations,clear_memory=clear_memory)
|
183
|
+
self._reset_session(task_to_solve=task, max_iterations=max_iterations, clear_memory=clear_memory)
|
186
184
|
|
187
185
|
# Generate task summary
|
188
186
|
self.task_to_solve_summary = self._generate_task_summary(task)
|
@@ -228,7 +226,9 @@ class Agent(BaseModel):
|
|
228
226
|
# For streaming, collect the response chunks
|
229
227
|
content = ""
|
230
228
|
for chunk in self.model.generate_with_history(
|
231
|
-
messages_history=self.memory.memory,
|
229
|
+
messages_history=self.memory.memory,
|
230
|
+
prompt=current_prompt,
|
231
|
+
streaming=True,
|
232
232
|
):
|
233
233
|
content += chunk
|
234
234
|
|
@@ -245,7 +245,8 @@ class Agent(BaseModel):
|
|
245
245
|
)
|
246
246
|
else:
|
247
247
|
result = self.model.generate_with_history(
|
248
|
-
messages_history=self.memory.memory, prompt=current_prompt, streaming=False
|
248
|
+
messages_history=self.memory.memory, prompt=current_prompt, streaming=False,
|
249
|
+
stop_words=["thinking"]
|
249
250
|
)
|
250
251
|
|
251
252
|
content = result.response
|
@@ -296,7 +297,7 @@ class Agent(BaseModel):
|
|
296
297
|
|
297
298
|
return answer
|
298
299
|
|
299
|
-
def _reset_session(self, task_to_solve: str = "", max_iterations: int = 30,clear_memory: bool = True):
|
300
|
+
def _reset_session(self, task_to_solve: str = "", max_iterations: int = 30, clear_memory: bool = True):
|
300
301
|
"""Reset the agent's session."""
|
301
302
|
logger.debug("Resetting session...")
|
302
303
|
self.task_to_solve = task_to_solve
|
@@ -316,29 +317,30 @@ class Agent(BaseModel):
|
|
316
317
|
def _compact_memory_if_needed(self, current_prompt: str = ""):
|
317
318
|
"""Compacts the memory if it exceeds the maximum occupancy or token limit."""
|
318
319
|
ratio_occupied = self._calculate_context_occupancy()
|
319
|
-
|
320
|
+
|
320
321
|
# Compact memory if any of these conditions are met:
|
321
322
|
# 1. Memory occupancy exceeds MAX_OCCUPANCY, or
|
322
323
|
# 2. Current iteration is a multiple of compact_every_n_iterations, or
|
323
324
|
# 3. Working memory exceeds max_tokens_working_memory (if set)
|
324
325
|
should_compact_by_occupancy = ratio_occupied >= MAX_OCCUPANCY
|
325
326
|
should_compact_by_iteration = (
|
326
|
-
self.compact_every_n_iterations is not None
|
327
|
-
self.current_iteration > 0
|
328
|
-
self.current_iteration % self.compact_every_n_iterations == 0
|
327
|
+
self.compact_every_n_iterations is not None
|
328
|
+
and self.current_iteration > 0
|
329
|
+
and self.current_iteration % self.compact_every_n_iterations == 0
|
329
330
|
)
|
330
331
|
should_compact_by_token_limit = (
|
331
|
-
self.max_tokens_working_memory is not None and
|
332
|
-
self.total_tokens > self.max_tokens_working_memory
|
332
|
+
self.max_tokens_working_memory is not None and self.total_tokens > self.max_tokens_working_memory
|
333
333
|
)
|
334
|
-
|
334
|
+
|
335
335
|
if should_compact_by_occupancy or should_compact_by_iteration or should_compact_by_token_limit:
|
336
336
|
if should_compact_by_occupancy:
|
337
337
|
logger.debug(f"Memory compaction triggered: Occupancy {ratio_occupied}% exceeds {MAX_OCCUPANCY}%")
|
338
|
-
|
338
|
+
|
339
339
|
if should_compact_by_iteration:
|
340
|
-
logger.debug(
|
341
|
-
|
340
|
+
logger.debug(
|
341
|
+
f"Memory compaction triggered: Iteration {self.current_iteration} is a multiple of {self.compact_every_n_iterations}"
|
342
|
+
)
|
343
|
+
|
342
344
|
self._emit_event("memory_full")
|
343
345
|
self.memory.compact()
|
344
346
|
self.total_tokens = self.model.token_counter_with_history(self.memory.memory, current_prompt)
|
@@ -399,7 +401,7 @@ class Agent(BaseModel):
|
|
399
401
|
return self._handle_tool_execution_failure(response)
|
400
402
|
|
401
403
|
variable_name = self.variable_store.add(response)
|
402
|
-
new_prompt = self._format_observation_response(response, variable_name, iteration)
|
404
|
+
new_prompt = self._format_observation_response(response, executed_tool, variable_name, iteration)
|
403
405
|
|
404
406
|
return ObserveResponseResult(
|
405
407
|
next_prompt=new_prompt,
|
@@ -414,7 +416,7 @@ class Agent(BaseModel):
|
|
414
416
|
"""Extract tool usage from the response content."""
|
415
417
|
if not content or not isinstance(content, str):
|
416
418
|
return {}
|
417
|
-
|
419
|
+
|
418
420
|
xml_parser = ToleranceXMLParser()
|
419
421
|
tool_names = self.tools.tool_names()
|
420
422
|
return xml_parser.extract_elements(text=content, element_names=tool_names)
|
@@ -461,7 +463,7 @@ class Agent(BaseModel):
|
|
461
463
|
answer=None,
|
462
464
|
)
|
463
465
|
|
464
|
-
def _handle_repeated_tool_call(self, tool_name: str, arguments_with_values: dict) -> (str,str):
|
466
|
+
def _handle_repeated_tool_call(self, tool_name: str, arguments_with_values: dict) -> (str, str):
|
465
467
|
"""Handle the case where a tool call is repeated."""
|
466
468
|
repeat_count = self.last_tool_call.get("count", 0)
|
467
469
|
error_message = (
|
@@ -494,7 +496,9 @@ class Agent(BaseModel):
|
|
494
496
|
answer=None,
|
495
497
|
)
|
496
498
|
|
497
|
-
def _format_observation_response(
|
499
|
+
def _format_observation_response(
|
500
|
+
self, response: str, last_exectured_tool: str, variable_name: str, iteration: int
|
501
|
+
) -> str:
|
498
502
|
"""Format the observation response with the given response, variable name, and iteration."""
|
499
503
|
response_display = response
|
500
504
|
if len(response) > MAX_RESPONSE_LENGTH:
|
@@ -504,29 +508,45 @@ class Agent(BaseModel):
|
|
504
508
|
)
|
505
509
|
|
506
510
|
# Format the response message
|
507
|
-
formatted_response = (
|
508
|
-
|
509
|
-
f"
|
510
|
-
|
511
|
-
f"
|
512
|
-
f"
|
513
|
-
f"
|
514
|
-
"
|
515
|
-
|
516
|
-
"
|
517
|
-
"
|
518
|
-
"
|
511
|
+
formatted_response = formatted_response = (
|
512
|
+
"# Analysis and Next Action Decision Point\n\n"
|
513
|
+
f"📊 Progress: Iteration {iteration}/{self.max_iterations}\n\n"
|
514
|
+
"## Current Context\n"
|
515
|
+
f"```\n{self.task_to_solve_summary}```\n\n"
|
516
|
+
f"## Latest Tool {last_exectured_tool} Execution Result:\n"
|
517
|
+
f"Variable: ${variable_name}$\n"
|
518
|
+
f"```\n{response_display}```\n\n"
|
519
|
+
"## Available Resources\n"
|
520
|
+
f"🛠️ Tools:\n{self._get_tools_names_prompt()}\n\n"
|
521
|
+
f"📦 Variables:\n{self._get_variable_prompt()}\n\n"
|
522
|
+
"## Your Task\n"
|
523
|
+
"1. Analyze the execution result and progress, formalize if the current step is solved according to the task.\n"
|
524
|
+
"2. Determine the most effective next step\n"
|
525
|
+
"3. Select exactly ONE tool from the available list\n"
|
526
|
+
"4. Utilize variable interpolation where needed\n"
|
527
|
+
"## Response Requirements\n"
|
528
|
+
"Provide TWO markdown-formatted XML blocks:\n"
|
529
|
+
"1. Your analysis of the progression resulting from the execution of the tool in <thinking> tags, don't include <context_analysis/>\n"
|
530
|
+
"2. Your tool execution plan in <tool_name> tags\n\n"
|
531
|
+
"## Response Format\n"
|
519
532
|
"```xml\n"
|
520
533
|
"<thinking>\n"
|
521
|
-
"
|
534
|
+
"[Detailed analysis of progress, and reasoning for next step]\n"
|
522
535
|
"</thinking>\n"
|
523
536
|
"```\n"
|
524
537
|
"```xml\n"
|
525
|
-
"<
|
526
|
-
"
|
527
|
-
"
|
528
|
-
"
|
529
|
-
|
538
|
+
"<action>\n"
|
539
|
+
"<selected_tool_name>\n"
|
540
|
+
"[Precise instruction for tool execution]\n"
|
541
|
+
"</selected_tool_name>\n"
|
542
|
+
"</action>\n"
|
543
|
+
"```\n\n"
|
544
|
+
"⚠️ Important:\n"
|
545
|
+
"- Respond ONLY with the two XML blocks\n"
|
546
|
+
"- No additional commentary\n"
|
547
|
+
"- If previous step failed, revise approach\n"
|
548
|
+
"- Ensure variable interpolation syntax is correct\n"
|
549
|
+
"- Utilize the <task_complete> tool to indicate task completion, display the result or if the task is deemed unfeasible.")
|
530
550
|
|
531
551
|
return formatted_response
|
532
552
|
|
@@ -589,10 +609,10 @@ class Agent(BaseModel):
|
|
589
609
|
arguments_with_values_interpolated = {
|
590
610
|
key: self._interpolate_variables(value) for key, value in arguments_with_values.items()
|
591
611
|
}
|
592
|
-
|
612
|
+
|
593
613
|
arguments_with_values_interpolated = arguments_with_values_interpolated
|
594
614
|
|
595
|
-
# test if tool need variables in context
|
615
|
+
# test if tool need variables in context
|
596
616
|
if tool.need_variables:
|
597
617
|
# Inject variables into the tool if needed
|
598
618
|
arguments_with_values_interpolated["variables"] = self.variable_store
|
@@ -603,8 +623,7 @@ class Agent(BaseModel):
|
|
603
623
|
try:
|
604
624
|
# Convert arguments to proper types
|
605
625
|
converted_args = self.tools.validate_and_convert_arguments(
|
606
|
-
tool_name,
|
607
|
-
arguments_with_values_interpolated
|
626
|
+
tool_name, arguments_with_values_interpolated
|
608
627
|
)
|
609
628
|
except ValueError as e:
|
610
629
|
return "", f"Argument Error: {str(e)}"
|
@@ -637,9 +656,10 @@ class Agent(BaseModel):
|
|
637
656
|
"""Interpolate variables using $var$ syntax in the given text."""
|
638
657
|
try:
|
639
658
|
import re
|
659
|
+
|
640
660
|
for var in self.variable_store.keys():
|
641
661
|
# Escape the variable name for regex, but use raw value for replacement
|
642
|
-
pattern = rf
|
662
|
+
pattern = rf"\${re.escape(var)}\$"
|
643
663
|
replacement = self.variable_store[var]
|
644
664
|
text = re.sub(pattern, replacement, text)
|
645
665
|
return text
|
@@ -729,9 +749,7 @@ class Agent(BaseModel):
|
|
729
749
|
# Remove the last assistant / user message
|
730
750
|
user_message = memory_copy.pop()
|
731
751
|
assistant_message = memory_copy.pop()
|
732
|
-
summary = self.model.generate_with_history(
|
733
|
-
messages_history=memory_copy, prompt=prompt_summary
|
734
|
-
)
|
752
|
+
summary = self.model.generate_with_history(messages_history=memory_copy, prompt=prompt_summary)
|
735
753
|
# Remove user message
|
736
754
|
memory_copy.pop()
|
737
755
|
# Replace by summary
|
@@ -751,6 +769,8 @@ class Agent(BaseModel):
|
|
751
769
|
str: Generated task summary
|
752
770
|
"""
|
753
771
|
try:
|
772
|
+
if len(content) < 200:
|
773
|
+
return content
|
754
774
|
prompt = (
|
755
775
|
"Create an ultra-concise task summary that captures ONLY: \n"
|
756
776
|
"1. Primary objective/purpose\n"
|
@@ -123,7 +123,8 @@ class GenerativeModel:
|
|
123
123
|
|
124
124
|
# Generate a response with conversation history and optional streaming
|
125
125
|
def generate_with_history(
|
126
|
-
self, messages_history: list[Message], prompt: str, image_url: str | None = None, streaming: bool = False
|
126
|
+
self, messages_history: list[Message], prompt: str, image_url: str | None = None, streaming: bool = False,
|
127
|
+
stop_words: list[str] | None = None
|
127
128
|
) -> ResponseStats:
|
128
129
|
"""Generate a response with conversation history and optional image.
|
129
130
|
|
@@ -132,6 +133,7 @@ class GenerativeModel:
|
|
132
133
|
prompt: Current user prompt.
|
133
134
|
image_url: Optional image URL for visual queries.
|
134
135
|
streaming: Whether to stream the response.
|
136
|
+
stop_words: Optional list of stop words for streaming
|
135
137
|
|
136
138
|
Returns:
|
137
139
|
Detailed response statistics or a generator in streaming mode.
|
@@ -163,6 +165,7 @@ class GenerativeModel:
|
|
163
165
|
model=self.model,
|
164
166
|
messages=messages,
|
165
167
|
num_retries=MIN_RETRIES,
|
168
|
+
stop=stop_words,
|
166
169
|
)
|
167
170
|
|
168
171
|
token_usage = TokenUsage(
|
@@ -181,7 +184,7 @@ class GenerativeModel:
|
|
181
184
|
except Exception as e:
|
182
185
|
self._handle_generation_exception(e)
|
183
186
|
|
184
|
-
def _stream_response(self, messages):
|
187
|
+
def _stream_response(self, messages, stop_words: list[str] | None = None):
|
185
188
|
"""Private method to handle streaming responses."""
|
186
189
|
try:
|
187
190
|
for chunk in generate_completion(
|
@@ -189,7 +192,8 @@ class GenerativeModel:
|
|
189
192
|
model=self.model,
|
190
193
|
messages=messages,
|
191
194
|
num_retries=MIN_RETRIES,
|
192
|
-
stream=True, # Enable streaming
|
195
|
+
stream=True, # Enable streaming,
|
196
|
+
stop=stop_words,
|
193
197
|
):
|
194
198
|
if chunk.choices[0].delta.content is not None:
|
195
199
|
self.event_emitter.emit("stream_chunk", chunk.choices[0].delta.content)
|
@@ -0,0 +1,83 @@
|
|
1
|
+
import loguru
|
2
|
+
|
3
|
+
from quantalogic.model_info_list import model_info
|
4
|
+
from quantalogic.model_info_litellm import litellm_get_model_max_input_tokens, litellm_get_model_max_output_tokens
|
5
|
+
from quantalogic.utils.lm_studio_model_info import ModelInfo, get_model_list
|
6
|
+
|
7
|
+
DEFAULT_MAX_OUTPUT_TOKENS = 4 * 1024 # Reasonable default for most models
|
8
|
+
DEFAULT_MAX_INPUT_TOKENS = 32 * 1024 # Reasonable default for most models
|
9
|
+
|
10
|
+
|
11
|
+
def validate_model_name(model_name: str) -> None:
|
12
|
+
if not isinstance(model_name, str) or not model_name.strip():
|
13
|
+
raise ValueError(f"Invalid model name: {model_name}")
|
14
|
+
|
15
|
+
|
16
|
+
def print_model_info():
|
17
|
+
for info in model_info.values():
|
18
|
+
print(f"\n{info.model_name}:")
|
19
|
+
print(f" Max Input Tokens: {info.max_input_tokens:,}")
|
20
|
+
print(f" Max Output Tokens: {info.max_output_tokens:,}")
|
21
|
+
|
22
|
+
|
23
|
+
def get_max_output_tokens(model_name: str) -> int:
|
24
|
+
"""Get max output tokens with safe fallback"""
|
25
|
+
validate_model_name(model_name)
|
26
|
+
|
27
|
+
if model_name.startswith('lm_studio/'):
|
28
|
+
try:
|
29
|
+
models = get_model_list()
|
30
|
+
for model in models.data:
|
31
|
+
if model.id == model_name[len('lm_studio/'):]:
|
32
|
+
return model.max_context_length
|
33
|
+
except Exception:
|
34
|
+
loguru.logger.warning(f"Could not fetch LM Studio model info for {model_name}, using default")
|
35
|
+
|
36
|
+
if model_name in model_info:
|
37
|
+
return model_info[model_name].max_output_tokens
|
38
|
+
|
39
|
+
try:
|
40
|
+
return litellm_get_model_max_output_tokens(model_name)
|
41
|
+
except Exception as e:
|
42
|
+
loguru.logger.warning(f"Model {model_name} not found in LiteLLM registry, using default")
|
43
|
+
return DEFAULT_MAX_OUTPUT_TOKENS
|
44
|
+
|
45
|
+
|
46
|
+
def get_max_input_tokens(model_name: str) -> int:
|
47
|
+
"""Get max input tokens with safe fallback"""
|
48
|
+
validate_model_name(model_name)
|
49
|
+
|
50
|
+
if model_name.startswith('lm_studio/'):
|
51
|
+
try:
|
52
|
+
models = get_model_list()
|
53
|
+
for model in models.data:
|
54
|
+
if model.id == model_name[len('lm_studio/'):]:
|
55
|
+
return model.max_context_length
|
56
|
+
except Exception:
|
57
|
+
loguru.logger.warning(f"Could not fetch LM Studio model info for {model_name}, using default")
|
58
|
+
|
59
|
+
if model_name in model_info:
|
60
|
+
return model_info[model_name].max_input_tokens
|
61
|
+
|
62
|
+
try:
|
63
|
+
return litellm_get_model_max_input_tokens(model_name)
|
64
|
+
except Exception:
|
65
|
+
loguru.logger.warning(f"Model {model_name} not found in LiteLLM registry, using default")
|
66
|
+
return DEFAULT_MAX_INPUT_TOKENS
|
67
|
+
|
68
|
+
|
69
|
+
def get_max_tokens(model_name: str) -> int:
|
70
|
+
"""Get total maximum tokens (input + output)"""
|
71
|
+
validate_model_name(model_name)
|
72
|
+
|
73
|
+
# Get input and output tokens separately
|
74
|
+
input_tokens = get_max_input_tokens(model_name)
|
75
|
+
output_tokens = get_max_output_tokens(model_name)
|
76
|
+
|
77
|
+
return input_tokens + output_tokens
|
78
|
+
|
79
|
+
|
80
|
+
if __name__ == "__main__":
|
81
|
+
print_model_info()
|
82
|
+
print(get_max_input_tokens("gpt-4o-mini"))
|
83
|
+
print(get_max_output_tokens("openrouter/openai/gpt-4o-mini"))
|
@@ -30,18 +30,56 @@ def get_model_info(model_name: str) -> dict | None:
|
|
30
30
|
return model_info.get(model_name, None)
|
31
31
|
|
32
32
|
|
33
|
+
class ModelProviderConfig:
    """Routing configuration for a prefixed model name served through an
    OpenAI-compatible endpoint.

    Holds the model-name prefix, the litellm provider id, the endpoint base
    URL, and the name of the environment variable carrying the API key, and
    knows how to rewrite a litellm kwargs dict accordingly.
    """

    def __init__(self, prefix: str, provider: str, base_url: str, env_var: str):
        self.prefix = prefix      # e.g. "nvidia/" — stripped from the model name
        self.provider = provider  # value passed as litellm custom_llm_provider
        self.base_url = base_url  # OpenAI-compatible endpoint for this provider
        self.env_var = env_var    # environment variable holding the API key

    def configure(self, model: str, kwargs: Dict[str, Any]) -> None:
        """Populate *kwargs* in place for a litellm call against this provider.

        Raises:
            ValueError: if the API-key environment variable is not set.
        """
        # Strip only the LEADING prefix. The original str.replace() removed
        # every occurrence of the prefix substring, mangling model names that
        # contain it again later (e.g. "ai/" inside "mistralai/...").
        kwargs["model"] = model.removeprefix(self.prefix)
        kwargs["custom_llm_provider"] = self.provider
        kwargs["base_url"] = self.base_url
        api_key = os.getenv(self.env_var)
        if not api_key:
            raise ValueError(f"{self.env_var} is not set in the environment variables.")
        kwargs["api_key"] = api_key
|
48
|
+
|
49
|
+
|
50
|
+
# Default provider configurations.
# Keyed by provider name; a model whose name starts with a config's prefix is
# routed to that provider's endpoint. All three use provider="openai", i.e.
# they are treated as OpenAI-compatible endpoints by litellm.
# NOTE(review): the OVH base_url is pinned to a single model deployment
# (deepseek-r1-distill-llama-70b) — confirm it serves other ovh/ models.
PROVIDERS = {
    "dashscope": ModelProviderConfig(
        prefix="dashscope/",
        provider="openai",
        base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
        env_var="DASHSCOPE_API_KEY"
    ),
    "nvidia": ModelProviderConfig(
        prefix="nvidia/",
        provider="openai",
        base_url="https://integrate.api.nvidia.com/v1",
        env_var="NVIDIA_API_KEY"
    ),
    "ovh": ModelProviderConfig(
        prefix="ovh/",
        provider="openai",
        base_url="https://deepseek-r1-distill-llama-70b.endpoints.kepler.ai.cloud.ovh.net/api/openai_compat/v1",
        env_var="OVH_API_KEY"
    )
}
|
71
|
+
|
72
|
+
|
33
73
|
def generate_completion(**kwargs: Any) -> Any:
    """Wraps litellm completion with proper type hints.

    If the requested model name starts with a known provider prefix (see
    PROVIDERS), *kwargs* is rewritten in place — model, custom_llm_provider,
    base_url and api_key — before delegating to litellm's completion.

    Raises:
        ValueError: if the matched provider's API-key env var is unset.
    """
    # Note: **kwargs annotates the VALUE type of each keyword argument, so
    # the original `**kwargs: Dict[str, Any]` was incorrect; `Any` is right.
    model = kwargs.get("model", "")

    # Route to the first provider whose prefix matches the model name.
    # The dict keys are not needed here, so iterate values() directly.
    for provider_config in PROVIDERS.values():
        if model.startswith(provider_config.prefix):
            provider_config.configure(model, kwargs)
            break

    return completion(**kwargs)
|
46
84
|
|
47
85
|
|
@@ -0,0 +1,12 @@
|
|
1
|
+
from pydantic import BaseModel
|
2
|
+
|
3
|
+
|
4
|
+
class ModelInfo(BaseModel):
    """Token-limit metadata for a single LLM in the local registry."""

    # Fully-qualified model identifier, e.g. "dashscope/qwen-max"
    model_name: str
    # Maximum number of tokens accepted as prompt/context
    max_input_tokens: int
    # Maximum number of tokens the model may generate
    max_output_tokens: int
    # Optional chain-of-thought token budget (set only for reasoning models)
    max_cot_tokens: int | None = None
|
9
|
+
|
10
|
+
|
11
|
+
class ModelNotFoundError(Exception):
    """Signals that the requested model has no entry in the local registry."""
|
@@ -0,0 +1,60 @@
|
|
1
|
+
from quantalogic.model_info import ModelInfo
|
2
|
+
|
3
|
+
# Local token-limit registry, keyed by fully-qualified model name. Lookups in
# llm.py consult this dict BEFORE falling back to the LiteLLM registry, so it
# acts as an override/supplement for models LiteLLM lacks or misreports.
# Values mirror provider-published limits — TODO confirm against current docs.
model_info = {
    "dashscope/qwen-max": ModelInfo(
        model_name="dashscope/qwen-max",
        max_output_tokens=8 * 1024,
        max_input_tokens=32 * 1024,
    ),
    "dashscope/qwen-plus": ModelInfo(
        model_name="dashscope/qwen-plus",
        max_output_tokens=8 * 1024,
        max_input_tokens=131072,
    ),
    "dashscope/qwen-turbo": ModelInfo(
        model_name="dashscope/qwen-turbo",
        max_output_tokens=8 * 1024,
        max_input_tokens=1000000,
    ),
    "deepseek-reasoner": ModelInfo(
        model_name="deepseek-reasoner",
        max_output_tokens=8 * 1024,
        max_input_tokens=1024 * 128,
    ),
    "openrouter/deepseek/deepseek-r1": ModelInfo(
        model_name="openrouter/deepseek/deepseek-r1",
        max_output_tokens=8 * 1024,
        max_input_tokens=1024 * 128,
    ),
    # NOTE(review): 128K max_output_tokens on the two mistral-large entries is
    # unusually high compared to every other entry (8K) — confirm intended.
    "openrouter/mistralai/mistral-large-2411": ModelInfo(
        model_name="openrouter/mistralai/mistral-large-2411",
        max_output_tokens=128 * 1024,
        max_input_tokens=1024 * 128,
    ),
    "mistralai/mistral-large-2411": ModelInfo(
        model_name="mistralai/mistral-large-2411",
        max_output_tokens=128 * 1024,
        max_input_tokens=1024 * 128,
    ),
    "deepseek/deepseek-chat": ModelInfo(
        model_name="deepseek/deepseek-chat",
        max_output_tokens=8 * 1024,
        max_input_tokens=1024 * 64,
    ),
    # Only entry with a chain-of-thought budget (max_cot_tokens).
    "deepseek/deepseek-reasoner": ModelInfo(
        model_name="deepseek/deepseek-reasoner",
        max_output_tokens=8 * 1024,
        max_input_tokens=1024 * 64,
        max_cot_tokens=1024 * 32,
    ),
    "nvidia/deepseek-ai/deepseek-r1": ModelInfo(
        model_name="nvidia/deepseek-ai/deepseek-r1",
        max_output_tokens=8 * 1024,
        max_input_tokens=1024 * 64,
    ),
    "ovh/DeepSeek-R1-Distill-Llama-70B": ModelInfo(
        model_name="ovh/DeepSeek-R1-Distill-Llama-70B",
        max_output_tokens=8 * 1024,
        max_input_tokens=1024 * 64,
    ),
}
|