quantalogic 0.2.16__py3-none-any.whl → 0.2.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quantalogic/__init__.py +3 -2
- quantalogic/agent.py +94 -38
- quantalogic/agent_config.py +62 -14
- quantalogic/coding_agent.py +17 -2
- quantalogic/{print_event.py → console_print_events.py} +1 -3
- quantalogic/console_print_token.py +16 -0
- quantalogic/docs_cli.py +50 -0
- quantalogic/generative_model.py +86 -77
- quantalogic/main.py +128 -18
- quantalogic/prompts.py +2 -2
- quantalogic/search_agent.py +12 -1
- quantalogic/server/agent_server.py +2 -2
- quantalogic/tools/llm_tool.py +52 -11
- quantalogic/tools/llm_vision_tool.py +23 -7
- quantalogic/xml_parser.py +109 -49
- {quantalogic-0.2.16.dist-info → quantalogic-0.2.18.dist-info}/METADATA +62 -153
- {quantalogic-0.2.16.dist-info → quantalogic-0.2.18.dist-info}/RECORD +20 -18
- quantalogic-0.2.18.dist-info/entry_points.txt +6 -0
- quantalogic-0.2.16.dist-info/entry_points.txt +0 -3
- {quantalogic-0.2.16.dist-info → quantalogic-0.2.18.dist-info}/LICENSE +0 -0
- {quantalogic-0.2.16.dist-info → quantalogic-0.2.18.dist-info}/WHEEL +0 -0
quantalogic/generative_model.py
CHANGED
@@ -2,14 +2,23 @@
 
 import functools
 
+import litellm
 import openai
 from litellm import completion, exceptions, get_max_tokens, get_model_info, token_counter
 from loguru import logger
 from pydantic import BaseModel, Field, field_validator
 
+from quantalogic.event_emitter import EventEmitter  # Importing the EventEmitter class
+
 MIN_RETRIES = 1
 
 
+
+litellm.suppress_debug_info = True  # Very important to suppress prints don't remove
+
+
+
+
+# Define the Message class for conversation handling
 class Message(BaseModel):
     """Represents a message in a conversation with a specific role and content."""
 
@@ -70,21 +79,22 @@ class GenerativeModel:
         self,
         model: str = "ollama/qwen2.5-coder:14b",
         temperature: float = 0.7,
+        event_emitter: EventEmitter = None,  # EventEmitter instance
     ) -> None:
         """Initialize a generative model with configurable parameters.
 
-        Configure the generative model with specified model,
-        temperature, and maximum token settings.
-
         Args:
-            model: Model identifier.
-
-
-
+            model: Model identifier. Defaults to "ollama/qwen2.5-coder:14b".
+            temperature: Temperature parameter for controlling randomness in generation.
+                Higher values (e.g. 0.8) make output more random, lower values (e.g. 0.2)
+                make it more deterministic. Defaults to 0.7.
+            event_emitter: Optional event emitter instance for handling asynchronous events
+                and callbacks during text generation. Defaults to None.
         """
         logger.debug(f"Initializing GenerativeModel with model={model}, temperature={temperature}")
         self.model = model
         self.temperature = temperature
+        self.event_emitter = event_emitter or EventEmitter()  # Initialize event emitter
        self._get_model_info_cached = functools.lru_cache(maxsize=32)(self._get_model_info_impl)
 
         # Define retriable exceptions based on LiteLLM's exception mapping
@@ -109,28 +119,20 @@ class GenerativeModel:
             exceptions.PermissionDeniedError,
         )
 
-    #
+    # Generate a response with conversation history and optional streaming
     def generate_with_history(
-        self, messages_history: list[Message], prompt: str, image_url: str | None = None
+        self, messages_history: list[Message], prompt: str, image_url: str | None = None, streaming: bool = False
    ) -> ResponseStats:
         """Generate a response with conversation history and optional image.
 
-        Generates a response based on previous conversation messages,
-        a new user prompt, and an optional image URL.
-
         Args:
             messages_history: Previous conversation messages.
             prompt: Current user prompt.
             image_url: Optional image URL for visual queries.
+            streaming: Whether to stream the response.
 
         Returns:
-            Detailed response statistics.
-
-        Raises:
-            openai.AuthenticationError: If authentication fails.
-            openai.InvalidRequestError: If the request is invalid (e.g., context length exceeded).
-            openai.APIError: For content policy violations or other API errors.
-            Exception: For other unexpected errors.
+            Detailed response statistics or a generator in streaming mode.
         """
         messages = [{"role": msg.role, "content": str(msg.content)} for msg in messages_history]
 
@@ -147,6 +149,10 @@ class GenerativeModel:
         else:
             messages.append({"role": "user", "content": str(prompt)})
 
+        if streaming:
+            self.event_emitter.emit("stream_start")  # Emit stream start event
+            return self._stream_response(messages)  # Return generator
+
         try:
             logger.debug(f"Generating response for prompt: {prompt}")
 
@@ -171,54 +177,68 @@ class GenerativeModel:
             )
 
         except Exception as e:
-            error_details = {
-                "error_type": type(e).__name__,
-                "message": str(e),
-                "model": self.model,
-                "provider": getattr(e, "llm_provider", "unknown"),
-                "status_code": getattr(e, "status_code", None),
-            }
-
-            logger.error("LLM Generation Error: {}", error_details)
-            logger.debug(f"Error details: {error_details}")
-            logger.debug(f"Model: {self.model}, Temperature: {self.temperature}")
-
-            # Handle authentication and permission errors
-            if isinstance(e, self.AUTH_EXCEPTIONS):
-                logger.debug("Authentication error occurred")
-                raise openai.AuthenticationError(
-                    f"Authentication failed with provider {error_details['provider']}"
-                ) from e
-
-            # Handle context window errors
-            if isinstance(e, self.CONTEXT_EXCEPTIONS):
-                raise openai.InvalidRequestError(f"Context window exceeded or invalid request: {str(e)}") from e
-
-            # Handle content policy violations
-            if isinstance(e, self.POLICY_EXCEPTIONS):
-                raise openai.APIError(f"Content policy violation: {str(e)}") from e
-
-            # For other exceptions, preserve the original error type if it's from OpenAI
-            if isinstance(e, openai.OpenAIError):
-                raise
-
-            # Wrap unknown errors in APIError
-            raise openai.APIError(f"Unexpected error during generation: {str(e)}") from e
-
-    def generate(self, prompt: str, image_url: str | None = None) -> ResponseStats:
-        """Generate a response without conversation history.
+            self._handle_generation_exception(e)
 
-
-
+    def _stream_response(self, messages):
+        """Private method to handle streaming responses."""
+        try:
+            for chunk in completion(
+                temperature=self.temperature,
+                model=self.model,
+                messages=messages,
+                num_retries=MIN_RETRIES,
+                stream=True,  # Enable streaming
+            ):
+                if chunk.choices[0].delta.content is not None:
+                    self.event_emitter.emit("stream_chunk", chunk.choices[0].delta.content)
+                    yield chunk.choices[0].delta.content  # Yield each chunk of content
+
+            self.event_emitter.emit("stream_end")  # Emit stream end event
+        except Exception as e:
+            logger.error(f"Streaming error: {str(e)}")
+            raise
+
+    def generate(self, prompt: str, image_url: str | None = None, streaming: bool = False) -> ResponseStats:
+        """Generate a response without conversation history.
 
         Args:
             prompt: User prompt.
             image_url: Optional image URL for visual queries.
+            streaming: Whether to stream the response.
 
         Returns:
-            Detailed response statistics.
+            Detailed response statistics or a generator in streaming mode.
         """
-        return self.generate_with_history([], prompt, image_url)
+        return self.generate_with_history([], prompt, image_url, streaming)
+
+    def _handle_generation_exception(self, e):
+        """Handle exceptions during generation."""
+        error_details = {
+            "error_type": type(e).__name__,
+            "message": str(e),
+            "model": self.model,
+            "provider": getattr(e, "llm_provider", "unknown"),
+            "status_code": getattr(e, "status_code", None),
+        }
+
+        logger.error("LLM Generation Error: {}", error_details)
+        logger.debug(f"Error details: {error_details}")
+        logger.debug(f"Model: {self.model}, Temperature: {self.temperature}")
+
+        if isinstance(e, self.AUTH_EXCEPTIONS):
+            logger.debug("Authentication error occurred")
+            raise openai.AuthenticationError(f"Authentication failed with provider {error_details['provider']}") from e
+
+        if isinstance(e, self.CONTEXT_EXCEPTIONS):
+            raise openai.InvalidRequestError(f"Context window exceeded or invalid request: {str(e)}") from e
+
+        if isinstance(e, self.POLICY_EXCEPTIONS):
+            raise openai.APIError(f"Content policy violation: {str(e)}") from e
+
+        if isinstance(e, openai.OpenAIError):
+            raise
+
+        raise openai.APIError(f"Unexpected error during generation: {str(e)}") from e
 
     def get_max_tokens(self) -> int:
         """Get the maximum number of tokens that can be generated by the model."""
@@ -239,17 +259,9 @@ class GenerativeModel:
         return token_counter(model=self.model, messages=litellm_messages)
 
     def _get_model_info_impl(self, model_name: str) -> dict:
-        """Get information about the model with prefix fallback logic.
-
-        Attempts to find model info by progressively removing provider prefixes.
-        Raises ValueError if no valid model configuration is found.
-        Results are cached to improve performance.
-
-        Example:
-            openrouter/openai/gpt-4o-mini → openai/gpt-4o-mini → gpt-4o-mini
-        """
+        """Get information about the model with prefix fallback logic."""
         original_model = model_name
-
+
         while True:
             try:
                 logger.debug(f"Attempting to retrieve model info for: {model_name}")
@@ -259,22 +271,19 @@ class GenerativeModel:
                 return model_info
             except Exception:
                 pass
-
+
             # Try removing one prefix level
-            parts = model_name.split(
+            parts = model_name.split("/")
             if len(parts) <= 1:
                 break
-            model_name =
-
+            model_name = "/".join(parts[1:])
+
         error_msg = f"Could not find model info for {original_model} after trying: {self.model} → {model_name}"
         logger.error(error_msg)
         raise ValueError(error_msg)
 
     def get_model_info(self, model_name: str = None) -> dict:
-        """Get cached information about the model.
-
-        If no model name is provided, uses the current model.
-        """
+        """Get cached information about the model."""
         if model_name is None:
             model_name = self.model
         return self._get_model_info_cached(model_name)
quantalogic/main.py
CHANGED
@@ -10,12 +10,15 @@ from typing import Optional
 import click
 from loguru import logger
 
+from quantalogic.console_print_events import console_print_events
 from quantalogic.utils.check_version import check_if_is_latest_version
 from quantalogic.version import get_version
 
 # Configure logger
 logger.remove()  # Remove default logger
 
+from threading import Lock  # noqa: E402
+
 from rich.console import Console  # noqa: E402
 from rich.panel import Panel  # noqa: E402
 from rich.prompt import Confirm  # noqa: E402
@@ -31,30 +34,33 @@ from quantalogic.agent_config import (  # noqa: E402
     create_orchestrator_agent,
 )
 from quantalogic.interactive_text_editor import get_multiline_input  # noqa: E402
-from quantalogic.print_event import console_print_events  # noqa: E402
 from quantalogic.search_agent import create_search_agent  # noqa: E402
 
 AGENT_MODES = ["code", "basic", "interpreter", "full", "code-basic", "search", "search-full"]
 
 
-def create_agent_for_mode(mode: str, model_name: str, vision_model_name: str | None) -> Agent:
+def create_agent_for_mode(mode: str, model_name: str, vision_model_name: str | None, no_stream: bool = False, compact_every_n_iteration: int | None = None, max_tokens_working_memory: int | None = None) -> Agent:
     """Create an agent based on the specified mode."""
     logger.debug(f"Creating agent for mode: {mode} with model: {model_name}")
+    logger.debug(f"Using vision model: {vision_model_name}")
+    logger.debug(f"Using no_stream: {no_stream}")
+    logger.debug(f"Using compact_every_n_iteration: {compact_every_n_iteration}")
+    logger.debug(f"Using max_tokens_working_memory: {max_tokens_working_memory}")
     if mode == "code":
         logger.debug("Creating code agent without basic mode")
-        return create_coding_agent(model_name, vision_model_name, basic=False)
+        return create_coding_agent(model_name, vision_model_name, basic=False, no_stream=no_stream, compact_every_n_iteration=compact_every_n_iteration, max_tokens_working_memory=max_tokens_working_memory)
     if mode == "code-basic":
-        return create_coding_agent(model_name, vision_model_name, basic=True)
+        return create_coding_agent(model_name, vision_model_name, basic=True, no_stream=no_stream, compact_every_n_iteration=compact_every_n_iteration, max_tokens_working_memory=max_tokens_working_memory)
     elif mode == "basic":
-        return create_orchestrator_agent(model_name, vision_model_name)
+        return create_orchestrator_agent(model_name, vision_model_name, no_stream=no_stream, compact_every_n_iteration=compact_every_n_iteration, max_tokens_working_memory=max_tokens_working_memory)
     elif mode == "full":
-        return create_full_agent(model_name, vision_model_name)
+        return create_full_agent(model_name, vision_model_name, no_stream=no_stream, compact_every_n_iteration=compact_every_n_iteration, max_tokens_working_memory=max_tokens_working_memory)
     elif mode == "interpreter":
-        return create_interpreter_agent(model_name, vision_model_name)
+        return create_interpreter_agent(model_name, vision_model_name, no_stream=no_stream, compact_every_n_iteration=compact_every_n_iteration, max_tokens_working_memory=max_tokens_working_memory)
     elif mode == "search":
-        return create_search_agent(model_name)
+        return create_search_agent(model_name, no_stream=no_stream, compact_every_n_iteration=compact_every_n_iteration, max_tokens_working_memory=max_tokens_working_memory)
     if mode == "search-full":
-        return create_search_agent(model_name, mode_full=True)
+        return create_search_agent(model_name, mode_full=True, no_stream=no_stream, compact_every_n_iteration=compact_every_n_iteration, max_tokens_working_memory=max_tokens_working_memory)
     else:
         raise ValueError(f"Unknown agent mode: {mode}")
 
@@ -126,8 +132,34 @@ def get_task_from_file(file_path: str) -> str:
         raise Exception(f"Unexpected error reading file: {e}")
 
 
+# Spinner control
+spinner_lock = Lock()
+current_spinner = None
+
+def start_spinner(console: Console) -> None:
+    """Start the thinking spinner."""
+    global current_spinner
+    with spinner_lock:
+        if current_spinner is None:
+            current_spinner = console.status("[yellow]Thinking...", spinner="dots")
+            current_spinner.start()
+
+def stop_spinner(console: Console) -> None:
+    """Stop the thinking spinner."""
+    global current_spinner
+    with spinner_lock:
+        if current_spinner is not None:
+            current_spinner.stop()
+            current_spinner = None
+
+
 def display_welcome_message(
-    console: Console,
+    console: Console,
+    model_name: str,
+    vision_model_name: str | None,
+    max_iterations: int = 50,
+    compact_every_n_iteration: int | None = None,
+    max_tokens_working_memory: int | None = None
 ) -> None:
     """Display the welcome message and instructions."""
     version = get_version()
@@ -142,7 +174,9 @@ def display_welcome_message(
             "\n"
             f"- Model: {model_name}\n"
             f"- Vision Model: {vision_model_name}\n"
-            f"- Max Iterations: {max_iterations}\n
+            f"- Max Iterations: {max_iterations}\n"
+            f"- Memory Compact Frequency: {compact_every_n_iteration or 'Default (Max Iterations)'}\n"
+            f"- Max Working Memory Tokens: {max_tokens_working_memory or 'Default'}\n\n"
             "[bold magenta]💡 Pro Tips:[/bold magenta]\n\n"
             "- Be as specific as possible in your task description to get the best results!\n"
             "- Use clear and concise language when describing your task\n"
@@ -155,11 +189,17 @@ def display_welcome_message(
 
 
 @click.group(invoke_without_command=True)
+@click.option(
+    "--compact-every-n-iteration",
+    type=int,
+    default=None,
+    help="Set the frequency of memory compaction for the agent (default: max_iterations)."
+)
 @click.option("--version", is_flag=True, help="Show version information.")
 @click.option(
     "--model-name",
     default=MODEL_NAME,
-    help='Specify the model to use (litellm format, e.g. "openrouter/deepseek-chat").',
+    help='Specify the model to use (litellm format, e.g. "openrouter/deepseek/deepseek-chat").',
 )
 @click.option(
     "--log",
@@ -180,6 +220,12 @@ def display_welcome_message(
     default=30,
     help="Maximum number of iterations for task solving (default: 30).",
 )
+@click.option(
+    "--max-tokens-working-memory",
+    type=int,
+    default=None,
+    help="Set the maximum number of tokens allowed in the working memory."
+)
 @click.pass_context
 def cli(
     ctx: click.Context,
@@ -190,6 +236,8 @@ def cli(
     log: str,
     vision_model_name: str | None,
     max_iterations: int,
+    compact_every_n_iteration: int | None,
+    max_tokens_working_memory: int | None,
 ) -> None:
     """QuantaLogic AI Assistant - A powerful AI tool for various tasks."""
     if version:
@@ -205,6 +253,8 @@ def cli(
             log=log,
             vision_model_name=vision_model_name,
             max_iterations=max_iterations,
+            compact_every_n_iteration=compact_every_n_iteration,
+            max_tokens_working_memory=max_tokens_working_memory,
         )
 
 
@@ -213,7 +263,7 @@ def cli(
 @click.option(
     "--model-name",
     default=MODEL_NAME,
-    help='Specify the model to use (litellm format, e.g. "openrouter/deepseek-chat").',
+    help='Specify the model to use (litellm format, e.g. "openrouter/deepseek/deepseek-chat").',
 )
 @click.option("--verbose", is_flag=True, help="Enable verbose output.")
 @click.option("--mode", type=click.Choice(AGENT_MODES), default="code", help="Agent mode (code/search/full).")
@@ -234,6 +284,23 @@ def cli(
     default=30,
     help="Maximum number of iterations for task solving (default: 30).",
 )
+@click.option(
+    "--compact-every-n-iteration",
+    type=int,
+    default=None,
+    help="Set the frequency of memory compaction for the agent (default: max_iterations)."
+)
+@click.option(
+    "--max-tokens-working-memory",
+    type=int,
+    default=None,
+    help="Set the maximum number of tokens allowed in the working memory."
+)
+@click.option(
+    "--no-stream",
+    is_flag=True,
+    help="Disable streaming output (default: streaming enabled).",
+)
 @click.argument("task", required=False)
 def task(
     file: Optional[str],
@@ -244,6 +311,9 @@ def task(
     vision_model_name: str | None,
     task: Optional[str],
     max_iterations: int,
+    compact_every_n_iteration: int | None,
+    max_tokens_working_memory: int | None,
+    no_stream: bool,
 ) -> None:
     """Execute a task with the QuantaLogic AI Assistant."""
     console = Console()
@@ -257,7 +327,14 @@ def task(
             check_new_version()
             task_content = task
         else:
-            display_welcome_message(
+            display_welcome_message(
+                console,
+                model_name,
+                vision_model_name,
+                max_iterations=max_iterations,
+                compact_every_n_iteration=compact_every_n_iteration,
+                max_tokens_working_memory=max_tokens_working_memory
+            )
             check_new_version()
             logger.debug("Waiting for user input...")
             task_content = get_multiline_input(console).strip()
@@ -286,9 +363,13 @@ def task(
                 )
             )
 
-        logger.debug(
-
-
+        logger.debug(
+            f"Creating agent for mode: {mode} with model: {model_name}, vision model: {vision_model_name}, no_stream: {no_stream}"
+        )
+        agent = create_agent_for_mode(mode, model_name, vision_model_name=vision_model_name, no_stream=no_stream, compact_every_n_iteration=compact_every_n_iteration, max_tokens_working_memory=max_tokens_working_memory)
+        logger.debug(
+            f"Created agent for mode: {mode} with model: {model_name}, vision model: {vision_model_name}, no_stream: {no_stream}"
+        )
 
         events = [
             "task_start",
@@ -302,16 +383,45 @@ def task(
             "memory_compacted",
             "memory_summary",
         ]
+        # Add spinner control to event handlers
+        def handle_task_think_start(*args, **kwargs):
+            start_spinner(console)
+
+        def handle_task_think_end(*args, **kwargs):
+            stop_spinner(console)
+
+        def handle_stream_chunk(event: str, data: str) -> None:
+            if current_spinner:
+                stop_spinner(console)
+            if data is not None:
+                console.print(data, end="", markup=False)
+
         agent.event_emitter.on(
             event=events,
             listener=console_print_events,
         )
+
+        agent.event_emitter.on(
+            event="task_think_start",
+            listener=handle_task_think_start,
+        )
+
+        agent.event_emitter.on(
+            event="task_think_end",
+            listener=handle_task_think_end,
+        )
+
+        agent.event_emitter.on(
+            event="stream_chunk",
+            listener=handle_stream_chunk,
+        )
+
         logger.debug("Registered event handlers for agent events with events: {events}")
 
         logger.debug(f"Solving task with agent: {task_content}")
         if max_iterations < 1:
             raise ValueError("max_iterations must be greater than 0")
-        result = agent.solve_task(task=task_content, max_iterations=max_iterations)
+        result = agent.solve_task(task=task_content, max_iterations=max_iterations, streaming=not no_stream)
         logger.debug(f"Task solved with result: {result} using {max_iterations} iterations")
 
         console.print(
quantalogic/prompts.py
CHANGED
@@ -17,7 +17,7 @@ Every response must contain exactly two XML blocks:
 1. Analysis Block:
 ```xml
 <thinking>
-    <!--
+    <!-- Must follow this precise format, concise, dense, use abreviations, emojis, unicode characters to make it denser -->
     <task_analysis_if_no_history>
         Only if no conversation history:
         * Rewrite the <task> and its context with your own words in detailed, clear, and specific manner.
@@ -51,7 +51,7 @@ Every response must contain exactly two XML blocks:
     </last_observation>
     <progess_analysis>
         <!-- if there is a conversation history -->
-        * Detail each step failed and completed so far.
+        * Detail each step failed and completed so far, be concise.
         * Identify and evaluate any blockers or challenges to the progress of global task.
         * Identify repetitions: if repeated steps, take a step back and rethink your approach.
         * Provide potential solutions, and if needed, suggest reevaluating the approach and the plan.
quantalogic/search_agent.py
CHANGED
@@ -12,12 +12,21 @@ from quantalogic.tools import (
 )
 
 
-def create_search_agent(
+def create_search_agent(
+    model_name: str,
+    mode_full: bool = False,
+    no_stream: bool = False,
+    compact_every_n_iteration: int | None = None,
+    max_tokens_working_memory: int | None = None
+) -> Agent:
     """Creates and configures a search agent with web, knowledge, and privacy-focused search tools.
 
     Args:
         model_name (str): Name of the language model to use for the agent's core capabilities
         mode_full (bool, optional): If True, the agent will be configured with a full set of tools.
+        no_stream (bool, optional): If True, the agent will not stream results.
+        compact_every_n_iteration (int | None, optional): Frequency of memory compaction.
+        max_tokens_working_memory (int | None, optional): Maximum tokens for working memory.
 
     Returns:
         Agent: A fully configured search agent instance with:
@@ -57,4 +66,6 @@ def create_search_agent(model_name: str, mode_full: bool = False) -> Agent:
         model_name=model_name,
         tools=tools,
         specific_expertise=specific_expertise,
+        compact_every_n_iterations=compact_every_n_iteration,
+        max_tokens_working_memory=max_tokens_working_memory,
     )
quantalogic/server/agent_server.py
CHANGED
@@ -30,7 +30,7 @@ from quantalogic.agent_config import (
     create_coding_agent,  # noqa: F401
     create_orchestrator_agent,  # noqa: F401
 )
-from quantalogic.
+from quantalogic.console_print_events import console_print_events
 
 # Configure logger
 logger.remove()
@@ -246,7 +246,7 @@ class AgentState:
     def initialize_agent_with_sse_validation(self, model_name: str = MODEL_NAME):
         """Initialize agent with SSE-based user validation."""
         try:
-            self.agent = create_agent(model_name)
+            self.agent = create_agent(model_name, None)
 
             # Comprehensive list of agent events to track
             agent_events = [