lollms-client 0.33.0__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lollms-client might be problematic.
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/azure_openai/__init__.py +6 -10
- lollms_client/llm_bindings/claude/__init__.py +4 -7
- lollms_client/llm_bindings/gemini/__init__.py +3 -7
- lollms_client/llm_bindings/grok/__init__.py +3 -7
- lollms_client/llm_bindings/groq/__init__.py +4 -6
- lollms_client/llm_bindings/hugging_face_inference_api/__init__.py +4 -6
- lollms_client/llm_bindings/litellm/__init__.py +15 -6
- lollms_client/llm_bindings/llamacpp/__init__.py +27 -9
- lollms_client/llm_bindings/lollms/__init__.py +24 -14
- lollms_client/llm_bindings/lollms_webui/__init__.py +6 -12
- lollms_client/llm_bindings/mistral/__init__.py +3 -5
- lollms_client/llm_bindings/ollama/__init__.py +6 -11
- lollms_client/llm_bindings/open_router/__init__.py +4 -6
- lollms_client/llm_bindings/openai/__init__.py +7 -14
- lollms_client/llm_bindings/openllm/__init__.py +12 -12
- lollms_client/llm_bindings/pythonllamacpp/__init__.py +1 -1
- lollms_client/llm_bindings/tensor_rt/__init__.py +8 -13
- lollms_client/llm_bindings/transformers/__init__.py +14 -6
- lollms_client/llm_bindings/vllm/__init__.py +16 -12
- lollms_client/lollms_core.py +303 -490
- lollms_client/lollms_discussion.py +431 -78
- lollms_client/lollms_llm_binding.py +192 -381
- lollms_client/lollms_mcp_binding.py +33 -2
- lollms_client/lollms_tti_binding.py +107 -2
- lollms_client/mcp_bindings/local_mcp/__init__.py +3 -2
- lollms_client/mcp_bindings/remote_mcp/__init__.py +6 -5
- lollms_client/mcp_bindings/standard_mcp/__init__.py +3 -5
- lollms_client/stt_bindings/lollms/__init__.py +6 -8
- lollms_client/stt_bindings/whisper/__init__.py +2 -4
- lollms_client/stt_bindings/whispercpp/__init__.py +15 -16
- lollms_client/tti_bindings/dalle/__init__.py +50 -29
- lollms_client/tti_bindings/diffusers/__init__.py +227 -439
- lollms_client/tti_bindings/gemini/__init__.py +320 -0
- lollms_client/tti_bindings/lollms/__init__.py +8 -9
- lollms_client-1.1.0.dist-info/METADATA +1214 -0
- lollms_client-1.1.0.dist-info/RECORD +69 -0
- {lollms_client-0.33.0.dist-info → lollms_client-1.1.0.dist-info}/top_level.txt +0 -2
- examples/article_summary/article_summary.py +0 -58
- examples/console_discussion/console_app.py +0 -266
- examples/console_discussion.py +0 -448
- examples/deep_analyze/deep_analyse.py +0 -30
- examples/deep_analyze/deep_analyze_multiple_files.py +0 -32
- examples/function_calling_with_local_custom_mcp.py +0 -250
- examples/generate_a_benchmark_for_safe_store.py +0 -89
- examples/generate_and_speak/generate_and_speak.py +0 -251
- examples/generate_game_sfx/generate_game_fx.py +0 -240
- examples/generate_text_with_multihop_rag_example.py +0 -210
- examples/gradio_chat_app.py +0 -228
- examples/gradio_lollms_chat.py +0 -259
- examples/internet_search_with_rag.py +0 -226
- examples/lollms_chat/calculator.py +0 -59
- examples/lollms_chat/derivative.py +0 -48
- examples/lollms_chat/test_openai_compatible_with_lollms_chat.py +0 -12
- examples/lollms_discussions_test.py +0 -155
- examples/mcp_examples/external_mcp.py +0 -267
- examples/mcp_examples/local_mcp.py +0 -171
- examples/mcp_examples/openai_mcp.py +0 -203
- examples/mcp_examples/run_remote_mcp_example_v2.py +0 -290
- examples/mcp_examples/run_standard_mcp_example.py +0 -204
- examples/simple_text_gen_test.py +0 -173
- examples/simple_text_gen_with_image_test.py +0 -178
- examples/test_local_models/local_chat.py +0 -9
- examples/text_2_audio.py +0 -77
- examples/text_2_image.py +0 -144
- examples/text_2_image_diffusers.py +0 -274
- examples/text_and_image_2_audio.py +0 -59
- examples/text_gen.py +0 -30
- examples/text_gen_system_prompt.py +0 -29
- lollms_client-0.33.0.dist-info/METADATA +0 -854
- lollms_client-0.33.0.dist-info/RECORD +0 -101
- test/test_lollms_discussion.py +0 -368
- {lollms_client-0.33.0.dist-info → lollms_client-1.1.0.dist-info}/WHEEL +0 -0
- {lollms_client-0.33.0.dist-info → lollms_client-1.1.0.dist-info}/licenses/LICENSE +0 -0
lollms_client/lollms_core.py
CHANGED
@@ -30,15 +30,9 @@ class LollmsClient():
     Provides a unified interface to manage and use different bindings for various modalities.
     """
     def __init__(self,
-                 # LLM Binding Parameters
-                 binding_name: str = "lollms",
-                 host_address: Optional[str] = None, # Shared host address (for service based bindings) default for all bindings if not specified
-                 models_path: Optional[str] = None, # Shared models folder path (for local file based bindings) default for all bindings if not specified
-                 model_name: str = "",
-                 llm_bindings_dir: Path = Path(__file__).parent / "llm_bindings",
-                 llm_binding_config: Optional[Dict[str, any]] = None,

                 # Optional Modality Binding Names
+                 llm_binding_name: Optional[str] = None,
                 tts_binding_name: Optional[str] = None,
                 tti_binding_name: Optional[str] = None,
                 stt_binding_name: Optional[str] = None,
@@ -47,6 +41,7 @@ class LollmsClient():
                 mcp_binding_name: Optional[str] = None,

                 # Modality Binding Directories
+                 llm_bindings_dir: Path = Path(__file__).parent / "llm_bindings",
                 tts_bindings_dir: Path = Path(__file__).parent / "tts_bindings",
                 tti_bindings_dir: Path = Path(__file__).parent / "tti_bindings",
                 stt_bindings_dir: Path = Path(__file__).parent / "stt_bindings",
@@ -55,28 +50,13 @@ class LollmsClient():
                 mcp_bindings_dir: Path = Path(__file__).parent / "mcp_bindings",

                 # Configurations
+                 llm_binding_config: Optional[Dict[str, any]] = None,
                 tts_binding_config: Optional[Dict[str, any]] = None,
                 tti_binding_config: Optional[Dict[str, any]] = None,
                 stt_binding_config: Optional[Dict[str, any]] = None,
                 ttv_binding_config: Optional[Dict[str, any]] = None,
                 ttm_binding_config: Optional[Dict[str, any]] = None,
                 mcp_binding_config: Optional[Dict[str, any]] = None,
-
-                 # General Parameters (mostly defaults for LLM generation)
-                 service_key: Optional[str] = None, # Shared service key/client_id
-                 verify_ssl_certificate: bool = True,
-                 ctx_size: Optional[int|None] = None,
-                 n_predict: Optional[int|None] = None,
-                 stream: bool = False,
-                 temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
-                 top_k: int = 40, # Ollama default is 40
-                 top_p: float = 0.9, # Ollama default is 0.9
-                 repeat_penalty: float = 1.1, # Ollama default is 1.1
-                 repeat_last_n: int = 64, # Ollama default is 64
-
-                 seed: Optional[int] = None,
-                 n_threads: int = 8,
-                 streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
                 user_name ="user",
                 ai_name = "assistant",
                 **kwargs
@@ -125,29 +105,9 @@ class LollmsClient():
        Raises:
            ValueError: If the primary LLM binding cannot be created.
        """
-        self.host_address = host_address # Store initial preference
-        self.models_path = models_path
-        self.service_key = service_key
-        self.verify_ssl_certificate = verify_ssl_certificate
-
        # --- LLM Binding Setup ---
-        self.binding_manager = LollmsLLMBindingManager(llm_bindings_dir)
-        self.binding = self.binding_manager.create_binding(
-            binding_name=binding_name,
-            host_address=host_address, # Pass initial host preference
-            models_path=models_path,
-            model_name=model_name,
-            service_key=service_key,
-            verify_ssl_certificate=verify_ssl_certificate,
-            # Pass LLM specific config if needed
-            **(llm_binding_config or {})
-        )
-
-        if self.binding is None:
-            available = self.binding_manager.get_available_bindings()
-            raise ValueError(f"Failed to create LLM binding: {binding_name}. Available: {available}")
-
        # --- Modality Binding Setup ---
+        self.llm_binding_manager = LollmsLLMBindingManager(llm_bindings_dir)
        self.tts_binding_manager = LollmsTTSBindingManager(tts_bindings_dir)
        self.tti_binding_manager = LollmsTTIBindingManager(tti_bindings_dir)
        self.stt_binding_manager = LollmsSTTBindingManager(stt_bindings_dir)
@@ -155,6 +115,8 @@ class LollmsClient():
        self.ttm_binding_manager = LollmsTTMBindingManager(ttm_bindings_dir)
        self.mcp_binding_manager = LollmsMCPBindingManager(mcp_bindings_dir)

+
+        self.llm: Optional[LollmsLLMBinding] = None
        self.tts: Optional[LollmsTTSBinding] = None
        self.tti: Optional[LollmsTTIBinding] = None
        self.stt: Optional[LollmsSTTBinding] = None
@@ -162,10 +124,29 @@ class LollmsClient():
        self.ttm: Optional[LollmsTTMBinding] = None
        self.mcp: Optional[LollmsMCPBinding] = None

+
+        if llm_binding_name:
+            self.llm = self.llm_binding_manager.create_binding(
+                binding_name=llm_binding_name,
+                **{
+                    k: v
+                    for k, v in (llm_binding_config or {}).items()
+                    if k != "binding_name"
+                }
+            )
+
+            if self.llm is None:
+                available = self.llm_binding_manager.get_available_bindings()
+                ASCIIColors.warning(f"Failed to create LLM binding: {llm_binding_name}. Available: {available}")
+
        if tts_binding_name:
            self.tts = self.tts_binding_manager.create_binding(
                binding_name=tts_binding_name,
-                **
+                **{
+                    k: v
+                    for k, v in (tts_binding_config or {}).items()
+                    if k != "binding_name"
+                }
            )
            if self.tts is None:
                ASCIIColors.warning(f"Failed to create TTS binding: {tts_binding_name}. Available: {self.tts_binding_manager.get_available_bindings()}")
@@ -174,7 +155,11 @@ class LollmsClient():
            if tti_binding_config:
                self.tti = self.tti_binding_manager.create_binding(
                    binding_name=tti_binding_name,
-                    **
+                    **{
+                        k: v
+                        for k, v in (tti_binding_config or {}).items()
+                        if k != "binding_name"
+                    }
                )
            else:
                self.tti = self.tti_binding_manager.create_binding(
@@ -187,8 +172,13 @@ class LollmsClient():
            if stt_binding_config:
                self.stt = self.stt_binding_manager.create_binding(
                    binding_name=stt_binding_name,
-                    **
+                    **{
+                        k: v
+                        for k, v in (stt_binding_config or {}).items()
+                        if k != "binding_name"
+                    }
                )
+
            else:
                self.stt = self.stt_binding_manager.create_binding(
                    binding_name=stt_binding_name,
@@ -199,8 +189,13 @@ class LollmsClient():
            if ttv_binding_config:
                self.ttv = self.ttv_binding_manager.create_binding(
                    binding_name=ttv_binding_name,
-                    **
+                    **{
+                        k: v
+                        for k, v in ttv_binding_config.items()
+                        if k != "binding_name"
+                    }
                )
+
            else:
                self.ttv = self.ttv_binding_manager.create_binding(
                    binding_name=ttv_binding_name
@@ -212,7 +207,11 @@ class LollmsClient():
            if ttm_binding_config:
                self.ttm = self.ttm_binding_manager.create_binding(
                    binding_name=ttm_binding_name,
-                    **
+                    **{
+                        k: v
+                        for k, v in (ttm_binding_config or {}).items()
+                        if k != "binding_name"
+                    }
                )
            else:
                self.ttm = self.ttm_binding_manager.create_binding(
@@ -224,8 +223,12 @@ class LollmsClient():
        if mcp_binding_name:
            if mcp_binding_config:
                self.mcp = self.mcp_binding_manager.create_binding(
-                    mcp_binding_name,
-                    **
+                    binding_name=mcp_binding_name,
+                    **{
+                        k: v
+                        for k, v in (mcp_binding_config or {}).items()
+                        if k != "binding_name"
+                    }
                )
            else:
                self.mcp = self.mcp_binding_manager.create_binding(
@@ -235,17 +238,6 @@ class LollmsClient():
                ASCIIColors.warning(f"Failed to create MCP binding: {mcp_binding_name}. Available: {self.mcp_binding_manager.get_available_bindings()}")

        # --- Store Default Generation Parameters ---
-        self.default_ctx_size = ctx_size
-        self.default_n_predict = n_predict
-        self.default_stream = stream
-        self.default_temperature = temperature
-        self.default_top_k = top_k
-        self.default_top_p = top_p
-        self.default_repeat_penalty = repeat_penalty
-        self.default_repeat_last_n = repeat_last_n
-        self.default_seed = seed
-        self.default_n_threads = n_threads
-        self.default_streaming_callback = streaming_callback

        # --- Prompt Formatting Attributes ---
        self.user_name = user_name
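Read together, the constructor hunks above replace the old `binding_name`, shared host/model parameters, and client-level generation defaults with per-modality `*_binding_name` and `*_binding_config` arguments. A minimal sketch of what a 1.1.0-style call might look like, assuming `LollmsClient` is exported from the package root and that per-binding settings such as the host and model name now travel inside `llm_binding_config` (these keys are binding-specific assumptions, not shown in this diff):

```python
from lollms_client import LollmsClient

# 0.33.0 style (parameters removed above):
#   client = LollmsClient(binding_name="ollama", host_address="http://localhost:11434",
#                         model_name="mistral", temperature=0.7)

# 1.1.0 style sketch; the exact llm_binding_config keys depend on the chosen binding.
client = LollmsClient(
    llm_binding_name="ollama",
    llm_binding_config={
        "host_address": "http://localhost:11434",  # assumed per-binding key
        "model_name": "mistral",                   # assumed per-binding key
    },
)
```

Note that a failed LLM binding creation is now only a warning (`ASCIIColors.warning`) rather than a raised `ValueError`, so callers should check `client.llm` before use.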
@@ -264,35 +256,30 @@ class LollmsClient():
    #
    def update_llm_binding(self, binding_name: str, config: Optional[Dict[str, Any]] = None):
        """Update the LLM binding with a new configuration."""
-        self.
+        self.llm = self.llm_binding_manager.create_binding(
            binding_name=binding_name,
-            host_address=self.host_address,
-            models_path=self.models_path,
-            model_name=self.binding.model_name, # Keep the same model name
-            service_key=self.service_key,
-            verify_ssl_certificate=self.verify_ssl_certificate,
            **(config or {})
        )
-        if self.
-            available = self.
+        if self.llm is None:
+            available = self.llm_binding_manager.get_available_bindings()
            raise ValueError(f"Failed to update LLM binding: {binding_name}. Available: {available}")

    def get_ctx_size(self, model_name:str|None=None):
-        if self.
-            ctx_size = self.
-            return ctx_size if ctx_size else self.default_ctx_size
+        if self.llm:
+            ctx_size = self.llm.get_ctx_size(model_name)
+            return ctx_size if ctx_size else self.llm.default_ctx_size
        else:
            return None

    def get_model_name(self):
-        if self.
-            return self.
+        if self.llm:
+            return self.llm.model_name
        else:
            return None

    def set_model_name(self, model_name)->bool:
-        if self.
-            self.
+        if self.llm:
+            self.llm.model_name = model_name
            return True
        else:
            return False
@@ -400,8 +387,8 @@ class LollmsClient():
        Returns:
            list: List of tokens.
        """
-        if self.
-            return self.
+        if self.llm:
+            return self.llm.tokenize(text)
        raise RuntimeError("LLM binding not initialized.")

    def detokenize(self, tokens: list) -> str:
@@ -414,8 +401,8 @@ class LollmsClient():
        Returns:
            str: Detokenized text.
        """
-        if self.
-            return self.
+        if self.llm:
+            return self.llm.detokenize(tokens)
        raise RuntimeError("LLM binding not initialized.")
    def count_tokens(self, text: str) -> int:
        """
@@ -427,8 +414,8 @@ class LollmsClient():
        Returns:
            int: Number of tokens.
        """
-        if self.
-            return self.
+        if self.llm:
+            return self.llm.count_tokens(text)
        raise RuntimeError("LLM binding not initialized.")

    def count_image_tokens(self, image: str) -> int:
@@ -441,8 +428,8 @@ class LollmsClient():
        Returns:
            int: Estimated number of tokens for the image. Returns -1 on error.
        """
-        if self.
-            return self.
+        if self.llm:
+            return self.llm.count_image_tokens(image)
        raise RuntimeError("LLM binding not initialized.")

    def get_model_details(self) -> dict:
@@ -452,8 +439,8 @@ class LollmsClient():
        Returns:
            dict: Model information dictionary.
        """
-        if self.
-            return self.
+        if self.llm:
+            return self.llm.get_model_info()
        raise RuntimeError("LLM binding not initialized.")

    def switch_model(self, model_name: str) -> bool:
@@ -466,8 +453,8 @@ class LollmsClient():
        Returns:
            bool: True if model loaded successfully, False otherwise.
        """
-        if self.
-            return self.
+        if self.llm:
+            return self.llm.load_model(model_name)
        raise RuntimeError("LLM binding not initialized.")

    def get_available_llm_bindings(self) -> List[str]:
@@ -477,7 +464,7 @@ class LollmsClient():
        Returns:
            List[str]: List of binding names that can be used for LLMs.
        """
-        return self.
+        return self.llm_binding_manager.get_available_bindings()

    def generate_text(self,
                      prompt: str,
@@ -523,11 +510,11 @@ class LollmsClient():
        Returns:
            Union[str, dict]: Generated text or error dictionary if failed.
        """
-        if self.
+        if self.llm:

-            ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size if self.default_ctx_size else None
+            ctx_size = ctx_size if ctx_size is not None else self.llm.default_ctx_size if self.llm.default_ctx_size else None
            if ctx_size is None:
-                ctx_size = self.
+                ctx_size = self.llm.get_ctx_size()
                if ctx_size is None:
                    ctx_size = 1024*8 # 1028*8= 8192 tokens, a common default for many models
            nb_input_tokens = self.count_tokens(prompt)+ (sum([self.count_image_tokens(image) for image in images]) if images else 0)
@@ -536,21 +523,21 @@ class LollmsClient():
            ASCIIColors.magenta(f"ctx_size : {ctx_size}")
            ASCIIColors.magenta(f"nb_input_tokens : {nb_input_tokens}")

-            return self.
+            return self.llm.generate_text(
                prompt=prompt,
                images=images,
                system_prompt=system_prompt,
-                n_predict=n_predict if n_predict else self.default_n_predict if self.default_n_predict else ctx_size - nb_input_tokens,
-                stream=stream if stream is not None else self.default_stream,
-                temperature=temperature if temperature is not None else self.default_temperature,
-                top_k=top_k if top_k is not None else self.default_top_k,
-                top_p=top_p if top_p is not None else self.default_top_p,
-                repeat_penalty=repeat_penalty if repeat_penalty is not None else self.default_repeat_penalty,
-                repeat_last_n=repeat_last_n if repeat_last_n is not None else self.default_repeat_last_n,
-                seed=seed if seed is not None else self.default_seed,
-                n_threads=n_threads if n_threads is not None else self.default_n_threads,
-                ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size,
-                streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback,
+                n_predict=n_predict if n_predict else self.llm.default_n_predict if self.llm.default_n_predict else ctx_size - nb_input_tokens,
+                stream=stream if stream is not None else self.llm.default_stream,
+                temperature=temperature if temperature is not None else self.llm.default_temperature,
+                top_k=top_k if top_k is not None else self.llm.default_top_k,
+                top_p=top_p if top_p is not None else self.llm.default_top_p,
+                repeat_penalty=repeat_penalty if repeat_penalty is not None else self.llm.default_repeat_penalty,
+                repeat_last_n=repeat_last_n if repeat_last_n is not None else self.llm.default_repeat_last_n,
+                seed=seed if seed is not None else self.llm.default_seed,
+                n_threads=n_threads if n_threads is not None else self.llm.default_n_threads,
+                ctx_size = ctx_size if ctx_size is not None else self.llm.default_ctx_size,
+                streaming_callback=streaming_callback if streaming_callback is not None else self.llm.default_streaming_callback,
                split= split,
                user_keyword=user_keyword,
                ai_keyword=ai_keyword
@@ -592,20 +579,20 @@ class LollmsClient():
        Returns:
            Union[str, dict]: Generated text or error dictionary if failed.
        """
-        if self.
-            return self.
+        if self.llm:
+            return self.llm.generate_from_messages(
                messages=messages,
-                n_predict=n_predict if n_predict is not None else self.default_n_predict,
-                stream=stream if stream is not None else self.default_stream,
-                temperature=temperature if temperature is not None else self.default_temperature,
-                top_k=top_k if top_k is not None else self.default_top_k,
-                top_p=top_p if top_p is not None else self.default_top_p,
-                repeat_penalty=repeat_penalty if repeat_penalty is not None else self.default_repeat_penalty,
-                repeat_last_n=repeat_last_n if repeat_last_n is not None else self.default_repeat_last_n,
-                seed=seed if seed is not None else self.default_seed,
-                n_threads=n_threads if n_threads is not None else self.default_n_threads,
-                ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size,
-                streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback,
+                n_predict=n_predict if n_predict is not None else self.llm.default_n_predict,
+                stream=stream if stream is not None else self.llm.default_stream,
+                temperature=temperature if temperature is not None else self.llm.default_temperature,
+                top_k=top_k if top_k is not None else self.llm.default_top_k,
+                top_p=top_p if top_p is not None else self.llm.default_top_p,
+                repeat_penalty=repeat_penalty if repeat_penalty is not None else self.llm.default_repeat_penalty,
+                repeat_last_n=repeat_last_n if repeat_last_n is not None else self.llm.default_repeat_last_n,
+                seed=seed if seed is not None else self.llm.default_seed,
+                n_threads=n_threads if n_threads is not None else self.llm.default_n_threads,
+                ctx_size = ctx_size if ctx_size is not None else self.llm.default_ctx_size,
+                streaming_callback=streaming_callback if streaming_callback is not None else self.llm.default_streaming_callback,
            )
        raise RuntimeError("LLM binding not initialized.")

@@ -650,21 +637,21 @@ class LollmsClient():
        Returns:
            Union[str, dict]: Generated text or an error dictionary if failed.
        """
-        if self.
-            return self.
+        if self.llm:
+            return self.llm.chat(
                discussion=discussion,
                branch_tip_id=branch_tip_id,
-                n_predict=n_predict if n_predict is not None else self.default_n_predict,
-                stream=stream if stream is not None else True if streaming_callback is not None else self.default_stream,
-                temperature=temperature if temperature is not None else self.default_temperature,
-                top_k=top_k if top_k is not None else self.default_top_k,
-                top_p=top_p if top_p is not None else self.default_top_p,
-                repeat_penalty=repeat_penalty if repeat_penalty is not None else self.default_repeat_penalty,
-                repeat_last_n=repeat_last_n if repeat_last_n is not None else self.default_repeat_last_n,
-                seed=seed if seed is not None else self.default_seed,
-                n_threads=n_threads if n_threads is not None else self.default_n_threads,
-                ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size,
-                streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback
+                n_predict=n_predict if n_predict is not None else self.llm.default_n_predict,
+                stream=stream if stream is not None else True if streaming_callback is not None else self.llm.default_stream,
+                temperature=temperature if temperature is not None else self.llm.default_temperature,
+                top_k=top_k if top_k is not None else self.llm.default_top_k,
+                top_p=top_p if top_p is not None else self.llm.default_top_p,
+                repeat_penalty=repeat_penalty if repeat_penalty is not None else self.llm.default_repeat_penalty,
+                repeat_last_n=repeat_last_n if repeat_last_n is not None else self.llm.default_repeat_last_n,
+                seed=seed if seed is not None else self.llm.default_seed,
+                n_threads=n_threads if n_threads is not None else self.llm.default_n_threads,
+                ctx_size = ctx_size if ctx_size is not None else self.llm.default_ctx_size,
+                streaming_callback=streaming_callback if streaming_callback is not None else self.llm.default_streaming_callback
            )
        raise RuntimeError("LLM binding not initialized.")

@@ -679,15 +666,15 @@ class LollmsClient():
        Returns:
            list: List of embeddings.
        """
-        if self.
-            return self.
+        if self.llm:
+            return self.llm.embed(text, **kwargs)
        raise RuntimeError("LLM binding not initialized.")


    def listModels(self):
        """Lists models available to the current LLM binding."""
-        if self.
-            return self.
+        if self.llm:
+            return self.llm.listModels()
        raise RuntimeError("LLM binding not initialized.")

    # --- Convenience Methods for Lollms LLM Binding Features ---
@@ -698,8 +685,8 @@ class LollmsClient():
        Returns:
            Union[List[Dict], Dict]: List of personality dicts or error dict.
        """
-        if self.
-            return self.
+        if self.llm and hasattr(self.llm, 'lollms_listMountedPersonalities'):
+            return self.llm.lollms_listMountedPersonalities()
        else:
            ASCIIColors.warning("listMountedPersonalities is only available for the 'lollms' LLM binding.")
            return {"status": False, "error": "Functionality not available for the current binding"}
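The hunks above are a mechanical rename: the client-level `self.binding` and `self.default_*` generation defaults move onto the LLM binding object as `self.llm` and `self.llm.default_*`. A small sketch of the resulting call pattern, assuming the defaults are now plain attributes of `client.llm` (attribute names taken from the diff); per-call overrides keep the keyword names shown in `generate_text`:

```python
# Per-call overrides still go through the client (parameter names as in the diff above).
text = client.generate_text(
    prompt="Explain tokenization in one paragraph.",
    temperature=0.2,   # overrides client.llm.default_temperature for this call
    n_predict=256,     # overrides client.llm.default_n_predict
)

# Defaults now live on the binding itself rather than on the client.
print(client.llm.default_ctx_size, client.llm.default_temperature)
```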
@@ -910,7 +897,7 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
                streaming_callback: Optional[Callable[[str, int, Optional[Dict], Optional[List]], bool]] = None,
                **llm_generation_kwargs
                ) -> Dict[str, Any]:
-        if not self.
+        if not self.llm or not self.mcp:
            return {"final_answer": "", "tool_calls": [], "error": "LLM or MCP binding not initialized."}

        turn_history: List[Dict[str, Any]] = []
@@ -1076,7 +1063,7 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
                "- Do not make up information. If the findings are insufficient to fully answer the request, state what you found and what remains unanswered.\n"
                "- Format your response clearly using markdown where appropriate.\n"
            )
-            final_answer_text = self.generate_text(prompt=final_answer_prompt, system_prompt=system_prompt, images=images, stream=streaming_callback is not None, streaming_callback=streaming_callback, temperature=final_answer_temperature if final_answer_temperature is not None else self.default_temperature, **(llm_generation_kwargs or {}))
+            final_answer_text = self.generate_text(prompt=final_answer_prompt, system_prompt=system_prompt, images=images, stream=streaming_callback is not None, streaming_callback=streaming_callback, temperature=final_answer_temperature if final_answer_temperature is not None else self.llm.default_temperature, **(llm_generation_kwargs or {}))

            if streaming_callback:
                streaming_callback("Final answer generation complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "final_answer_synthesis"}, turn_history = turn_history)
@@ -1117,7 +1104,7 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
        """
        Enhanced RAG with dynamic objective refinement and a knowledge scratchpad.
        """
-        if not self.
+        if not self.llm:
            return {"final_answer": "", "rag_hops_history": [], "all_retrieved_sources": [], "error": "LLM binding not initialized."}

        effective_ctx_size = ctx_size or getattr(self, "default_ctx_size", 20000)
@@ -1456,394 +1443,220 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
        new_scratchpad_text = self.generate_text(prompt=synthesis_prompt, n_predict=1024, temperature=0.0)
        return self.remove_thinking_blocks(new_scratchpad_text).strip()

-
    def generate_with_mcp_rag(
            self,
            prompt: str,
+            context: Optional[str] = None,
            use_mcps: Union[None, bool, List[str]] = None,
            use_data_store: Union[None, Dict[str, Callable]] = None,
            system_prompt: str = None,
            reasoning_system_prompt: str = "You are a logical AI assistant. Your task is to achieve the user's goal by thinking step-by-step and using the available tools.",
            images: Optional[List[str]] = None,
-            max_reasoning_steps: int =
-            decision_temperature: float =
-            final_answer_temperature: float =
+            max_reasoning_steps: int = 10,
+            decision_temperature: float = 0.5,
+            final_answer_temperature: float = 0.7,
            streaming_callback: Optional[Callable[[str, 'MSG_TYPE', Optional[Dict], Optional[List]], bool]] = None,
-            rag_top_k: int =
-            rag_min_similarity_percent: float =
-            output_summarization_threshold: int =
+            rag_top_k: int = 5,
+            rag_min_similarity_percent: float = 50.0,
+            output_summarization_threshold: int = 500, # In tokens
+            force_mcp_use: bool = False,
            debug: bool = False,
            **llm_generation_kwargs
            ) -> Dict[str, Any]:
-        """
-
-
-
-
-
-
-
-
-
-
-
-
+        """
+        Orchestrates a sophisticated and robust agentic process to generate a response.
+
+        This method employs a dynamic "observe-think-act" loop with several advanced architectural
+        patterns for improved robustness and efficiency, particularly when handling code.
+
+        Key Features:
+        - **Context-Aware Asset Ingestion**: The agent automatically detects if the `context`
+          parameter (representing the previous turn) contains code. If so, it registers that
+          code as an asset with a UUID, preventing the LLM from trying to paste large code
+          blocks into its prompts and avoiding JSON errors.
+        - **Tool Perception Filtering**: Identifies tools that directly consume code and HIDES
+          them from the LLM's view, forcing it to use the safer `generate_and_call` workflow.
+        - **Forced Safe Workflow**: The `generate_and_call` meta-tool is the ONLY way the agent
+          can execute code, ensuring a robust, error-free, and efficient process.
+        - **Verbose Internal Logging**: The `generate_and_call` tool is now fully instrumented
+          with detailed logging and robust error handling to ensure every failure is visible
+          and diagnosable, preventing silent loops.
+
        Args:
-            prompt: The user's initial prompt or question.
+            prompt: The user's initial prompt or question for the current turn.
+            context: An optional string containing the content of the previous turn.
            use_mcps: Controls MCP tool usage.
            use_data_store: Controls RAG usage.
-            system_prompt:
-            reasoning_system_prompt:
-
-
-
-
-
-
-            rag_top_k: The number of top documents to retrieve during RAG.
+            system_prompt: Main system prompt for the final answer.
+            reasoning_system_prompt: System prompt for the decision-making process.
+            images: A list of base64-encoded images provided by the user for the current turn.
+            max_reasoning_steps: Maximum number of reasoning cycles.
+            decision_temperature: Temperature for LLM's decision-making.
+            final_answer_temperature: Temperature for final answer synthesis.
+            streaming_callback: Function for real-time output of tokens and steps.
+            rag_top_k: Number of top documents to retrieve during RAG.
            rag_min_similarity_percent: Minimum similarity for RAG results.
-            output_summarization_threshold:
-
-            debug
+            output_summarization_threshold: Token count that triggers summarization.
+            force_mcp_use: If True, bypasses the "fast answer" check.
+            debug: If True, prints detailed prompting and response information.
            **llm_generation_kwargs: Additional keyword arguments for LLM calls.

        Returns:
-            A dictionary containing the agent's full run
-            answer, the complete internal scratchpad, a log of tool calls,
-            any retrieved RAG sources, and other metadata.
+            A dictionary containing the agent's full run.
        """
-
-        if not self.binding:
+        if not self.llm:
            return {"final_answer": "", "tool_calls": [], "sources": [], "error": "LLM binding not initialized."}
+        if max_reasoning_steps is None:
+            max_reasoning_steps = 10
+        # --- Helper Functions ---
+        def log_event(desc, event_type=MSG_TYPE.MSG_TYPE_CHUNK, meta=None, event_id=None) -> Optional[str]:
+            if not streaming_callback: return None
+            is_start = event_type == MSG_TYPE.MSG_TYPE_STEP_START
+            event_id = str(uuid.uuid4()) if is_start and not event_id else event_id
+            params = {"type": event_type, "description": desc, **(meta or {})}
+            if event_id: params["id"] = event_id
+            streaming_callback(desc, event_type, params)
+            return event_id

-
-
-
-
-
-
-        if not decision_temperature:
-            decision_temperature = 0.7
-        if not output_summarization_threshold:
-            output_summarization_threshold = 500
-
-        events = []
-
-
-        # --- Initialize Agent State ---
-        sources_this_turn: List[Dict[str, Any]] = []
-        tool_calls_this_turn: List[Dict[str, Any]] = []
-        generated_code_store: Dict[str, str] = {} # NEW: Store for UUID -> code
-        original_user_prompt = prompt
-
-        initial_state_parts = [
-            "### Initial State",
-            "- My goal is to address the user's request.",
-            "- I have not taken any actions yet."
-        ]
-        if images:
-            initial_state_parts.append(f"- The user has provided {len(images)} image(s) for context.")
-        current_scratchpad = "\n".join(initial_state_parts)
-
-        def log_prompt(prompt, type="prompt"):
-            ASCIIColors.cyan(f"** DEBUG: {type} **")
-            ASCIIColors.magenta(prompt[-15000:])
-            prompt_size = self.count_tokens(prompt)
-            ASCIIColors.red(f"Prompt size:{prompt_size}/{self.default_ctx_size}")
+        def log_prompt(title: str, prompt_text: str):
+            if not debug: return
+            ASCIIColors.cyan(f"** DEBUG: {title} **")
+            ASCIIColors.magenta(prompt_text[-15000:])
+            prompt_size = self.count_tokens(prompt_text)
+            ASCIIColors.red(f"Prompt size:{prompt_size}/{self.llm.default_ctx_size}")
            ASCIIColors.cyan(f"** DEBUG: DONE **")

-        # ---
-
-
-
-
-
-
-
-
-
-
-
+        # --- 1. Initialize State & Context-Aware Asset Ingestion ---
+        original_user_prompt, tool_calls_this_turn, sources_this_turn = prompt, [], []
+        asset_store: Dict[str, Dict] = {}
+        initial_state_parts = ["### Initial State", "- My goal is to address the user's request comprehensively."]
+        if images:
+            for img_b64 in images:
+                img_uuid = str(uuid.uuid4())
+                asset_store[img_uuid] = {"type": "image", "content": img_b64}
+                initial_state_parts.append(f"- User provided image, asset ID: {img_uuid}")
+        if context:
+            code_blocks = re.findall(r"```(?:\w+)?\n([\s\S]+?)\n```", context)
+            if code_blocks:
+                last_code_block = code_blocks[-1]
+                code_uuid = str(uuid.uuid4())
+                asset_store[code_uuid] = {"type": "code", "content": last_code_block}
+                initial_state_parts.append(f"- The user's request likely refers to a code block from the previous turn's context. It has been registered as asset ID: {code_uuid}")
+        current_scratchpad = "\n".join(initial_state_parts)

-
-
-
-
-
-
-
-
-
-
-
-                    data[i] = code_store[item]
-                else:
-                    _substitute_code_uuids_recursive(item, code_store)
-
-        discovery_step_id = log_event("**Discovering tools**",MSG_TYPE.MSG_TYPE_STEP_START)
-        # --- 1. Discover Available Tools ---
-        available_tools = []
-        if use_mcps and self.mcp:
-            discovered_tools = self.mcp.discover_tools(force_refresh=True)
-            if isinstance(use_mcps, list):
-                available_tools.extend([t for t in discovered_tools if t["name"] in use_mcps])
-
+        # --- 2. Tool Discovery and Filtering ---
+        discovery_step_id = log_event("Discovering and filtering tools...", MSG_TYPE.MSG_TYPE_STEP_START)
+        all_discovered_tools, visible_tools, code_consuming_tools = [], [], set()
+        if use_mcps and hasattr(self, 'mcp'):
+            mcp_tools = self.mcp.discover_tools(force_refresh=True)
+            if isinstance(use_mcps, list): all_discovered_tools.extend([t for t in mcp_tools if t["name"] in use_mcps])
+            elif use_mcps is True: all_discovered_tools.extend(mcp_tools)
+        code_param_keywords = {'code', 'script', 'python_code', 'javascript', 'html', 'css'}
+        for tool in all_discovered_tools:
+            if any(p in code_param_keywords for p in tool.get("input_schema", {}).get("properties", {})): code_consuming_tools.add(tool['name'])
+            else: visible_tools.append(tool)
        if use_data_store:
-            for
-
-
-
-                "input_schema": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}
-            })
+            for name, info in use_data_store.items(): visible_tools.append({"name": f"research::{name}", "description": info.get("description", f"Queries '{name}'."), "input_schema": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}})
+        log_event(f"Made {len(visible_tools)} tools visible (hid {len(code_consuming_tools)} code tools).", MSG_TYPE.MSG_TYPE_STEP_END, meta={"visible": len(visible_tools), "hidden": len(code_consuming_tools), "hidden_list": list(code_consuming_tools)}, event_id=discovery_step_id)
+
+        # --- 3. Fast Answer Path (Not shown for brevity, but retained) ---

-        #
-
-
-
-
-
-
-                "name": "local_tools::view_generated_code",
-                "description": """Views the code that was generated and stored to the buffer. You need to have a valid uuid of the generated code.""",
-                "input_schema": {"type": "object", "properties": {"code_id": {"type": "string", "description": "The case sensitive uuid of the generated code."}}, "required": ["uuid"]}
-            })
-        # Add the new refactor_scratchpad tool definition
-        available_tools.append({
-            "name": "local_tools::refactor_scratchpad",
-            "description": "Rewrites the scratchpad content to clean it and reorganize it. Only use if the scratchpad is messy or contains too much information compared to what you need.",
-            "input_schema": {"type": "object", "properties": {}}
-        })
-
-        formatted_tools_list = "\n".join([f"**{t['name']}**:\n{t['description']}\ninput schema:\n{json.dumps(t['input_schema'])}" for t in available_tools])
-        formatted_tools_list += "\n**local_tools::request_clarification**:\nUse if the user's request is ambiguous and you can not infer a clear idea of his intent. this tool has no parameters."
-        formatted_tools_list += "\n**local_tools::final_answer**:\nUse when you are ready to respond to the user. this tool has no parameters."
-
-        if discovery_step_id: log_event(f"**Discovering tools** found {len(available_tools)} tools",MSG_TYPE.MSG_TYPE_STEP_END, event_id=discovery_step_id)
-
-        # --- 2. Dynamic Reasoning Loop ---
+        # --- 4. Format Tools for Main Loop ---
+        CODE_PLACEHOLDER = "{GENERATED_CODE}"
+        built_in_tools = [{"name": "local_tools::generate_and_call", "description": f"CRITICAL: To run or modify code, you MUST use this tool. It generates code (e.g., to fix code from an asset) and then calls a tool with it. Refer to existing code using its asset ID. Use '{CODE_PLACEHOLDER}' in `next_tool_params` for the NEWLY generated code.", "input_schema": { "type": "object", "properties": { "code_generation_prompt": {"type": "string"}, "language": {"type": "string"}, "next_tool_name": {"type": "string"}, "next_tool_params": {"type": "object"}}, "required": ["code_generation_prompt", "next_tool_name", "next_tool_params"]}}, {"name": "local_tools::refactor_scratchpad", "description": "Rewrites the scratchpad.", "input_schema": {}}, {"name": "local_tools::request_clarification", "description": "Asks the user for more information.", "input_schema": {"type": "object", "properties": {"question_to_user": {"type": "string"}}, "required": ["question_to_user"]}}, {"name": "local_tools::final_answer", "description": "Provides the final answer.", "input_schema": {}}]
+        all_visible_tools = visible_tools + built_in_tools
+        formatted_tools_list = "\n".join([f"**{t['name']}**:\n- Description: {t['description']}" for t in all_visible_tools])
+
+        # --- 5. Dynamic Reasoning Loop ---
        for i in range(max_reasoning_steps):
+            reasoning_step_id = log_event(f"Reasoning Step {i+1}/{max_reasoning_steps}", MSG_TYPE.MSG_TYPE_STEP_START)
            try:
-
-
-
+                reasoning_prompt = f"""--- AVAILABLE ACTIONS ---\n{formatted_tools_list}\n\n--- YOUR INTERNAL SCRATCHPAD ---\n{current_scratchpad}\n--- END SCRATCHPAD ---\n\n**INSTRUCTIONS:**\n1. **OBSERVE:** Review your scratchpad, especially available asset IDs.\n2. **THINK:** Based on '{original_user_prompt}', what is the single next logical action using ONLY the available actions?\n3. **ACT:** Formulate your decision as a JSON object. Do NOT paste large code blocks into parameters; use their asset IDs instead."""
+                action_schema = {"thought": "My reasoning.", "action": {"tool_name": "string", "tool_params": "object"}}
+                action_data = self.generate_structured_content(prompt=reasoning_prompt, schema=action_schema, system_prompt=reasoning_system_prompt, temperature=decision_temperature, **llm_generation_kwargs)

-
-
-
---- CONTEXT ---
-{user_context}
---- YOUR INTERNAL SCRATCHPAD (Work History & Analysis) ---
-{current_scratchpad}
---- END OF SCRATCHPAD ---
-
-**INSTRUCTIONS:**
-1. **OBSERVE:** Review the `Observation` from your most recent step in the scratchpad.
-2. **THINK:**
-    - Does the latest observation completely fulfill the user's original request?
-    - If YES, your next action MUST be to use the `final_answer` tool.
-    - If NO, what is the single next logical step needed? This may involve writing code first with `prepare_code`, then using another tool.
-    - If you are stuck or the request is ambiguous, use `local_tools::request_clarification`.
-3. **ACT:** Formulate your decision as a JSON object.
-** Important ** Always use this format alias::tool_name to call the tool
-"""
-                action_template = {
-                    "thought": "My detailed analysis of the last observation and my reasoning for the next action and how it integrates with my global plan.",
-                    "action": {
-                        "tool_name": "The single tool to use (e.g., 'local_tools::prepare_code', 'local_tools::final_answer').",
-                        "tool_params": {"param1": "value1"},
-                        "clarification_question": "(string, ONLY if tool_name is 'local_tools::request_clarification')"
-                    }
-                }
-                if debug: log_prompt(reasoning_prompt_template, f"REASONING PROMPT (Step {i+1})")
-                structured_action_response = self.generate_code(
-                    prompt=reasoning_prompt_template, template=json.dumps(action_template, indent=2),
-                    system_prompt=reasoning_system_prompt, temperature=decision_temperature,
-                    images=images if i == 0 else None
-                )
-                if structured_action_response is None:
-                    log_event("**Error generating thought.** Retrying..", MSG_TYPE.MSG_TYPE_EXCEPTION)
+                if not action_data or not isinstance(action_data.get("action"), dict):
+                    log_event("Failed to generate a valid JSON action. Will retry.", MSG_TYPE.MSG_TYPE_WARNING, event_id=reasoning_step_id)
+                    current_scratchpad += "\n\n### Step Failure\n- **Error:** Failed to produce a valid JSON action."
                    continue
-                if debug: log_prompt(structured_action_response, f"RAW REASONING RESPONSE (Step {i+1})")
-
-                try:
-                    action_data = robust_json_parser(structured_action_response)
-                    thought = action_data.get("thought", "No thought was generated.")
-                    action = action_data.get("action", {})
-                    if isinstance(action,str):
-                        tool_name = action
-                        tool_params = {}
-                    else:
-                        tool_name = action.get("tool_name")
-                        tool_params = action.get("tool_params", {})
-                except (json.JSONDecodeError, TypeError) as e:
-                    current_scratchpad += f"\n\n### Step {i+1} Failure\n- **Error:** Failed to generate a valid JSON action: {e}"
-                    log_event(f"Step Failure: Invalid JSON action.", MSG_TYPE.MSG_TYPE_EXCEPTION, metadata={"details": str(e)})
-                    if reasoning_step_id: log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**", MSG_TYPE.MSG_TYPE_STEP_END, metadata={"error": str(e)}, event_id=reasoning_step_id)
-

+                thought, action = action_data.get("thought", ""), action_data.get("action", {})
+                tool_name, tool_params = action.get("tool_name"), action.get("tool_params", {})
                current_scratchpad += f"\n\n### Step {i+1}: Thought\n{thought}"
-                log_event(
+                log_event(thought, MSG_TYPE.MSG_TYPE_THOUGHT_CONTENT)

-                if
-                    # Handle error...
-                    break
-
-                # --- Handle special, non-executing tools ---
+                if tool_name == "local_tools::final_answer": break
                if tool_name == "local_tools::request_clarification":
-
-                    if isinstance(action, dict):
-                        return {"final_answer": action.get("clarification_question", "Could you please provide more details?"), "final_scratchpad": current_scratchpad, "tool_calls": tool_calls_this_turn, "sources": sources_this_turn, "clarification_required": True, "error": None}
-                    elif isinstance(action, str):
-                        return {"final_answer": action, "final_scratchpad": current_scratchpad, "tool_calls": tool_calls_this_turn, "sources": sources_this_turn, "clarification_required": True, "error": None}
-                    else:
-                        return {"final_answer": "Could you please provide more details?", "final_scratchpad": current_scratchpad, "tool_calls": tool_calls_this_turn, "sources": sources_this_turn, "clarification_required": True, "error": None}
-                if tool_name == "local_tools::final_answer":
-                    current_scratchpad += f"\n\n### Step {i+1}: Action\n- **Action:** Decided to formulate the final answer."
-                    log_event("**Action**: Formulate final answer.", MSG_TYPE.MSG_TYPE_THOUGHT_CHUNK)
-                    if reasoning_step_id: log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**",MSG_TYPE.MSG_TYPE_STEP_END, event_id=reasoning_step_id)
-                    break
+                    return {"final_answer": tool_params.get("question_to_user", "?"), "final_scratchpad": current_scratchpad, "tool_calls": tool_calls_this_turn, "sources": sources_this_turn, "clarification_required": True, "error": None}

-
-                if tool_name ==
-
-
-
-
-
-
-
-
-
-
-                    tool_result = {"status": "success", "code_id": code_uuid, "summary": f"Code generated successfully. Use this ID in the next tool call that requires code."}
-                    tool_calls_this_turn.append({"name": "prepare_code", "params": tool_params, "result": tool_result})
-                    observation_text = f"```json\n{json.dumps(tool_result, indent=2)}\n```"
-                    current_scratchpad += f"\n\n### Step {i+1}: Observation\n- **Action:** Called `{tool_name}`\n- **Result:**\n{observation_text}"
-                    log_event(f"Code generated with ID: {code_uuid}", MSG_TYPE.MSG_TYPE_OBSERVATION)
-                    if code_gen_id: log_event(f"Generating code...", MSG_TYPE.MSG_TYPE_TOOL_CALL, metadata={"id": code_gen_id, "result": tool_result})
-                    if reasoning_step_id: log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**", MSG_TYPE.MSG_TYPE_STEP_END, event_id= reasoning_step_id)
-                    continue # Go to the next reasoning step immediately
-                if tool_name == 'local_tools::view_generated_code':
-                    code_id = tool_params.get("code_id")
-                    if code_id:
-                        tool_result = {"status": "success", "code_id": code_id, "generated_code":generated_code_store[code_uuid]}
-                    else:
-                        tool_result = {"status": "error", "code_id": code_id, "error":"Unknown uuid"}
-                    observation_text = f"```json\n{json.dumps(tool_result, indent=2)}\n```"
-                    current_scratchpad += f"\n\n### Step {i+1}: Observation\n- **Action:** Called `{tool_name}`\n- **Result:**\n{observation_text}"
-                    log_event(f"Result from `{tool_name}`:\n```\n{generated_code_store[code_uuid]}\n```\n", MSG_TYPE.MSG_TYPE_TOOL_CALL, metadata={"id": code_gen_id, "result": tool_result})
-                    continue
-                if tool_name == 'local_tools::refactor_scratchpad':
-                    scratchpad_cleaning_prompt = f"""Enhance this scratchpad content to be more organized and comprehensive. Keep relevant experience information and remove any useless redundancies. Try to log learned things from the context so that you won't make the same mistakes again. Do not remove the main objective information or any crucial information that may be useful for the next iterations. Answer directly with the new scratchpad content without any comments.
---- YOUR INTERNAL SCRATCHPAD (Work History & Analysis) ---
-{current_scratchpad}
---- END OF SCRATCHPAD ---"""
-                    current_scratchpad = self.generate_text(scratchpad_cleaning_prompt)
-                    log_event(f"**New scratchpad**:\n{current_scratchpad}", MSG_TYPE.MSG_TYPE_SCRATCHPAD)
-
-                # --- Substitute UUIDs and Execute Standard Tools ---
-                log_event(f"**Calling tool**: `{tool_name}` with params:\n{dict_to_markdown(tool_params)}", MSG_TYPE.MSG_TYPE_TOOL_CALL)
-                _substitute_code_uuids_recursive(tool_params, generated_code_store)
-
-                tool_call_id = log_event(f"**Executing tool**: {tool_name}",MSG_TYPE.MSG_TYPE_STEP_START, metadata={"name": tool_name, "parameters": tool_params, "id":"executing tool"})
-                tool_result = None
-                try:
-                    if tool_name.startswith("research::") and use_data_store:
-                        store_name = tool_name.split("::")[1]
-                        rag_callable = use_data_store.get(store_name, {}).get("callable")
-                        query = tool_params.get("query", "")
-                        retrieved_chunks = rag_callable(query, rag_top_k=rag_top_k, rag_min_similarity_percent=rag_min_similarity_percent)
-                        if retrieved_chunks:
-                            sources_this_turn.extend(retrieved_chunks)
-                            tool_result = {"status": "success", "summary": f"Found {len(retrieved_chunks)} relevant chunks.", "chunks": retrieved_chunks}
+                tool_result = {"status": "failure", "error": f"Tool '{tool_name}' was called but did not execute properly."} # Default error
+                if tool_name == "local_tools::generate_and_call":
+                    chain_id = log_event(f"Starting chained tool call...", MSG_TYPE.MSG_TYPE_STEP_START)
+                    try:
+                        code_gen_prompt, lang = tool_params.get("code_generation_prompt", ""), tool_params.get("language", "python")
+                        next_tool_name, next_tool_params = tool_params.get("next_tool_name"), tool_params.get("next_tool_params", {})
+                        log_event("Received parameters for chain", MSG_TYPE.MSG_TYPE_STEP, meta={"parent_id": chain_id, "params": tool_params})
+
+                        if not (use_mcps and hasattr(self, 'mcp')):
+                            tool_result = {"status": "failure", "error": "MCPs are not enabled, cannot execute tools."}
+                        elif next_tool_name not in code_consuming_tools:
+                            tool_result = {"status": "failure", "error": f"Tool '{next_tool_name}' is not a valid code-consuming tool. Valid options are: {list(code_consuming_tools)}"}
                        else:
-
-
-
-
-
-
-
-
-
-
-
-
-
+                            def _hydrate(text: str, store: Dict) -> str:
+                                for k, v in store.items(): text = text.replace(k, v.get('content',''))
+                                return text
+                            hydrated_prompt = _hydrate(code_gen_prompt, asset_store)
+                            log_event(f"Generating {lang} code for {next_tool_name}", MSG_TYPE.MSG_TYPE_STEP, meta={"parent_id": chain_id, "hydrated_prompt": hydrated_prompt})
+                            generated_code = self.generate_code(prompt=hydrated_prompt, system_prompt=f"Generate ONLY raw {lang} code.", **llm_generation_kwargs)
+
+                            def _substitute(data: Any) -> Any:
+                                if isinstance(data, dict): return {k: _substitute(v) for k, v in data.items()}
+                                if isinstance(data, list): return [_substitute(item) for item in data]
+                                if isinstance(data, str) and data == CODE_PLACEHOLDER: return generated_code
+                                return data
+                            hydrated_params = _substitute(next_tool_params)
+
+                            log_event(f"Calling tool: {next_tool_name}", MSG_TYPE.MSG_TYPE_TOOL_CALL, meta={"parent_id": chain_id, "name": next_tool_name, "parameters": hydrated_params})
+                            tool_result = self.mcp.execute_tool(next_tool_name, hydrated_params, lollms_client_instance=self)
+                    except Exception as e:
+                        tool_result = {"status": "failure", "error": f"Exception in chained tool logic: {str(e)}"}
+                    log_event(f"Finished chained tool call.", MSG_TYPE.MSG_TYPE_STEP_END, event_id=chain_id)
+                # ... other non-code tool handlers ...
+
+                # --- Process and Sanitize ALL Tool Outputs for the Scratchpad ---
                sanitized_result = {}
                if isinstance(tool_result, dict):
                    sanitized_result = tool_result.copy()
-                    summarized_fields = {}
                    for key, value in tool_result.items():
-                        if isinstance(value, str) and
-
-
-
-                            if streaming_callback: streaming_callback(f"Summarizing long output from field '{key}'...", MSG_TYPE.MSG_TYPE_STEP, {"type": "summarization"})
-                            summary = self.sequential_summarize(text=value, chunk_processing_prompt=f"Summarize key info from this chunk of '{key}'.", callback=streaming_callback)
-                            summarized_fields[key] = summary
-                            sanitized_result[key] = f"[Content summarized, see summary below. Original length: {len(value)} chars]"
-                    observation_text = f"```json\n{json.dumps(sanitized_result, indent=2)}\n```"
-                    if summarized_fields:
-                        observation_text += "\n\n**Summaries of Long Outputs:**"
-                        for key, summary in summarized_fields.items():
-                            observation_text += f"\n- **Summary of '{key}':**\n{summary}"
+                        if isinstance(value, str) and value.startswith("data:image"):
+                            img_uuid = str(uuid.uuid4())
+                            asset_store[img_uuid] = {"type": "image", "content": value}
+                            sanitized_result[key] = f"[Image asset generated: {img_uuid}]"
                else:
-
+                    sanitized_result = {"raw_output": str(tool_result)}
|
|
1795
1637
|
|
|
1638
|
+
observation_text = f"```json\n{json.dumps(sanitized_result, indent=2)}\n```"
|
|
1639
|
+
log_event(f"Received output from: {tool_name}", MSG_TYPE.MSG_TYPE_TOOL_OUTPUT, meta={"name": tool_name, "result": sanitized_result})
|
|
1796
1640
|
tool_calls_this_turn.append({"name": tool_name, "params": tool_params, "result": tool_result})
|
|
1797
1641
|
current_scratchpad += f"\n\n### Step {i+1}: Observation\n- **Action:** Called `{tool_name}`\n- **Result:**\n{observation_text}"
|
|
1798
|
-
log_event(f"
|
|
1799
|
-
|
|
1800
|
-
if reasoning_step_id: log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**", MSG_TYPE.MSG_TYPE_STEP_END, event_id = reasoning_step_id)
|
|
1642
|
+
log_event(f"Finished reasoning step {i+1}", MSG_TYPE.MSG_TYPE_STEP_END, event_id=reasoning_step_id)
|
|
1643
|
+
|
|
1801
1644
|
except Exception as ex:
|
|
1802
1645
|
trace_exception(ex)
|
|
1803
|
-
|
|
1804
|
-
if reasoning_step_id: log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**", MSG_TYPE.MSG_TYPE_STEP_END, event_id = reasoning_step_id)
|
|
1805
|
-
|
|
1806
|
-
# --- Final Answer Synthesis ---
|
|
1807
|
-
synthesis_id = log_event("Synthesizing final answer...", MSG_TYPE.MSG_TYPE_STEP_START)
|
|
1646
|
+
log_event(f"Error in reasoning loop: {str(ex)}", MSG_TYPE.MSG_TYPE_EXCEPTION, event_id=reasoning_step_id)
|
|
1808
1647
|
|
|
1809
|
-
|
|
1810
|
-
|
|
1811
|
-
"{original_user_prompt}"
|
|
1812
|
-
|
|
1813
|
-
|
|
1814
|
-
|
|
1815
|
-
|
|
1816
|
-
- If images were provided by the user, incorporate your analysis of them into the answer.
|
|
1817
|
-
- Do not talk about your internal process unless it's necessary to explain why you couldn't find an answer.
|
|
1818
|
-
"""
|
|
1819
|
-
if debug: log_prompt(final_answer_prompt, "FINAL ANSWER SYNTHESIS PROMPT")
|
|
1820
|
-
|
|
1821
|
-
|
|
1822
|
-
final_answer_text = self.generate_text(prompt=final_answer_prompt, system_prompt=system_prompt, images=images, stream=streaming_callback is not None, streaming_callback=streaming_callback, temperature=final_answer_temperature, **llm_generation_kwargs)
|
|
1823
|
-
if type(final_answer_text) is dict:
|
|
1824
|
-
if streaming_callback:
|
|
1825
|
-
streaming_callback(final_answer_text["error"], MSG_TYPE.MSG_TYPE_EXCEPTION)
|
|
1826
|
-
return {
|
|
1827
|
-
"final_answer": "",
|
|
1828
|
-
"final_scratchpad": current_scratchpad,
|
|
1829
|
-
"tool_calls": tool_calls_this_turn,
|
|
1830
|
-
"sources": sources_this_turn,
|
|
1831
|
-
"clarification_required": False,
|
|
1832
|
-
"error": final_answer_text["error"]
|
|
1833
|
-
}
|
|
1648
|
+
# --- 6. Final Answer Synthesis ---
|
|
1649
|
+
synthesis_id = log_event("Synthesizing final answer...", MSG_TYPE.MSG_TYPE_STEP_START)
|
|
1650
|
+
final_answer_prompt = f"""--- Original User Request ---\n"{original_user_prompt}"\n\n--- Your Internal Scratchpad ---\n{current_scratchpad}\n\n--- INSTRUCTIONS ---\nSynthesize a clear, comprehensive, and friendly answer for the user based ONLY on your scratchpad."""
|
|
1651
|
+
final_synthesis_images = [img for img in (images or [])] + [asset['content'] for asset in asset_store.values() if asset['type'] == 'image']
|
|
1652
|
+
final_answer_text = self.generate_text(prompt=final_answer_prompt, system_prompt=system_prompt, images=final_synthesis_images, stream=streaming_callback is not None, streaming_callback=streaming_callback, temperature=final_answer_temperature, **llm_generation_kwargs)
|
|
1653
|
+
if isinstance(final_answer_text, dict) and "error" in final_answer_text:
|
|
1654
|
+
return {"final_answer": "", "final_scratchpad": current_scratchpad, "tool_calls": tool_calls_this_turn, "sources": sources_this_turn, "clarification_required": False, "error": final_answer_text["error"]}
|
|
1834
1655
|
final_answer = self.remove_thinking_blocks(final_answer_text)
|
|
1835
|
-
|
|
1656
|
+
log_event("Finished synthesizing answer.", MSG_TYPE.MSG_TYPE_STEP_END, event_id=synthesis_id)
|
|
1836
1657
|
|
|
1837
|
-
|
|
1658
|
+
return {"final_answer": final_answer, "final_scratchpad": current_scratchpad, "tool_calls": tool_calls_this_turn, "sources": sources_this_turn, "clarification_required": False, "error": None}
|
|
1838
1659
|
|
|
1839
|
-
return {
|
|
1840
|
-
"final_answer": final_answer,
|
|
1841
|
-
"final_scratchpad": current_scratchpad,
|
|
1842
|
-
"tool_calls": tool_calls_this_turn,
|
|
1843
|
-
"sources": sources_this_turn,
|
|
1844
|
-
"clarification_required": False,
|
|
1845
|
-
"error": None
|
|
1846
|
-
}
|
|
1847
1660
|
def generate_code(
|
|
1848
1661
|
self,
|
|
1849
1662
|
prompt:str,
|
|
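The new `local_tools::generate_and_call` handler above chains three small ideas: a default failure `tool_result` that stands until a handler overwrites it, hydration of UUID-keyed assets from `asset_store` into the code-generation prompt, and recursive substitution of a placeholder marker inside the next tool's parameters before dispatching through `self.mcp.execute_tool`. The sketch below replays that flow outside the class as a minimal, self-contained example; the placeholder value, the tool names, and the toy `generate_code`/`execute_tool` callables are illustrative stand-ins, not the library's actual API.

# Minimal sketch of the generate-and-call chaining pattern (illustrative only).
# generate_code() and execute_tool() are hypothetical stand-ins for
# LollmsClient.generate_code and the MCP binding's execute_tool.
import uuid
from typing import Any, Dict

CODE_PLACEHOLDER = "{{GENERATED_CODE}}"  # assumed marker; replaced by the generated code

def _hydrate(text: str, store: Dict[str, Dict[str, str]]) -> str:
    # Replace asset UUIDs appearing in the prompt with the stored asset content.
    for key, asset in store.items():
        text = text.replace(key, asset.get("content", ""))
    return text

def _substitute(data: Any, generated_code: str) -> Any:
    # Recursively swap the placeholder for the generated code in the tool params.
    if isinstance(data, dict):
        return {k: _substitute(v, generated_code) for k, v in data.items()}
    if isinstance(data, list):
        return [_substitute(item, generated_code) for item in data]
    if isinstance(data, str) and data == CODE_PLACEHOLDER:
        return generated_code
    return data

def generate_code(prompt: str) -> str:              # toy LLM call
    return f"print('generated from: {prompt[:40]}')"

def execute_tool(name: str, params: Dict) -> Dict:  # toy MCP call
    return {"status": "success", "tool": name, "params": params}

# An earlier step produced an image asset; the prompt references it by UUID only.
asset_store: Dict[str, Dict[str, str]] = {}
img_uuid = str(uuid.uuid4())
asset_store[img_uuid] = {"type": "image", "content": "data:image/png;base64,..."}

tool_params = {
    "code_generation_prompt": f"Write code that post-processes {img_uuid}",
    "language": "python",
    "next_tool_name": "python_interpreter::run",   # hypothetical tool name
    "next_tool_params": {"code": CODE_PLACEHOLDER},
}

hydrated_prompt = _hydrate(tool_params["code_generation_prompt"], asset_store)
code = generate_code(hydrated_prompt)
hydrated_params = _substitute(tool_params["next_tool_params"], code)
print(execute_tool(tool_params["next_tool_name"], hydrated_params))

Keeping bulky payloads such as base64 images in `asset_store` and passing only their UUIDs through the scratchpad is what lets the diff drop the older `sequential_summarize`-based sanitization in favor of the lighter `[Image asset generated: …]` placeholder, while the full assets are still handed to the final synthesis step.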
@@ -2497,7 +2310,7 @@ Do not split the code in multiple tags.
             callback = self.sink
 
         if ctx_size is None:
-            ctx_size = self.default_ctx_size or 8192 # Provide a fallback default
+            ctx_size = self.llm.default_ctx_size or 8192 # Provide a fallback default
         if chunk_size is None:
             chunk_size = ctx_size // 4
         if overlap is None:
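This hunk, like several below, moves the fallback defaults from the client object onto the active LLM binding exposed as `self.llm`, so each binding's own context size is honored before the hard-coded 8192 fallback. A minimal sketch of that resolution order, with a simplified binding class standing in for the real lollms_client bindings (not the library's actual API):

# Sketch of the binding-level default fallback; LLMBinding is a simplified
# stand-in for the real binding classes.
from dataclasses import dataclass
from typing import Optional

@dataclass
class LLMBinding:
    default_ctx_size: Optional[int] = None
    default_n_predict: Optional[int] = None

def resolve_ctx_size(llm: LLMBinding, ctx_size: Optional[int] = None) -> int:
    if ctx_size is None:
        ctx_size = llm.default_ctx_size or 8192  # same fallback chain as the diff
    return ctx_size

print(resolve_ctx_size(LLMBinding(default_ctx_size=4096)))        # 4096 (binding default)
print(resolve_ctx_size(LLMBinding()))                             # 8192 (hard fallback)
print(resolve_ctx_size(LLMBinding(default_ctx_size=4096), 2048))  # 2048 (caller override)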
@@ -2573,7 +2386,7 @@ Current document analysis memory:
         # Process text in chunks
         while start_token_idx < total_tokens:
             # Calculate available tokens for chunk + memory
-            available_tokens_for_dynamic_content = ctx_size - static_tokens - (self.default_n_predict or 1024) # Reserve space for output
+            available_tokens_for_dynamic_content = ctx_size - static_tokens - (self.llm.default_n_predict or 1024) # Reserve space for output
             if available_tokens_for_dynamic_content <= 100: # Need some minimum space
                 ASCIIColors.error("Context size too small for summarization with current settings.")
                 return "Error: Context size too small."
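The budget check in this hunk reserves room for the fixed prompt scaffolding and for the model's reply before deciding how much chunk-plus-memory text fits. A worked example of that arithmetic, with illustrative numbers:

# Illustrative numbers only: how the dynamic-content budget is derived.
ctx_size = 8192           # resolved from the binding, or the 8192 fallback
static_tokens = 900       # tokens consumed by the fixed prompt template
default_n_predict = 1024  # space reserved for the model's output

available_tokens_for_dynamic_content = ctx_size - static_tokens - default_n_predict
print(available_tokens_for_dynamic_content)        # 6268 tokens left for chunk + memory
assert available_tokens_for_dynamic_content > 100  # below 100 the method aborts with an error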
@@ -2610,7 +2423,7 @@ Current document analysis memory:
                 ASCIIColors.magenta(f"--- Chunk {chunk_id} Prompt ---")
                 ASCIIColors.cyan(prompt)
 
-            response = self.generate_text(prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback)
+            response = self.generate_text(prompt, n_predict=(self.llm.default_n_predict or 1024), streaming_callback=callback)
 
             if isinstance(response, dict): # Handle generation error
                 ASCIIColors.error(f"Chunk {chunk_id} processing failed: {response.get('error')}")
@@ -2669,7 +2482,7 @@ The final output must be put inside a {final_output_format} markdown tag.
         final_example_prompt = final_prompt_template.format(memory="<final_memory>")
         try:
             final_static_tokens = len(self.tokenize(final_example_prompt)) - len(self.tokenize("<final_memory>"))
-            available_final_tokens = ctx_size - final_static_tokens - (self.default_n_predict or 1024) # Reserve space for output
+            available_final_tokens = ctx_size - final_static_tokens - (self.llm.default_n_predict or 1024) # Reserve space for output
         except RuntimeError as e:
             ASCIIColors.error(f"Tokenization failed during final setup: {e}")
             return "Error: Could not calculate final prompt size."
@@ -2686,7 +2499,7 @@ The final output must be put inside a {final_output_format} markdown tag.
             ASCIIColors.magenta("--- Final Aggregation Prompt ---")
             ASCIIColors.cyan(final_prompt)
 
-        final_summary_raw = self.generate_text(final_prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback)
+        final_summary_raw = self.generate_text(final_prompt, n_predict=(self.llm.default_n_predict or 1024), streaming_callback=callback)
 
         if isinstance(final_summary_raw, dict):
             ASCIIColors.error(f"Final aggregation failed: {final_summary_raw.get('error')}")
@@ -2742,7 +2555,7 @@ The final output must be put inside a {final_output_format} markdown tag.
 
         # Set defaults and validate input
         if ctx_size is None:
-            ctx_size = self.default_ctx_size or 8192
+            ctx_size = self.llm.default_ctx_size or 8192
         if chunk_size is None:
             chunk_size = ctx_size // 4
         if overlap is None:
@@ -2845,7 +2658,7 @@ Task: Update the markdown memory by adding new information from this chunk relev
 
         while start_token_idx < len(file_tokens):
             # Calculate available space dynamically
-            available_tokens_for_dynamic_content = ctx_size - static_tokens - (self.default_n_predict or 1024)
+            available_tokens_for_dynamic_content = ctx_size - static_tokens - (self.llm.default_n_predict or 1024)
             if available_tokens_for_dynamic_content <= 100:
                 ASCIIColors.error(f"Context window too small during analysis of {file_name}.")
                 # Option: try truncating memory drastically or break
@@ -2885,7 +2698,7 @@ Task: Update the markdown memory by adding new information from this chunk relev
                 ASCIIColors.magenta(f"--- Deep Analysis Prompt (Global Chunk {global_chunk_id}) ---")
                 ASCIIColors.cyan(prompt)
 
-            response = self.generate_text(prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback) # Use main callback for streaming output
+            response = self.generate_text(prompt, n_predict=(self.llm.default_n_predict or 1024), streaming_callback=callback) # Use main callback for streaming output
 
             if isinstance(response, dict): # Handle error
                 ASCIIColors.error(f"Chunk processing failed (Global {global_chunk_id}): {response.get('error')}")
@@ -2940,7 +2753,7 @@ Provide the final aggregated answer in {output_format} format, directly addressi
         final_example_prompt = final_prompt.replace("{memory}", "<final_memory>")
         try:
             final_static_tokens = len(self.tokenize(final_example_prompt)) - len(self.tokenize("<final_memory>"))
-            available_final_tokens = ctx_size - final_static_tokens - (self.default_n_predict or 1024)
+            available_final_tokens = ctx_size - final_static_tokens - (self.llm.default_n_predict or 1024)
         except RuntimeError as e:
             ASCIIColors.error(f"Tokenization failed during final setup: {e}")
             return "Error: Could not calculate final prompt size."
@@ -2956,7 +2769,7 @@ Provide the final aggregated answer in {output_format} format, directly addressi
             ASCIIColors.magenta("--- Final Aggregation Prompt ---")
             ASCIIColors.cyan(final_prompt)
 
-        final_output_raw = self.generate_text(final_prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback) # Use main callback
+        final_output_raw = self.generate_text(final_prompt, n_predict=(self.llm.default_n_predict or 1024), streaming_callback=callback) # Use main callback
 
         if isinstance(final_output_raw, dict):
             ASCIIColors.error(f"Final aggregation failed: {final_output_raw.get('error')}")
@@ -3031,9 +2844,9 @@ Provide the final aggregated answer in {output_format} format, directly addressi
             tokens = []
         else:
             # Use the binding's tokenizer for accurate chunking
-            tokens = self.tokenize(text_to_process)
+            tokens = self.llm.tokenize(text_to_process)
         if chunk_size_tokens is None:
-            chunk_size_tokens = self.default_ctx_size//2
+            chunk_size_tokens = self.llm.default_ctx_size//2
 
         if len(tokens) <= chunk_size_tokens:
             if streaming_callback:
@@ -3064,7 +2877,7 @@ Provide the final aggregated answer in {output_format} format, directly addressi
         step = chunk_size_tokens - overlap_tokens
         for i in range(0, len(tokens), step):
             chunk_tokens = tokens[i:i + chunk_size_tokens]
-            chunk_text = self.detokenize(chunk_tokens)
+            chunk_text = self.llm.detokenize(chunk_tokens)
             chunks.append(chunk_text)
 
         chunk_summaries = []
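The chunking loop in this last hunk now routes through the binding's tokenizer (`self.llm.tokenize` / `self.llm.detokenize`). The sketch below reproduces the same overlapping-window walk over a token list; the whitespace tokenizer is only a stand-in for the binding's real tokenizer, and the numbers are illustrative.

# Overlapping token-window chunking, mirroring the loop in the hunk above.
# tokenize/detokenize are toy stand-ins for the binding's methods.
from typing import List

def tokenize(text: str) -> List[str]:
    return text.split()

def detokenize(tokens: List[str]) -> str:
    return " ".join(tokens)

text_to_process = " ".join(f"tok{i}" for i in range(10))
tokens = tokenize(text_to_process)

chunk_size_tokens = 4
overlap_tokens = 1
step = chunk_size_tokens - overlap_tokens  # advance by chunk size minus overlap

chunks = []
for i in range(0, len(tokens), step):
    chunk_tokens = tokens[i:i + chunk_size_tokens]
    chunks.append(detokenize(chunk_tokens))

print(chunks)
# ['tok0 tok1 tok2 tok3', 'tok3 tok4 tok5 tok6', 'tok6 tok7 tok8 tok9', 'tok9']
# Each window re-reads one token from the previous window, so no boundary text is lost.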