lollms-client 0.32.1__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lollms-client might be problematic.
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/azure_openai/__init__.py +6 -10
- lollms_client/llm_bindings/claude/__init__.py +4 -7
- lollms_client/llm_bindings/gemini/__init__.py +3 -7
- lollms_client/llm_bindings/grok/__init__.py +3 -7
- lollms_client/llm_bindings/groq/__init__.py +4 -7
- lollms_client/llm_bindings/hugging_face_inference_api/__init__.py +4 -6
- lollms_client/llm_bindings/litellm/__init__.py +15 -6
- lollms_client/llm_bindings/llamacpp/__init__.py +214 -388
- lollms_client/llm_bindings/lollms/__init__.py +24 -14
- lollms_client/llm_bindings/lollms_webui/__init__.py +6 -12
- lollms_client/llm_bindings/mistral/__init__.py +58 -29
- lollms_client/llm_bindings/ollama/__init__.py +6 -11
- lollms_client/llm_bindings/open_router/__init__.py +45 -14
- lollms_client/llm_bindings/openai/__init__.py +7 -14
- lollms_client/llm_bindings/openllm/__init__.py +12 -12
- lollms_client/llm_bindings/pythonllamacpp/__init__.py +1 -1
- lollms_client/llm_bindings/tensor_rt/__init__.py +8 -13
- lollms_client/llm_bindings/transformers/__init__.py +14 -6
- lollms_client/llm_bindings/vllm/__init__.py +16 -12
- lollms_client/lollms_core.py +296 -487
- lollms_client/lollms_discussion.py +436 -78
- lollms_client/lollms_llm_binding.py +223 -11
- lollms_client/lollms_mcp_binding.py +33 -2
- lollms_client/mcp_bindings/local_mcp/__init__.py +3 -2
- lollms_client/mcp_bindings/remote_mcp/__init__.py +6 -5
- lollms_client/mcp_bindings/standard_mcp/__init__.py +3 -5
- lollms_client/stt_bindings/lollms/__init__.py +6 -8
- lollms_client/stt_bindings/whisper/__init__.py +2 -4
- lollms_client/stt_bindings/whispercpp/__init__.py +15 -16
- lollms_client/tti_bindings/dalle/__init__.py +29 -28
- lollms_client/tti_bindings/diffusers/__init__.py +25 -21
- lollms_client/tti_bindings/gemini/__init__.py +215 -0
- lollms_client/tti_bindings/lollms/__init__.py +8 -9
- lollms_client-1.0.0.dist-info/METADATA +1214 -0
- lollms_client-1.0.0.dist-info/RECORD +69 -0
- {lollms_client-0.32.1.dist-info → lollms_client-1.0.0.dist-info}/top_level.txt +0 -2
- examples/article_summary/article_summary.py +0 -58
- examples/console_discussion/console_app.py +0 -266
- examples/console_discussion.py +0 -448
- examples/deep_analyze/deep_analyse.py +0 -30
- examples/deep_analyze/deep_analyze_multiple_files.py +0 -32
- examples/function_calling_with_local_custom_mcp.py +0 -250
- examples/generate_a_benchmark_for_safe_store.py +0 -89
- examples/generate_and_speak/generate_and_speak.py +0 -251
- examples/generate_game_sfx/generate_game_fx.py +0 -240
- examples/generate_text_with_multihop_rag_example.py +0 -210
- examples/gradio_chat_app.py +0 -228
- examples/gradio_lollms_chat.py +0 -259
- examples/internet_search_with_rag.py +0 -226
- examples/lollms_chat/calculator.py +0 -59
- examples/lollms_chat/derivative.py +0 -48
- examples/lollms_chat/test_openai_compatible_with_lollms_chat.py +0 -12
- examples/lollms_discussions_test.py +0 -155
- examples/mcp_examples/external_mcp.py +0 -267
- examples/mcp_examples/local_mcp.py +0 -171
- examples/mcp_examples/openai_mcp.py +0 -203
- examples/mcp_examples/run_remote_mcp_example_v2.py +0 -290
- examples/mcp_examples/run_standard_mcp_example.py +0 -204
- examples/simple_text_gen_test.py +0 -173
- examples/simple_text_gen_with_image_test.py +0 -178
- examples/test_local_models/local_chat.py +0 -9
- examples/text_2_audio.py +0 -77
- examples/text_2_image.py +0 -144
- examples/text_2_image_diffusers.py +0 -274
- examples/text_and_image_2_audio.py +0 -59
- examples/text_gen.py +0 -30
- examples/text_gen_system_prompt.py +0 -29
- lollms_client-0.32.1.dist-info/METADATA +0 -854
- lollms_client-0.32.1.dist-info/RECORD +0 -101
- test/test_lollms_discussion.py +0 -368
- {lollms_client-0.32.1.dist-info → lollms_client-1.0.0.dist-info}/WHEEL +0 -0
- {lollms_client-0.32.1.dist-info → lollms_client-1.0.0.dist-info}/licenses/LICENSE +0 -0
lollms_client/lollms_core.py
CHANGED
@@ -30,15 +30,9 @@ class LollmsClient():
     Provides a unified interface to manage and use different bindings for various modalities.
     """
     def __init__(self,
-                 # LLM Binding Parameters
-                 binding_name: str = "lollms",
-                 host_address: Optional[str] = None, # Shared host address (for service based bindings) default for all bindings if not specified
-                 models_path: Optional[str] = None, # Shared models folder path (for local file based bindings) default for all bindings if not specified
-                 model_name: str = "",
-                 llm_bindings_dir: Path = Path(__file__).parent / "llm_bindings",
-                 llm_binding_config: Optional[Dict[str, any]] = None,

                 # Optional Modality Binding Names
+                 llm_binding_name: str = "lollms",
                 tts_binding_name: Optional[str] = None,
                 tti_binding_name: Optional[str] = None,
                 stt_binding_name: Optional[str] = None,
@@ -47,6 +41,7 @@ class LollmsClient():
                 mcp_binding_name: Optional[str] = None,

                 # Modality Binding Directories
+                 llm_bindings_dir: Path = Path(__file__).parent / "llm_bindings",
                 tts_bindings_dir: Path = Path(__file__).parent / "tts_bindings",
                 tti_bindings_dir: Path = Path(__file__).parent / "tti_bindings",
                 stt_bindings_dir: Path = Path(__file__).parent / "stt_bindings",
@@ -55,28 +50,13 @@ class LollmsClient():
                 mcp_bindings_dir: Path = Path(__file__).parent / "mcp_bindings",

                 # Configurations
+                 llm_binding_config: Optional[Dict[str, any]] = None,
                 tts_binding_config: Optional[Dict[str, any]] = None,
                 tti_binding_config: Optional[Dict[str, any]] = None,
                 stt_binding_config: Optional[Dict[str, any]] = None,
                 ttv_binding_config: Optional[Dict[str, any]] = None,
                 ttm_binding_config: Optional[Dict[str, any]] = None,
                 mcp_binding_config: Optional[Dict[str, any]] = None,
-
-                 # General Parameters (mostly defaults for LLM generation)
-                 service_key: Optional[str] = None, # Shared service key/client_id
-                 verify_ssl_certificate: bool = True,
-                 ctx_size: Optional[int|None] = None,
-                 n_predict: Optional[int|None] = None,
-                 stream: bool = False,
-                 temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
-                 top_k: int = 40, # Ollama default is 40
-                 top_p: float = 0.9, # Ollama default is 0.9
-                 repeat_penalty: float = 1.1, # Ollama default is 1.1
-                 repeat_last_n: int = 64, # Ollama default is 64
-
-                 seed: Optional[int] = None,
-                 n_threads: int = 8,
-                 streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
                 user_name ="user",
                 ai_name = "assistant",
                 **kwargs
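The hunk above is the headline breaking change of 1.0.0: `binding_name`, `host_address`, `models_path`, `model_name`, `service_key`, `verify_ssl_certificate` and the shared generation defaults disappear from `LollmsClient.__init__`, replaced by `llm_binding_name` plus a per-binding `llm_binding_config` dict. A minimal migration sketch follows; the binding name and config keys are illustrative assumptions, not values taken from this diff.

```python
from lollms_client import LollmsClient

# 0.32.1 style (parameters removed in 1.0.0):
# client = LollmsClient(binding_name="ollama", host_address="http://localhost:11434",
#                       model_name="mistral", temperature=0.7)

# 1.0.0 style: one binding name plus one config dict per binding.
# The keys below are assumptions; check the target binding's __init__ for the
# options it actually accepts.
client = LollmsClient(
    llm_binding_name="ollama",
    llm_binding_config={
        "host_address": "http://localhost:11434",
        "model_name": "mistral",
    },
)
```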
@@ -125,27 +105,20 @@ class LollmsClient():
        Raises:
            ValueError: If the primary LLM binding cannot be created.
        """
-        self.host_address = host_address # Store initial preference
-        self.models_path = models_path
-        self.service_key = service_key
-        self.verify_ssl_certificate = verify_ssl_certificate
-
        # --- LLM Binding Setup ---
-        self.
-        self.
-            binding_name=
-
-
-
-
-
-            # Pass LLM specific config if needed
-            **(llm_binding_config or {})
+        self.llm_binding_manager = LollmsLLMBindingManager(llm_bindings_dir)
+        self.llm = self.llm_binding_manager.create_binding(
+            binding_name=llm_binding_name,
+            **{
+                k: v
+                for k, v in (llm_binding_config or {}).items()
+                if k != "binding_name"
+            }
        )

-        if self.
-            available = self.
-            raise ValueError(f"Failed to create LLM binding: {
+        if self.llm is None:
+            available = self.llm_binding_manager.get_available_bindings()
+            raise ValueError(f"Failed to create LLM binding: {llm_binding_name}. Available: {available}")

        # --- Modality Binding Setup ---
        self.tts_binding_manager = LollmsTTSBindingManager(tts_bindings_dir)
@@ -165,7 +138,11 @@ class LollmsClient():
        if tts_binding_name:
            self.tts = self.tts_binding_manager.create_binding(
                binding_name=tts_binding_name,
-                **
+                **{
+                    k: v
+                    for k, v in (tts_binding_config or {}).items()
+                    if k != "binding_name"
+                }
            )
            if self.tts is None:
                ASCIIColors.warning(f"Failed to create TTS binding: {tts_binding_name}. Available: {self.tts_binding_manager.get_available_bindings()}")
@@ -174,7 +151,11 @@ class LollmsClient():
            if tti_binding_config:
                self.tti = self.tti_binding_manager.create_binding(
                    binding_name=tti_binding_name,
-                    **
+                    **{
+                        k: v
+                        for k, v in (tti_binding_config or {}).items()
+                        if k != "binding_name"
+                    }
                )
            else:
                self.tti = self.tti_binding_manager.create_binding(
@@ -187,8 +168,13 @@ class LollmsClient():
            if stt_binding_config:
                self.stt = self.stt_binding_manager.create_binding(
                    binding_name=stt_binding_name,
-                    **
+                    **{
+                        k: v
+                        for k, v in (stt_binding_config or {}).items()
+                        if k != "binding_name"
+                    }
                )
+
            else:
                self.stt = self.stt_binding_manager.create_binding(
                    binding_name=stt_binding_name,
@@ -199,8 +185,13 @@ class LollmsClient():
            if ttv_binding_config:
                self.ttv = self.ttv_binding_manager.create_binding(
                    binding_name=ttv_binding_name,
-                    **
+                    **{
+                        k: v
+                        for k, v in ttv_binding_config.items()
+                        if k != "binding_name"
+                    }
                )
+
            else:
                self.ttv = self.ttv_binding_manager.create_binding(
                    binding_name=ttv_binding_name
@@ -212,7 +203,11 @@ class LollmsClient():
            if ttm_binding_config:
                self.ttm = self.ttm_binding_manager.create_binding(
                    binding_name=ttm_binding_name,
-                    **
+                    **{
+                        k: v
+                        for k, v in (ttm_binding_config or {}).items()
+                        if k != "binding_name"
+                    }
                )
            else:
                self.ttm = self.ttm_binding_manager.create_binding(
@@ -224,8 +219,12 @@ class LollmsClient():
        if mcp_binding_name:
            if mcp_binding_config:
                self.mcp = self.mcp_binding_manager.create_binding(
-                    mcp_binding_name,
-                    **
+                    binding_name=mcp_binding_name,
+                    **{
+                        k: v
+                        for k, v in (mcp_binding_config or {}).items()
+                        if k != "binding_name"
+                    }
                )
            else:
                self.mcp = self.mcp_binding_manager.create_binding(
@@ -235,17 +234,6 @@ class LollmsClient():
                ASCIIColors.warning(f"Failed to create MCP binding: {mcp_binding_name}. Available: {self.mcp_binding_manager.get_available_bindings()}")

        # --- Store Default Generation Parameters ---
-        self.default_ctx_size = ctx_size
-        self.default_n_predict = n_predict
-        self.default_stream = stream
-        self.default_temperature = temperature
-        self.default_top_k = top_k
-        self.default_top_p = top_p
-        self.default_repeat_penalty = repeat_penalty
-        self.default_repeat_last_n = repeat_last_n
-        self.default_seed = seed
-        self.default_n_threads = n_threads
-        self.default_streaming_callback = streaming_callback

        # --- Prompt Formatting Attributes ---
        self.user_name = user_name
@@ -264,35 +252,30 @@ class LollmsClient():
    #
    def update_llm_binding(self, binding_name: str, config: Optional[Dict[str, Any]] = None):
        """Update the LLM binding with a new configuration."""
-        self.
+        self.llm = self.llm_binding_manager.create_binding(
            binding_name=binding_name,
-            host_address=self.host_address,
-            models_path=self.models_path,
-            model_name=self.binding.model_name, # Keep the same model name
-            service_key=self.service_key,
-            verify_ssl_certificate=self.verify_ssl_certificate,
            **(config or {})
        )
-        if self.
-            available = self.
+        if self.llm is None:
+            available = self.llm_binding_manager.get_available_bindings()
            raise ValueError(f"Failed to update LLM binding: {binding_name}. Available: {available}")

    def get_ctx_size(self, model_name:str|None=None):
-        if self.
-            ctx_size = self.
-            return ctx_size if ctx_size else self.default_ctx_size
+        if self.llm:
+            ctx_size = self.llm.get_ctx_size(model_name)
+            return ctx_size if ctx_size else self.llm.default_ctx_size
        else:
            return None

    def get_model_name(self):
-        if self.
-            return self.
+        if self.llm:
+            return self.llm.model_name
        else:
            return None

    def set_model_name(self, model_name)->bool:
-        if self.
-            self.
+        if self.llm:
+            self.llm.model_name = model_name
            return True
        else:
            return False
@@ -400,8 +383,8 @@ class LollmsClient():
        Returns:
            list: List of tokens.
        """
-        if self.
-            return self.
+        if self.llm:
+            return self.llm.tokenize(text)
        raise RuntimeError("LLM binding not initialized.")

    def detokenize(self, tokens: list) -> str:
@@ -414,8 +397,8 @@ class LollmsClient():
        Returns:
            str: Detokenized text.
        """
-        if self.
-            return self.
+        if self.llm:
+            return self.llm.detokenize(tokens)
        raise RuntimeError("LLM binding not initialized.")
    def count_tokens(self, text: str) -> int:
        """
@@ -427,8 +410,8 @@ class LollmsClient():
        Returns:
            int: Number of tokens.
        """
-        if self.
-            return self.
+        if self.llm:
+            return self.llm.count_tokens(text)
        raise RuntimeError("LLM binding not initialized.")

    def count_image_tokens(self, image: str) -> int:
@@ -441,8 +424,8 @@ class LollmsClient():
        Returns:
            int: Estimated number of tokens for the image. Returns -1 on error.
        """
-        if self.
-            return self.
+        if self.llm:
+            return self.llm.count_image_tokens(image)
        raise RuntimeError("LLM binding not initialized.")

    def get_model_details(self) -> dict:
@@ -452,8 +435,8 @@ class LollmsClient():
        Returns:
            dict: Model information dictionary.
        """
-        if self.
-            return self.
+        if self.llm:
+            return self.llm.get_model_info()
        raise RuntimeError("LLM binding not initialized.")

    def switch_model(self, model_name: str) -> bool:
@@ -466,8 +449,8 @@ class LollmsClient():
        Returns:
            bool: True if model loaded successfully, False otherwise.
        """
-        if self.
-            return self.
+        if self.llm:
+            return self.llm.load_model(model_name)
        raise RuntimeError("LLM binding not initialized.")

    def get_available_llm_bindings(self) -> List[str]:
@@ -477,7 +460,7 @@ class LollmsClient():
        Returns:
            List[str]: List of binding names that can be used for LLMs.
        """
-        return self.
+        return self.llm_binding_manager.get_available_bindings()

    def generate_text(self,
                      prompt: str,
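Note that `update_llm_binding` (first hunk above) no longer re-injects the removed shared attributes (`host_address`, `models_path`, `service_key`, `verify_ssl_certificate`, the previous model name), so switching bindings at runtime now means supplying a fresh config dict. A hedged sketch, reusing the hypothetical `client` from the earlier example; the binding name and config keys here are assumptions, not values taken from this diff.

```python
# Hypothetical runtime switch to another binding; the accepted config keys
# depend on that binding's own __init__ and are assumed here.
client.update_llm_binding("openai", {"model_name": "gpt-4o-mini", "service_key": "sk-..."})
print(client.get_model_name())  # now delegates to client.llm.model_name
```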
@@ -523,11 +506,11 @@ class LollmsClient():
        Returns:
            Union[str, dict]: Generated text or error dictionary if failed.
        """
-        if self.
+        if self.llm:

-            ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size if self.default_ctx_size else None
+            ctx_size = ctx_size if ctx_size is not None else self.llm.default_ctx_size if self.llm.default_ctx_size else None
            if ctx_size is None:
-                ctx_size = self.
+                ctx_size = self.llm.get_ctx_size()
                if ctx_size is None:
                    ctx_size = 1024*8 # 1028*8= 8192 tokens, a common default for many models
            nb_input_tokens = self.count_tokens(prompt)+ (sum([self.count_image_tokens(image) for image in images]) if images else 0)
@@ -536,21 +519,21 @@ class LollmsClient():
            ASCIIColors.magenta(f"ctx_size : {ctx_size}")
            ASCIIColors.magenta(f"nb_input_tokens : {nb_input_tokens}")

-            return self.
+            return self.llm.generate_text(
                prompt=prompt,
                images=images,
                system_prompt=system_prompt,
-                n_predict=n_predict if n_predict else self.default_n_predict if self.default_n_predict else ctx_size - nb_input_tokens,
-                stream=stream if stream is not None else self.default_stream,
-                temperature=temperature if temperature is not None else self.default_temperature,
-                top_k=top_k if top_k is not None else self.default_top_k,
-                top_p=top_p if top_p is not None else self.default_top_p,
-                repeat_penalty=repeat_penalty if repeat_penalty is not None else self.default_repeat_penalty,
-                repeat_last_n=repeat_last_n if repeat_last_n is not None else self.default_repeat_last_n,
-                seed=seed if seed is not None else self.default_seed,
-                n_threads=n_threads if n_threads is not None else self.default_n_threads,
-                ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size,
-                streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback,
+                n_predict=n_predict if n_predict else self.llm.default_n_predict if self.llm.default_n_predict else ctx_size - nb_input_tokens,
+                stream=stream if stream is not None else self.llm.default_stream,
+                temperature=temperature if temperature is not None else self.llm.default_temperature,
+                top_k=top_k if top_k is not None else self.llm.default_top_k,
+                top_p=top_p if top_p is not None else self.llm.default_top_p,
+                repeat_penalty=repeat_penalty if repeat_penalty is not None else self.llm.default_repeat_penalty,
+                repeat_last_n=repeat_last_n if repeat_last_n is not None else self.llm.default_repeat_last_n,
+                seed=seed if seed is not None else self.llm.default_seed,
+                n_threads=n_threads if n_threads is not None else self.llm.default_n_threads,
+                ctx_size = ctx_size if ctx_size is not None else self.llm.default_ctx_size,
+                streaming_callback=streaming_callback if streaming_callback is not None else self.llm.default_streaming_callback,
                split= split,
                user_keyword=user_keyword,
                ai_keyword=ai_keyword
@@ -592,20 +575,20 @@ class LollmsClient():
        Returns:
            Union[str, dict]: Generated text or error dictionary if failed.
        """
-        if self.
-            return self.
+        if self.llm:
+            return self.llm.generate_from_messages(
                messages=messages,
-                n_predict=n_predict if n_predict is not None else self.default_n_predict,
-                stream=stream if stream is not None else self.default_stream,
-                temperature=temperature if temperature is not None else self.default_temperature,
-                top_k=top_k if top_k is not None else self.default_top_k,
-                top_p=top_p if top_p is not None else self.default_top_p,
-                repeat_penalty=repeat_penalty if repeat_penalty is not None else self.default_repeat_penalty,
-                repeat_last_n=repeat_last_n if repeat_last_n is not None else self.default_repeat_last_n,
-                seed=seed if seed is not None else self.default_seed,
-                n_threads=n_threads if n_threads is not None else self.default_n_threads,
-                ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size,
-                streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback,
+                n_predict=n_predict if n_predict is not None else self.llm.default_n_predict,
+                stream=stream if stream is not None else self.llm.default_stream,
+                temperature=temperature if temperature is not None else self.llm.default_temperature,
+                top_k=top_k if top_k is not None else self.llm.default_top_k,
+                top_p=top_p if top_p is not None else self.llm.default_top_p,
+                repeat_penalty=repeat_penalty if repeat_penalty is not None else self.llm.default_repeat_penalty,
+                repeat_last_n=repeat_last_n if repeat_last_n is not None else self.llm.default_repeat_last_n,
+                seed=seed if seed is not None else self.llm.default_seed,
+                n_threads=n_threads if n_threads is not None else self.llm.default_n_threads,
+                ctx_size = ctx_size if ctx_size is not None else self.llm.default_ctx_size,
+                streaming_callback=streaming_callback if streaming_callback is not None else self.llm.default_streaming_callback,
            )
        raise RuntimeError("LLM binding not initialized.")

@@ -650,21 +633,21 @@ class LollmsClient():
        Returns:
            Union[str, dict]: Generated text or an error dictionary if failed.
        """
-        if self.
-            return self.
+        if self.llm:
+            return self.llm.chat(
                discussion=discussion,
                branch_tip_id=branch_tip_id,
-                n_predict=n_predict if n_predict is not None else self.default_n_predict,
-                stream=stream if stream is not None else True if streaming_callback is not None else self.default_stream,
-                temperature=temperature if temperature is not None else self.default_temperature,
-                top_k=top_k if top_k is not None else self.default_top_k,
-                top_p=top_p if top_p is not None else self.default_top_p,
-                repeat_penalty=repeat_penalty if repeat_penalty is not None else self.default_repeat_penalty,
-                repeat_last_n=repeat_last_n if repeat_last_n is not None else self.default_repeat_last_n,
-                seed=seed if seed is not None else self.default_seed,
-                n_threads=n_threads if n_threads is not None else self.default_n_threads,
-                ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size,
-                streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback
+                n_predict=n_predict if n_predict is not None else self.llm.default_n_predict,
+                stream=stream if stream is not None else True if streaming_callback is not None else self.llm.default_stream,
+                temperature=temperature if temperature is not None else self.llm.default_temperature,
+                top_k=top_k if top_k is not None else self.llm.default_top_k,
+                top_p=top_p if top_p is not None else self.llm.default_top_p,
+                repeat_penalty=repeat_penalty if repeat_penalty is not None else self.llm.default_repeat_penalty,
+                repeat_last_n=repeat_last_n if repeat_last_n is not None else self.llm.default_repeat_last_n,
+                seed=seed if seed is not None else self.llm.default_seed,
+                n_threads=n_threads if n_threads is not None else self.llm.default_n_threads,
+                ctx_size = ctx_size if ctx_size is not None else self.llm.default_ctx_size,
+                streaming_callback=streaming_callback if streaming_callback is not None else self.llm.default_streaming_callback
            )
        raise RuntimeError("LLM binding not initialized.")

@@ -679,15 +662,15 @@ class LollmsClient():
        Returns:
            list: List of embeddings.
        """
-        if self.
-            return self.
+        if self.llm:
+            return self.llm.embed(text, **kwargs)
        raise RuntimeError("LLM binding not initialized.")


    def listModels(self):
        """Lists models available to the current LLM binding."""
-        if self.
-            return self.
+        if self.llm:
+            return self.llm.listModels()
        raise RuntimeError("LLM binding not initialized.")

    # --- Convenience Methods for Lollms LLM Binding Features ---
@@ -698,8 +681,8 @@ class LollmsClient():
        Returns:
            Union[List[Dict], Dict]: List of personality dicts or error dict.
        """
-        if self.
-            return self.
+        if self.llm and hasattr(self.llm, 'lollms_listMountedPersonalities'):
+            return self.llm.lollms_listMountedPersonalities()
        else:
            ASCIIColors.warning("listMountedPersonalities is only available for the 'lollms' LLM binding.")
            return {"status": False, "error": "Functionality not available for the current binding"}
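With the shared `default_*` attributes gone from the client, the generation helpers above fall back to the defaults carried by the active binding (`self.llm.default_temperature`, `self.llm.default_n_predict`, and so on), while explicit per-call arguments still take precedence. A minimal sketch, continuing the hypothetical `client` from the earlier example:

```python
# Falls back to the binding's own defaults (client.llm.default_temperature, etc.).
text = client.generate_text("Summarize the 1.0.0 changes in one sentence.")

# Explicit arguments still win over the binding defaults.
text = client.generate_text(
    "Summarize the 1.0.0 changes in one sentence.",
    temperature=0.2,
    n_predict=128,
)
```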
@@ -910,7 +893,7 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
        streaming_callback: Optional[Callable[[str, int, Optional[Dict], Optional[List]], bool]] = None,
        **llm_generation_kwargs
    ) -> Dict[str, Any]:
-        if not self.
+        if not self.llm or not self.mcp:
            return {"final_answer": "", "tool_calls": [], "error": "LLM or MCP binding not initialized."}

        turn_history: List[Dict[str, Any]] = []
@@ -1076,7 +1059,7 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
            "- Do not make up information. If the findings are insufficient to fully answer the request, state what you found and what remains unanswered.\n"
            "- Format your response clearly using markdown where appropriate.\n"
        )
-        final_answer_text = self.generate_text(prompt=final_answer_prompt, system_prompt=system_prompt, images=images, stream=streaming_callback is not None, streaming_callback=streaming_callback, temperature=final_answer_temperature if final_answer_temperature is not None else self.default_temperature, **(llm_generation_kwargs or {}))
+        final_answer_text = self.generate_text(prompt=final_answer_prompt, system_prompt=system_prompt, images=images, stream=streaming_callback is not None, streaming_callback=streaming_callback, temperature=final_answer_temperature if final_answer_temperature is not None else self.llm.default_temperature, **(llm_generation_kwargs or {}))

        if streaming_callback:
            streaming_callback("Final answer generation complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "final_answer_synthesis"}, turn_history = turn_history)
@@ -1117,7 +1100,7 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
        """
        Enhanced RAG with dynamic objective refinement and a knowledge scratchpad.
        """
-        if not self.
+        if not self.llm:
            return {"final_answer": "", "rag_hops_history": [], "all_retrieved_sources": [], "error": "LLM binding not initialized."}

        effective_ctx_size = ctx_size or getattr(self, "default_ctx_size", 20000)
@@ -1456,394 +1439,220 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
|
|
|
1456
1439
|
new_scratchpad_text = self.generate_text(prompt=synthesis_prompt, n_predict=1024, temperature=0.0)
|
|
1457
1440
|
return self.remove_thinking_blocks(new_scratchpad_text).strip()
|
|
1458
1441
|
|
|
1459
|
-
|
|
1460
1442
|
def generate_with_mcp_rag(
|
|
1461
1443
|
self,
|
|
1462
1444
|
prompt: str,
|
|
1445
|
+
context: Optional[str] = None,
|
|
1463
1446
|
use_mcps: Union[None, bool, List[str]] = None,
|
|
1464
1447
|
use_data_store: Union[None, Dict[str, Callable]] = None,
|
|
1465
1448
|
system_prompt: str = None,
|
|
1466
1449
|
reasoning_system_prompt: str = "You are a logical AI assistant. Your task is to achieve the user's goal by thinking step-by-step and using the available tools.",
|
|
1467
1450
|
images: Optional[List[str]] = None,
|
|
1468
|
-
max_reasoning_steps: int =
|
|
1469
|
-
decision_temperature: float =
|
|
1470
|
-
final_answer_temperature: float =
|
|
1451
|
+
max_reasoning_steps: int = 10,
|
|
1452
|
+
decision_temperature: float = 0.5,
|
|
1453
|
+
final_answer_temperature: float = 0.7,
|
|
1471
1454
|
streaming_callback: Optional[Callable[[str, 'MSG_TYPE', Optional[Dict], Optional[List]], bool]] = None,
|
|
1472
|
-
rag_top_k: int =
|
|
1473
|
-
rag_min_similarity_percent: float =
|
|
1474
|
-
output_summarization_threshold: int =
|
|
1455
|
+
rag_top_k: int = 5,
|
|
1456
|
+
rag_min_similarity_percent: float = 50.0,
|
|
1457
|
+
output_summarization_threshold: int = 500, # In tokens
|
|
1458
|
+
force_mcp_use: bool = False,
|
|
1475
1459
|
debug: bool = False,
|
|
1476
1460
|
**llm_generation_kwargs
|
|
1477
1461
|
) -> Dict[str, Any]:
|
|
1478
|
-
"""
|
|
1479
|
-
|
|
1480
|
-
|
|
1481
|
-
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
|
|
1462
|
+
"""
|
|
1463
|
+
Orchestrates a sophisticated and robust agentic process to generate a response.
|
|
1464
|
+
|
|
1465
|
+
This method employs a dynamic "observe-think-act" loop with several advanced architectural
|
|
1466
|
+
patterns for improved robustness and efficiency, particularly when handling code.
|
|
1467
|
+
|
|
1468
|
+
Key Features:
|
|
1469
|
+
- **Context-Aware Asset Ingestion**: The agent automatically detects if the `context`
|
|
1470
|
+
parameter (representing the previous turn) contains code. If so, it registers that
|
|
1471
|
+
code as an asset with a UUID, preventing the LLM from trying to paste large code
|
|
1472
|
+
blocks into its prompts and avoiding JSON errors.
|
|
1473
|
+
- **Tool Perception Filtering**: Identifies tools that directly consume code and HIDES
|
|
1474
|
+
them from the LLM's view, forcing it to use the safer `generate_and_call` workflow.
|
|
1475
|
+
- **Forced Safe Workflow**: The `generate_and_call` meta-tool is the ONLY way the agent
|
|
1476
|
+
can execute code, ensuring a robust, error-free, and efficient process.
|
|
1477
|
+
- **Verbose Internal Logging**: The `generate_and_call` tool is now fully instrumented
|
|
1478
|
+
with detailed logging and robust error handling to ensure every failure is visible
|
|
1479
|
+
and diagnosable, preventing silent loops.
|
|
1480
|
+
|
|
1492
1481
|
Args:
|
|
1493
|
-
prompt: The user's initial prompt or question.
|
|
1482
|
+
prompt: The user's initial prompt or question for the current turn.
|
|
1483
|
+
context: An optional string containing the content of the previous turn.
|
|
1494
1484
|
use_mcps: Controls MCP tool usage.
|
|
1495
1485
|
use_data_store: Controls RAG usage.
|
|
1496
|
-
system_prompt:
|
|
1497
|
-
reasoning_system_prompt:
|
|
1498
|
-
|
|
1499
|
-
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
|
|
1503
|
-
|
|
1504
|
-
rag_top_k: The number of top documents to retrieve during RAG.
|
|
1486
|
+
system_prompt: Main system prompt for the final answer.
|
|
1487
|
+
reasoning_system_prompt: System prompt for the decision-making process.
|
|
1488
|
+
images: A list of base64-encoded images provided by the user for the current turn.
|
|
1489
|
+
max_reasoning_steps: Maximum number of reasoning cycles.
|
|
1490
|
+
decision_temperature: Temperature for LLM's decision-making.
|
|
1491
|
+
final_answer_temperature: Temperature for final answer synthesis.
|
|
1492
|
+
streaming_callback: Function for real-time output of tokens and steps.
|
|
1493
|
+
rag_top_k: Number of top documents to retrieve during RAG.
|
|
1505
1494
|
rag_min_similarity_percent: Minimum similarity for RAG results.
|
|
1506
|
-
output_summarization_threshold:
|
|
1507
|
-
|
|
1508
|
-
debug
|
|
1495
|
+
output_summarization_threshold: Token count that triggers summarization.
|
|
1496
|
+
force_mcp_use: If True, bypasses the "fast answer" check.
|
|
1497
|
+
debug: If True, prints detailed prompting and response information.
|
|
1509
1498
|
**llm_generation_kwargs: Additional keyword arguments for LLM calls.
|
|
1510
1499
|
|
|
1511
1500
|
Returns:
|
|
1512
|
-
A dictionary containing the agent's full run
|
|
1513
|
-
answer, the complete internal scratchpad, a log of tool calls,
|
|
1514
|
-
any retrieved RAG sources, and other metadata.
|
|
1501
|
+
A dictionary containing the agent's full run.
|
|
1515
1502
|
"""
|
|
1516
|
-
|
|
1517
|
-
if not self.binding:
|
|
1503
|
+
if not self.llm:
|
|
1518
1504
|
return {"final_answer": "", "tool_calls": [], "sources": [], "error": "LLM binding not initialized."}
|
|
1505
|
+
if max_reasoning_steps is None:
|
|
1506
|
+
max_reasoning_steps = 10
|
|
1507
|
+
# --- Helper Functions ---
|
|
1508
|
+
def log_event(desc, event_type=MSG_TYPE.MSG_TYPE_CHUNK, meta=None, event_id=None) -> Optional[str]:
|
|
1509
|
+
if not streaming_callback: return None
|
|
1510
|
+
is_start = event_type == MSG_TYPE.MSG_TYPE_STEP_START
|
|
1511
|
+
event_id = str(uuid.uuid4()) if is_start and not event_id else event_id
|
|
1512
|
+
params = {"type": event_type, "description": desc, **(meta or {})}
|
|
1513
|
+
if event_id: params["id"] = event_id
|
|
1514
|
+
streaming_callback(desc, event_type, params)
|
|
1515
|
+
return event_id
|
|
1519
1516
|
|
|
1520
|
-
|
|
1521
|
-
|
|
1522
|
-
|
|
1523
|
-
|
|
1524
|
-
|
|
1525
|
-
|
|
1526
|
-
if not decision_temperature:
|
|
1527
|
-
decision_temperature = 0.7
|
|
1528
|
-
if not output_summarization_threshold:
|
|
1529
|
-
output_summarization_threshold = 500
|
|
1530
|
-
|
|
1531
|
-
events = []
|
|
1532
|
-
|
|
1533
|
-
|
|
1534
|
-
# --- Initialize Agent State ---
|
|
1535
|
-
sources_this_turn: List[Dict[str, Any]] = []
|
|
1536
|
-
tool_calls_this_turn: List[Dict[str, Any]] = []
|
|
1537
|
-
generated_code_store: Dict[str, str] = {} # NEW: Store for UUID -> code
|
|
1538
|
-
original_user_prompt = prompt
|
|
1539
|
-
|
|
1540
|
-
initial_state_parts = [
|
|
1541
|
-
"### Initial State",
|
|
1542
|
-
"- My goal is to address the user's request.",
|
|
1543
|
-
"- I have not taken any actions yet."
|
|
1544
|
-
]
|
|
1545
|
-
if images:
|
|
1546
|
-
initial_state_parts.append(f"- The user has provided {len(images)} image(s) for context.")
|
|
1547
|
-
current_scratchpad = "\n".join(initial_state_parts)
|
|
1548
|
-
|
|
1549
|
-
def log_prompt(prompt, type="prompt"):
|
|
1550
|
-
ASCIIColors.cyan(f"** DEBUG: {type} **")
|
|
1551
|
-
ASCIIColors.magenta(prompt[-15000:])
|
|
1552
|
-
prompt_size = self.count_tokens(prompt)
|
|
1553
|
-
ASCIIColors.red(f"Prompt size:{prompt_size}/{self.default_ctx_size}")
|
|
1517
|
+
def log_prompt(title: str, prompt_text: str):
|
|
1518
|
+
if not debug: return
|
|
1519
|
+
ASCIIColors.cyan(f"** DEBUG: {title} **")
|
|
1520
|
+
ASCIIColors.magenta(prompt_text[-15000:])
|
|
1521
|
+
prompt_size = self.count_tokens(prompt_text)
|
|
1522
|
+
ASCIIColors.red(f"Prompt size:{prompt_size}/{self.llm.default_ctx_size}")
|
|
1554
1523
|
ASCIIColors.cyan(f"** DEBUG: DONE **")
|
|
1555
1524
|
|
|
1556
|
-
# ---
|
|
1557
|
-
|
|
1558
|
-
|
|
1559
|
-
|
|
1560
|
-
|
|
1561
|
-
|
|
1562
|
-
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
|
|
1568
|
-
|
|
1525
|
+
# --- 1. Initialize State & Context-Aware Asset Ingestion ---
|
|
1526
|
+
original_user_prompt, tool_calls_this_turn, sources_this_turn = prompt, [], []
|
|
1527
|
+
asset_store: Dict[str, Dict] = {}
|
|
1528
|
+
initial_state_parts = ["### Initial State", "- My goal is to address the user's request comprehensively."]
|
|
1529
|
+
if images:
|
|
1530
|
+
for img_b64 in images:
|
|
1531
|
+
img_uuid = str(uuid.uuid4())
|
|
1532
|
+
asset_store[img_uuid] = {"type": "image", "content": img_b64}
|
|
1533
|
+
initial_state_parts.append(f"- User provided image, asset ID: {img_uuid}")
|
|
1534
|
+
if context:
|
|
1535
|
+
code_blocks = re.findall(r"```(?:\w+)?\n([\s\S]+?)\n```", context)
|
|
1536
|
+
if code_blocks:
|
|
1537
|
+
last_code_block = code_blocks[-1]
|
|
1538
|
+
code_uuid = str(uuid.uuid4())
|
|
1539
|
+
asset_store[code_uuid] = {"type": "code", "content": last_code_block}
|
|
1540
|
+
initial_state_parts.append(f"- The user's request likely refers to a code block from the previous turn's context. It has been registered as asset ID: {code_uuid}")
|
|
1541
|
+
current_scratchpad = "\n".join(initial_state_parts)
|
|
1569
1542
|
|
|
1570
|
-
|
|
1571
|
-
|
|
1572
|
-
|
|
1573
|
-
|
|
1574
|
-
|
|
1575
|
-
|
|
1576
|
-
|
|
1577
|
-
|
|
1578
|
-
|
|
1579
|
-
|
|
1580
|
-
|
|
1581
|
-
data[i] = code_store[item]
|
|
1582
|
-
else:
|
|
1583
|
-
_substitute_code_uuids_recursive(item, code_store)
|
|
1584
|
-
|
|
1585
|
-
discovery_step_id = log_event("**Discovering tools**",MSG_TYPE.MSG_TYPE_STEP_START)
|
|
1586
|
-
# --- 1. Discover Available Tools ---
|
|
1587
|
-
available_tools = []
|
|
1588
|
-
if use_mcps and self.mcp:
|
|
1589
|
-
discovered_tools = self.mcp.discover_tools(force_refresh=True)
|
|
1590
|
-
if isinstance(use_mcps, list):
|
|
1591
|
-
available_tools.extend([t for t in discovered_tools if t["name"] in use_mcps])
|
|
1592
|
-
|
|
1543
|
+
# --- 2. Tool Discovery and Filtering ---
|
|
1544
|
+
discovery_step_id = log_event("Discovering and filtering tools...", MSG_TYPE.MSG_TYPE_STEP_START)
|
|
1545
|
+
all_discovered_tools, visible_tools, code_consuming_tools = [], [], set()
|
|
1546
|
+
if use_mcps and hasattr(self, 'mcp'):
|
|
1547
|
+
mcp_tools = self.mcp.discover_tools(force_refresh=True)
|
|
1548
|
+
if isinstance(use_mcps, list): all_discovered_tools.extend([t for t in mcp_tools if t["name"] in use_mcps])
|
|
1549
|
+
elif use_mcps is True: all_discovered_tools.extend(mcp_tools)
|
|
1550
|
+
code_param_keywords = {'code', 'script', 'python_code', 'javascript', 'html', 'css'}
|
|
1551
|
+
for tool in all_discovered_tools:
|
|
1552
|
+
if any(p in code_param_keywords for p in tool.get("input_schema", {}).get("properties", {})): code_consuming_tools.add(tool['name'])
|
|
1553
|
+
else: visible_tools.append(tool)
|
|
1593
1554
|
if use_data_store:
|
|
1594
|
-
for
|
|
1595
|
-
|
|
1596
|
-
|
|
1597
|
-
|
|
1598
|
-
"input_schema": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}
|
|
1599
|
-
})
|
|
1555
|
+
for name, info in use_data_store.items(): visible_tools.append({"name": f"research::{name}", "description": info.get("description", f"Queries '{name}'."), "input_schema": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}})
|
|
1556
|
+
log_event(f"Made {len(visible_tools)} tools visible (hid {len(code_consuming_tools)} code tools).", MSG_TYPE.MSG_TYPE_STEP_END, meta={"visible": len(visible_tools), "hidden": len(code_consuming_tools), "hidden_list": list(code_consuming_tools)}, event_id=discovery_step_id)
|
|
1557
|
+
|
|
1558
|
+
# --- 3. Fast Answer Path (Not shown for brevity, but retained) ---
|
|
1600
1559
|
|
|
1601
|
-
#
|
|
1602
|
-
|
|
1603
|
-
|
|
1604
|
-
|
|
1605
|
-
|
|
1606
|
-
|
|
1607
|
-
|
|
1608
|
-
"name": "local_tools::view_generated_code",
|
|
1609
|
-
"description": """Views the code that was generated and stored to the buffer. You need to have a valid uuid of the generated code.""",
|
|
1610
|
-
"input_schema": {"type": "object", "properties": {"code_id": {"type": "string", "description": "The case sensitive uuid of the generated code."}}, "required": ["uuid"]}
|
|
1611
|
-
})
|
|
1612
|
-
# Add the new refactor_scratchpad tool definition
|
|
1613
|
-
available_tools.append({
|
|
1614
|
-
"name": "local_tools::refactor_scratchpad",
|
|
1615
|
-
"description": "Rewrites the scratchpad content to clean it and reorganize it. Only use if the scratchpad is messy or contains too much information compared to what you need.",
|
|
1616
|
-
"input_schema": {"type": "object", "properties": {}}
|
|
1617
|
-
})
|
|
1618
|
-
|
|
1619
|
-
formatted_tools_list = "\n".join([f"**{t['name']}**:\n{t['description']}\ninput schema:\n{json.dumps(t['input_schema'])}" for t in available_tools])
|
|
1620
|
-
formatted_tools_list += "\n**local_tools::request_clarification**:\nUse if the user's request is ambiguous and you can not infer a clear idea of his intent. this tool has no parameters."
|
|
1621
|
-
formatted_tools_list += "\n**local_tools::final_answer**:\nUse when you are ready to respond to the user. this tool has no parameters."
|
|
1622
|
-
|
|
1623
|
-
if discovery_step_id: log_event(f"**Discovering tools** found {len(available_tools)} tools",MSG_TYPE.MSG_TYPE_STEP_END, event_id=discovery_step_id)
|
|
1624
|
-
|
|
1625
|
-
# --- 2. Dynamic Reasoning Loop ---
|
|
1560
|
+
# --- 4. Format Tools for Main Loop ---
|
|
1561
|
+
CODE_PLACEHOLDER = "{GENERATED_CODE}"
|
|
1562
|
+
built_in_tools = [{"name": "local_tools::generate_and_call", "description": f"CRITICAL: To run or modify code, you MUST use this tool. It generates code (e.g., to fix code from an asset) and then calls a tool with it. Refer to existing code using its asset ID. Use '{CODE_PLACEHOLDER}' in `next_tool_params` for the NEWLY generated code.", "input_schema": { "type": "object", "properties": { "code_generation_prompt": {"type": "string"}, "language": {"type": "string"}, "next_tool_name": {"type": "string"}, "next_tool_params": {"type": "object"}}, "required": ["code_generation_prompt", "next_tool_name", "next_tool_params"]}}, {"name": "local_tools::refactor_scratchpad", "description": "Rewrites the scratchpad.", "input_schema": {}}, {"name": "local_tools::request_clarification", "description": "Asks the user for more information.", "input_schema": {"type": "object", "properties": {"question_to_user": {"type": "string"}}, "required": ["question_to_user"]}}, {"name": "local_tools::final_answer", "description": "Provides the final answer.", "input_schema": {}}]
|
|
1563
|
+
all_visible_tools = visible_tools + built_in_tools
|
|
1564
|
+
formatted_tools_list = "\n".join([f"**{t['name']}**:\n- Description: {t['description']}" for t in all_visible_tools])
|
|
1565
|
+
|
|
1566
|
+
# --- 5. Dynamic Reasoning Loop ---
|
|
1626
1567
|
for i in range(max_reasoning_steps):
|
|
1568
|
+
reasoning_step_id = log_event(f"Reasoning Step {i+1}/{max_reasoning_steps}", MSG_TYPE.MSG_TYPE_STEP_START)
|
|
1627
1569
|
try:
|
|
1628
|
-
|
|
1629
|
-
|
|
1630
|
-
|
|
1570
|
+
reasoning_prompt = f"""--- AVAILABLE ACTIONS ---\n{formatted_tools_list}\n\n--- YOUR INTERNAL SCRATCHPAD ---\n{current_scratchpad}\n--- END SCRATCHPAD ---\n\n**INSTRUCTIONS:**\n1. **OBSERVE:** Review your scratchpad, especially available asset IDs.\n2. **THINK:** Based on '{original_user_prompt}', what is the single next logical action using ONLY the available actions?\n3. **ACT:** Formulate your decision as a JSON object. Do NOT paste large code blocks into parameters; use their asset IDs instead."""
|
|
1571
|
+
action_schema = {"thought": "My reasoning.", "action": {"tool_name": "string", "tool_params": "object"}}
|
|
1572
|
+
action_data = self.generate_structured_content(prompt=reasoning_prompt, schema=action_schema, system_prompt=reasoning_system_prompt, temperature=decision_temperature, **llm_generation_kwargs)
|
|
1631
1573
|
|
|
1632
|
-
|
|
1633
|
-
|
|
1634
|
-
|
|
1635
|
-
--- CONTEXT ---
|
|
1636
|
-
{user_context}
|
|
1637
|
-
--- YOUR INTERNAL SCRATCHPAD (Work History & Analysis) ---
|
|
1638
|
-
{current_scratchpad}
|
|
1639
|
-
--- END OF SCRATCHPAD ---
|
|
1640
|
-
|
|
1641
|
-
**INSTRUCTIONS:**
|
|
1642
|
-
1. **OBSERVE:** Review the `Observation` from your most recent step in the scratchpad.
|
|
1643
|
-
2. **THINK:**
|
|
1644
|
-
- Does the latest observation completely fulfill the user's original request?
|
|
1645
|
-
- If YES, your next action MUST be to use the `final_answer` tool.
|
|
1646
|
-
- If NO, what is the single next logical step needed? This may involve writing code first with `prepare_code`, then using another tool.
|
|
1647
|
-
- If you are stuck or the request is ambiguous, use `local_tools::request_clarification`.
|
|
1648
|
-
3. **ACT:** Formulate your decision as a JSON object.
|
|
1649
|
-
** Important ** Always use this format alias::tool_name to call the tool
|
|
1650
|
-
"""
|
|
1651
|
-
action_template = {
|
|
1652
|
-
"thought": "My detailed analysis of the last observation and my reasoning for the next action and how it integrates with my global plan.",
|
|
1653
|
-
"action": {
|
|
1654
|
-
"tool_name": "The single tool to use (e.g., 'local_tools::prepare_code', 'local_tools::final_answer').",
|
|
1655
|
-
"tool_params": {"param1": "value1"},
|
|
1656
|
-
"clarification_question": "(string, ONLY if tool_name is 'local_tools::request_clarification')"
|
|
1657
|
-
}
|
|
1658
|
-
}
|
|
1659
|
-
if debug: log_prompt(reasoning_prompt_template, f"REASONING PROMPT (Step {i+1})")
|
|
1660
|
-
structured_action_response = self.generate_code(
|
|
1661
|
-
prompt=reasoning_prompt_template, template=json.dumps(action_template, indent=2),
|
|
1662
|
-
system_prompt=reasoning_system_prompt, temperature=decision_temperature,
|
|
1663
|
-
images=images if i == 0 else None
|
|
1664
|
-
)
|
|
1665
|
-
if structured_action_response is None:
|
|
1666
|
-
log_event("**Error generating thought.** Retrying..", MSG_TYPE.MSG_TYPE_EXCEPTION)
|
|
1574
|
+
if not action_data or not isinstance(action_data.get("action"), dict):
|
|
1575
|
+
log_event("Failed to generate a valid JSON action. Will retry.", MSG_TYPE.MSG_TYPE_WARNING, event_id=reasoning_step_id)
|
|
1576
|
+
current_scratchpad += "\n\n### Step Failure\n- **Error:** Failed to produce a valid JSON action."
|
|
1667
1577
|
continue
|
|
1668
|
-
if debug: log_prompt(structured_action_response, f"RAW REASONING RESPONSE (Step {i+1})")
|
|
1669
|
-
|
|
1670
|
-
try:
|
|
1671
|
-
action_data = robust_json_parser(structured_action_response)
|
|
1672
|
-
thought = action_data.get("thought", "No thought was generated.")
|
|
1673
|
-
action = action_data.get("action", {})
|
|
1674
|
-
if isinstance(action,str):
|
|
1675
|
-
tool_name = action
|
|
1676
|
-
tool_params = {}
|
|
1677
|
-
else:
|
|
1678
|
-
tool_name = action.get("tool_name")
|
|
1679
|
-
tool_params = action.get("tool_params", {})
|
|
1680
|
-
except (json.JSONDecodeError, TypeError) as e:
|
|
1681
|
-
current_scratchpad += f"\n\n### Step {i+1} Failure\n- **Error:** Failed to generate a valid JSON action: {e}"
|
|
1682
|
-
log_event(f"Step Failure: Invalid JSON action.", MSG_TYPE.MSG_TYPE_EXCEPTION, metadata={"details": str(e)})
|
|
1683
|
-
if reasoning_step_id: log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**", MSG_TYPE.MSG_TYPE_STEP_END, metadata={"error": str(e)}, event_id=reasoning_step_id)
|
|
1684
|
-
|
|
1685
1578
|
|
|
1579
|
+
thought, action = action_data.get("thought", ""), action_data.get("action", {})
|
|
1580
|
+
tool_name, tool_params = action.get("tool_name"), action.get("tool_params", {})
|
|
1686
1581
|
current_scratchpad += f"\n\n### Step {i+1}: Thought\n{thought}"
|
|
1687
|
-
log_event(
|
|
1582
|
+
log_event(thought, MSG_TYPE.MSG_TYPE_THOUGHT_CONTENT)
|
|
1688
1583
|
|
|
1689
|
-
if
|
|
1690
|
-
# Handle error...
|
|
1691
|
-
break
|
|
1692
|
-
|
|
1693
|
-
# --- Handle special, non-executing tools ---
|
|
1584
|
+
if tool_name == "local_tools::final_answer": break
|
|
1694
1585
|
if tool_name == "local_tools::request_clarification":
|
|
1695
|
-
|
|
1696
|
-
if isinstance(action, dict):
|
|
1697
|
-
return {"final_answer": action.get("clarification_question", "Could you please provide more details?"), "final_scratchpad": current_scratchpad, "tool_calls": tool_calls_this_turn, "sources": sources_this_turn, "clarification_required": True, "error": None}
|
|
1698
|
-
elif isinstance(action, str):
|
|
1699
|
-
return {"final_answer": action, "final_scratchpad": current_scratchpad, "tool_calls": tool_calls_this_turn, "sources": sources_this_turn, "clarification_required": True, "error": None}
|
|
1700
|
-
else:
|
|
1701
|
-
return {"final_answer": "Could you please provide more details?", "final_scratchpad": current_scratchpad, "tool_calls": tool_calls_this_turn, "sources": sources_this_turn, "clarification_required": True, "error": None}
|
|
1702
|
-
if tool_name == "local_tools::final_answer":
|
|
1703
|
-
current_scratchpad += f"\n\n### Step {i+1}: Action\n- **Action:** Decided to formulate the final answer."
|
|
1704
|
-
log_event("**Action**: Formulate final answer.", MSG_TYPE.MSG_TYPE_THOUGHT_CHUNK)
|
|
1705
|
-
if reasoning_step_id: log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**",MSG_TYPE.MSG_TYPE_STEP_END, event_id=reasoning_step_id)
|
|
1706
|
-
break
|
|
1586
|
+
return {"final_answer": tool_params.get("question_to_user", "?"), "final_scratchpad": current_scratchpad, "tool_calls": tool_calls_this_turn, "sources": sources_this_turn, "clarification_required": True, "error": None}
|
|
1707
1587
|
|
|
1708
|
-
|
|
1709
|
-
if tool_name ==
|
|
1710
|
-
|
|
1711
|
-
|
|
1712
|
-
|
|
1713
|
-
|
|
1714
|
-
|
|
1715
|
-
|
|
1716
|
-
|
|
1717
|
-
|
|
1718
|
-
|
|
1719
|
-
|
|
1720
|
-
tool_result = {"status": "success", "code_id": code_uuid, "summary": f"Code generated successfully. Use this ID in the next tool call that requires code."}
|
|
1721
|
-
tool_calls_this_turn.append({"name": "prepare_code", "params": tool_params, "result": tool_result})
|
|
1722
|
-
observation_text = f"```json\n{json.dumps(tool_result, indent=2)}\n```"
|
|
1723
|
-
current_scratchpad += f"\n\n### Step {i+1}: Observation\n- **Action:** Called `{tool_name}`\n- **Result:**\n{observation_text}"
|
|
1724
|
-
log_event(f"Code generated with ID: {code_uuid}", MSG_TYPE.MSG_TYPE_OBSERVATION)
|
|
1725
|
-
if code_gen_id: log_event(f"Generating code...", MSG_TYPE.MSG_TYPE_TOOL_CALL, metadata={"id": code_gen_id, "result": tool_result})
|
|
1726
|
-
if reasoning_step_id: log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**", MSG_TYPE.MSG_TYPE_STEP_END, event_id= reasoning_step_id)
|
|
1727
|
-
continue # Go to the next reasoning step immediately
|
|
1728
|
-
if tool_name == 'local_tools::view_generated_code':
|
|
1729
|
-
code_id = tool_params.get("code_id")
|
|
1730
|
-
if code_id:
|
|
1731
|
-
tool_result = {"status": "success", "code_id": code_id, "generated_code":generated_code_store[code_uuid]}
|
|
1732
|
-
else:
|
|
1733
|
-
tool_result = {"status": "error", "code_id": code_id, "error":"Unknown uuid"}
|
|
1734
|
-
observation_text = f"```json\n{json.dumps(tool_result, indent=2)}\n```"
|
|
1735
|
-
current_scratchpad += f"\n\n### Step {i+1}: Observation\n- **Action:** Called `{tool_name}`\n- **Result:**\n{observation_text}"
|
|
1736
|
-
log_event(f"Result from `{tool_name}`:\n```\n{generated_code_store[code_uuid]}\n```\n", MSG_TYPE.MSG_TYPE_TOOL_CALL, metadata={"id": code_gen_id, "result": tool_result})
|
|
1737
|
-
continue
|
|
1738
|
-
if tool_name == 'local_tools::refactor_scratchpad':
|
|
1739
|
-
scratchpad_cleaning_prompt = f"""Enhance this scratchpad content to be more organized and comprehensive. Keep relevant experience information and remove any useless redundancies. Try to log learned things from the context so that you won't make the same mistakes again. Do not remove the main objective information or any crucial information that may be useful for the next iterations. Answer directly with the new scratchpad content without any comments.
|
|
1740
|
-
--- YOUR INTERNAL SCRATCHPAD (Work History & Analysis) ---
|
|
1741
|
-
{current_scratchpad}
1742        -  --- END OF SCRATCHPAD ---"""
1743        -  current_scratchpad = self.generate_text(scratchpad_cleaning_prompt)
1744        -  log_event(f"**New scratchpad**:\n{current_scratchpad}", MSG_TYPE.MSG_TYPE_SCRATCHPAD)
1745        -
1746        -  # --- Substitute UUIDs and Execute Standard Tools ---
1747        -  log_event(f"**Calling tool**: `{tool_name}` with params:\n{dict_to_markdown(tool_params)}", MSG_TYPE.MSG_TYPE_TOOL_CALL)
1748        -  _substitute_code_uuids_recursive(tool_params, generated_code_store)
1749        -
1750        -  tool_call_id = log_event(f"**Executing tool**: {tool_name}",MSG_TYPE.MSG_TYPE_STEP_START, metadata={"name": tool_name, "parameters": tool_params, "id":"executing tool"})
1751        -  tool_result = None
1752        -  try:
1753        -  if tool_name.startswith("research::") and use_data_store:
1754        -  store_name = tool_name.split("::")[1]
1755        -  rag_callable = use_data_store.get(store_name, {}).get("callable")
1756        -  query = tool_params.get("query", "")
1757        -  retrieved_chunks = rag_callable(query, rag_top_k=rag_top_k, rag_min_similarity_percent=rag_min_similarity_percent)
1758        -  if retrieved_chunks:
1759        -  sources_this_turn.extend(retrieved_chunks)
1760        -  tool_result = {"status": "success", "summary": f"Found {len(retrieved_chunks)} relevant chunks.", "chunks": retrieved_chunks}
      1588  +  tool_result = {"status": "failure", "error": f"Tool '{tool_name}' was called but did not execute properly."} # Default error
      1589  +  if tool_name == "local_tools::generate_and_call":
      1590  +  chain_id = log_event(f"Starting chained tool call...", MSG_TYPE.MSG_TYPE_STEP_START)
      1591  +  try:
      1592  +  code_gen_prompt, lang = tool_params.get("code_generation_prompt", ""), tool_params.get("language", "python")
      1593  +  next_tool_name, next_tool_params = tool_params.get("next_tool_name"), tool_params.get("next_tool_params", {})
      1594  +  log_event("Received parameters for chain", MSG_TYPE.MSG_TYPE_STEP, meta={"parent_id": chain_id, "params": tool_params})
      1595  +
      1596  +  if not (use_mcps and hasattr(self, 'mcp')):
      1597  +  tool_result = {"status": "failure", "error": "MCPs are not enabled, cannot execute tools."}
      1598  +  elif next_tool_name not in code_consuming_tools:
      1599  +  tool_result = {"status": "failure", "error": f"Tool '{next_tool_name}' is not a valid code-consuming tool. Valid options are: {list(code_consuming_tools)}"}
1761  1600     else:
1762        -
1763        -
1764        -
1765        -
1766        -
1767        -
1768        -
1769        -
1770        -
1771        -
1772        -
1773        -
1774        -
      1601  +  def _hydrate(text: str, store: Dict) -> str:
      1602  +  for k, v in store.items(): text = text.replace(k, v.get('content',''))
      1603  +  return text
      1604  +  hydrated_prompt = _hydrate(code_gen_prompt, asset_store)
      1605  +  log_event(f"Generating {lang} code for {next_tool_name}", MSG_TYPE.MSG_TYPE_STEP, meta={"parent_id": chain_id, "hydrated_prompt": hydrated_prompt})
      1606  +  generated_code = self.generate_code(prompt=hydrated_prompt, system_prompt=f"Generate ONLY raw {lang} code.", **llm_generation_kwargs)
      1607  +
      1608  +  def _substitute(data: Any) -> Any:
      1609  +  if isinstance(data, dict): return {k: _substitute(v) for k, v in data.items()}
      1610  +  if isinstance(data, list): return [_substitute(item) for item in data]
      1611  +  if isinstance(data, str) and data == CODE_PLACEHOLDER: return generated_code
      1612  +  return data
      1613  +  hydrated_params = _substitute(next_tool_params)
      1614  +
      1615  +  log_event(f"Calling tool: {next_tool_name}", MSG_TYPE.MSG_TYPE_TOOL_CALL, meta={"parent_id": chain_id, "name": next_tool_name, "parameters": hydrated_params})
      1616  +  tool_result = self.mcp.execute_tool(next_tool_name, hydrated_params, lollms_client_instance=self)
      1617  +  except Exception as e:
      1618  +  tool_result = {"status": "failure", "error": f"Exception in chained tool logic: {str(e)}"}
      1619  +  log_event(f"Finished chained tool call.", MSG_TYPE.MSG_TYPE_STEP_END, event_id=chain_id)
      1620  +  # ... other non-code tool handlers ...
      1621  +
      1622  +  # --- Process and Sanitize ALL Tool Outputs for the Scratchpad ---
1775  1623     sanitized_result = {}
1776  1624     if isinstance(tool_result, dict):
1777  1625     sanitized_result = tool_result.copy()
1778        -  summarized_fields = {}
1779  1626     for key, value in tool_result.items():
1780        -  if isinstance(value, str) and
1781        -
1782        -
1783        -
1784        -  if streaming_callback: streaming_callback(f"Summarizing long output from field '{key}'...", MSG_TYPE.MSG_TYPE_STEP, {"type": "summarization"})
1785        -  summary = self.sequential_summarize(text=value, chunk_processing_prompt=f"Summarize key info from this chunk of '{key}'.", callback=streaming_callback)
1786        -  summarized_fields[key] = summary
1787        -  sanitized_result[key] = f"[Content summarized, see summary below. Original length: {len(value)} chars]"
1788        -  observation_text = f"```json\n{json.dumps(sanitized_result, indent=2)}\n```"
1789        -  if summarized_fields:
1790        -  observation_text += "\n\n**Summaries of Long Outputs:**"
1791        -  for key, summary in summarized_fields.items():
1792        -  observation_text += f"\n- **Summary of '{key}':**\n{summary}"
      1627  +  if isinstance(value, str) and value.startswith("data:image"):
      1628  +  img_uuid = str(uuid.uuid4())
      1629  +  asset_store[img_uuid] = {"type": "image", "content": value}
      1630  +  sanitized_result[key] = f"[Image asset generated: {img_uuid}]"
1793  1631     else:
1794        -
      1632  +  sanitized_result = {"raw_output": str(tool_result)}
1795  1633
      1634  +  observation_text = f"```json\n{json.dumps(sanitized_result, indent=2)}\n```"
      1635  +  log_event(f"Received output from: {tool_name}", MSG_TYPE.MSG_TYPE_TOOL_OUTPUT, meta={"name": tool_name, "result": sanitized_result})
1796  1636     tool_calls_this_turn.append({"name": tool_name, "params": tool_params, "result": tool_result})
1797  1637     current_scratchpad += f"\n\n### Step {i+1}: Observation\n- **Action:** Called `{tool_name}`\n- **Result:**\n{observation_text}"
1798        -  log_event(f"
1799        -
1800        -  if reasoning_step_id: log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**", MSG_TYPE.MSG_TYPE_STEP_END, event_id = reasoning_step_id)
      1638  +  log_event(f"Finished reasoning step {i+1}", MSG_TYPE.MSG_TYPE_STEP_END, event_id=reasoning_step_id)
      1639  +
1801  1640     except Exception as ex:
1802  1641     trace_exception(ex)
1803        -
1804        -  if reasoning_step_id: log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**", MSG_TYPE.MSG_TYPE_STEP_END, event_id = reasoning_step_id)
1805        -
1806        -  # --- Final Answer Synthesis ---
1807        -  synthesis_id = log_event("Synthesizing final answer...", MSG_TYPE.MSG_TYPE_STEP_START)
      1642  +  log_event(f"Error in reasoning loop: {str(ex)}", MSG_TYPE.MSG_TYPE_EXCEPTION, event_id=reasoning_step_id)
1808  1643
1809        -
1810        -
1811        -  "{original_user_prompt}"
1812        -
1813        -
1814        -
1815        -
1816        -  - If images were provided by the user, incorporate your analysis of them into the answer.
1817        -  - Do not talk about your internal process unless it's necessary to explain why you couldn't find an answer.
1818        -  """
1819        -  if debug: log_prompt(final_answer_prompt, "FINAL ANSWER SYNTHESIS PROMPT")
1820        -
1821        -
1822        -  final_answer_text = self.generate_text(prompt=final_answer_prompt, system_prompt=system_prompt, images=images, stream=streaming_callback is not None, streaming_callback=streaming_callback, temperature=final_answer_temperature, **llm_generation_kwargs)
1823        -  if type(final_answer_text) is dict:
1824        -  if streaming_callback:
1825        -  streaming_callback(final_answer_text["error"], MSG_TYPE.MSG_TYPE_EXCEPTION)
1826        -  return {
1827        -  "final_answer": "",
1828        -  "final_scratchpad": current_scratchpad,
1829        -  "tool_calls": tool_calls_this_turn,
1830        -  "sources": sources_this_turn,
1831        -  "clarification_required": False,
1832        -  "error": final_answer_text["error"]
1833        -  }
      1644  +  # --- 6. Final Answer Synthesis ---
      1645  +  synthesis_id = log_event("Synthesizing final answer...", MSG_TYPE.MSG_TYPE_STEP_START)
      1646  +  final_answer_prompt = f"""--- Original User Request ---\n"{original_user_prompt}"\n\n--- Your Internal Scratchpad ---\n{current_scratchpad}\n\n--- INSTRUCTIONS ---\nSynthesize a clear, comprehensive, and friendly answer for the user based ONLY on your scratchpad."""
      1647  +  final_synthesis_images = [img for img in (images or [])] + [asset['content'] for asset in asset_store.values() if asset['type'] == 'image']
      1648  +  final_answer_text = self.generate_text(prompt=final_answer_prompt, system_prompt=system_prompt, images=final_synthesis_images, stream=streaming_callback is not None, streaming_callback=streaming_callback, temperature=final_answer_temperature, **llm_generation_kwargs)
      1649  +  if isinstance(final_answer_text, dict) and "error" in final_answer_text:
      1650  +  return {"final_answer": "", "final_scratchpad": current_scratchpad, "tool_calls": tool_calls_this_turn, "sources": sources_this_turn, "clarification_required": False, "error": final_answer_text["error"]}
1834  1651     final_answer = self.remove_thinking_blocks(final_answer_text)
1835        -
      1652  +  log_event("Finished synthesizing answer.", MSG_TYPE.MSG_TYPE_STEP_END, event_id=synthesis_id)
1836  1653
1837        -
      1654  +  return {"final_answer": final_answer, "final_scratchpad": current_scratchpad, "tool_calls": tool_calls_this_turn, "sources": sources_this_turn, "clarification_required": False, "error": None}
1838  1655
1839        -  return {
1840        -  "final_answer": final_answer,
1841        -  "final_scratchpad": current_scratchpad,
1842        -  "tool_calls": tool_calls_this_turn,
1843        -  "sources": sources_this_turn,
1844        -  "clarification_required": False,
1845        -  "error": None
1846        -  }
1847  1656     def generate_code(
1848  1657     self,
1849  1658     prompt:str,
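The hunk above reworks the agent's tool-execution step: long tool outputs are no longer summarized field by field; instead, image data URIs returned by tools are parked in an asset_store under a UUID and re-attached as images at final-answer synthesis, and the new local_tools::generate_and_call handler chains code generation into a follow-up tool call by hydrating the prompt from the asset store and substituting a placeholder inside the next tool's parameters. A minimal, self-contained sketch of that hydrate/substitute pattern (the CODE_PLACEHOLDER value and the store layout here are illustrative assumptions, not copied from the library):

    from typing import Any, Dict

    CODE_PLACEHOLDER = "<GENERATED_CODE>"   # hypothetical sentinel; the real value lives in lollms_core

    def hydrate(text: str, store: Dict[str, Dict[str, str]]) -> str:
        # Replace any asset UUID mentioned in a prompt with the stored content.
        for uuid_key, asset in store.items():
            text = text.replace(uuid_key, asset.get("content", ""))
        return text

    def substitute(data: Any, generated_code: str) -> Any:
        # Walk dicts/lists and swap the sentinel for the freshly generated code.
        if isinstance(data, dict):
            return {k: substitute(v, generated_code) for k, v in data.items()}
        if isinstance(data, list):
            return [substitute(item, generated_code) for item in data]
        if isinstance(data, str) and data == CODE_PLACEHOLDER:
            return generated_code
        return data

    asset_store = {"a1b2-uuid": {"type": "image", "content": "data:image/png;base64,..."}}
    print(hydrate("Describe a1b2-uuid", asset_store))                         # prompt with the data URI inlined
    print(substitute({"code": CODE_PLACEHOLDER, "timeout": 30}, "print('hi')"))  # params with generated code injected

Keeping raw data URIs out of the scratchpad keeps the reasoning context small, while the final synthesis step still sees the generated images through final_synthesis_images.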
@@ -2497,7 +2306,7 @@ Do not split the code in multiple tags.
2497  2306     callback = self.sink
2498  2307
2499  2308     if ctx_size is None:
2500        -  ctx_size = self.default_ctx_size or 8192 # Provide a fallback default
      2309  +  ctx_size = self.llm.default_ctx_size or 8192 # Provide a fallback default
2501  2310     if chunk_size is None:
2502  2311     chunk_size = ctx_size // 4
2503  2312     if overlap is None:

@@ -2573,7 +2382,7 @@ Current document analysis memory:
2573  2382     # Process text in chunks
2574  2383     while start_token_idx < total_tokens:
2575  2384     # Calculate available tokens for chunk + memory
2576        -  available_tokens_for_dynamic_content = ctx_size - static_tokens - (self.default_n_predict or 1024) # Reserve space for output
      2385  +  available_tokens_for_dynamic_content = ctx_size - static_tokens - (self.llm.default_n_predict or 1024) # Reserve space for output
2577  2386     if available_tokens_for_dynamic_content <= 100: # Need some minimum space
2578  2387     ASCIIColors.error("Context size too small for summarization with current settings.")
2579  2388     return "Error: Context size too small."
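The two hunks above (and the similar one-line changes that follow) mostly redirect defaults: context size and prediction length now come from the active LLM binding (self.llm.default_ctx_size, self.llm.default_n_predict) rather than from attributes on the client itself. The per-chunk budget is the same arithmetic throughout: subtract the static prompt tokens and the reserved output tokens from the context size, and whatever remains is available for memory plus chunk. A rough worked example with made-up numbers:

    # Illustrative numbers only; the formula mirrors the hunks above.
    ctx_size = 8192        # e.g. the binding's default_ctx_size fallback
    static_tokens = 1500   # tokens consumed by the fixed prompt template
    n_predict = 1024       # reserved for the model's output

    available_tokens_for_dynamic_content = ctx_size - static_tokens - n_predict
    print(available_tokens_for_dynamic_content)          # 5668 tokens left for memory + chunk
    assert available_tokens_for_dynamic_content > 100    # same minimum-space guard as the code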
@@ -2610,7 +2419,7 @@ Current document analysis memory:
2610  2419     ASCIIColors.magenta(f"--- Chunk {chunk_id} Prompt ---")
2611  2420     ASCIIColors.cyan(prompt)
2612  2421
2613        -  response = self.generate_text(prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback)
      2422  +  response = self.generate_text(prompt, n_predict=(self.llm.default_n_predict or 1024), streaming_callback=callback)
2614  2423
2615  2424     if isinstance(response, dict): # Handle generation error
2616  2425     ASCIIColors.error(f"Chunk {chunk_id} processing failed: {response.get('error')}")

@@ -2669,7 +2478,7 @@ The final output must be put inside a {final_output_format} markdown tag.
2669  2478     final_example_prompt = final_prompt_template.format(memory="<final_memory>")
2670  2479     try:
2671  2480     final_static_tokens = len(self.tokenize(final_example_prompt)) - len(self.tokenize("<final_memory>"))
2672        -  available_final_tokens = ctx_size - final_static_tokens - (self.default_n_predict or 1024) # Reserve space for output
      2481  +  available_final_tokens = ctx_size - final_static_tokens - (self.llm.default_n_predict or 1024) # Reserve space for output
2673  2482     except RuntimeError as e:
2674  2483     ASCIIColors.error(f"Tokenization failed during final setup: {e}")
2675  2484     return "Error: Could not calculate final prompt size."

@@ -2686,7 +2495,7 @@ The final output must be put inside a {final_output_format} markdown tag.
2686  2495     ASCIIColors.magenta("--- Final Aggregation Prompt ---")
2687  2496     ASCIIColors.cyan(final_prompt)
2688  2497
2689        -  final_summary_raw = self.generate_text(final_prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback)
      2498  +  final_summary_raw = self.generate_text(final_prompt, n_predict=(self.llm.default_n_predict or 1024), streaming_callback=callback)
2690  2499
2691  2500     if isinstance(final_summary_raw, dict):
2692  2501     ASCIIColors.error(f"Final aggregation failed: {final_summary_raw.get('error')}")

@@ -2742,7 +2551,7 @@ The final output must be put inside a {final_output_format} markdown tag.
2742  2551
2743  2552     # Set defaults and validate input
2744  2553     if ctx_size is None:
2745        -  ctx_size = self.default_ctx_size or 8192
      2554  +  ctx_size = self.llm.default_ctx_size or 8192
2746  2555     if chunk_size is None:
2747  2556     chunk_size = ctx_size // 4
2748  2557     if overlap is None:

@@ -2845,7 +2654,7 @@ Task: Update the markdown memory by adding new information from this chunk relev
2845  2654
2846  2655     while start_token_idx < len(file_tokens):
2847  2656     # Calculate available space dynamically
2848        -  available_tokens_for_dynamic_content = ctx_size - static_tokens - (self.default_n_predict or 1024)
      2657  +  available_tokens_for_dynamic_content = ctx_size - static_tokens - (self.llm.default_n_predict or 1024)
2849  2658     if available_tokens_for_dynamic_content <= 100:
2850  2659     ASCIIColors.error(f"Context window too small during analysis of {file_name}.")
2851  2660     # Option: try truncating memory drastically or break

@@ -2885,7 +2694,7 @@ Task: Update the markdown memory by adding new information from this chunk relev
2885  2694     ASCIIColors.magenta(f"--- Deep Analysis Prompt (Global Chunk {global_chunk_id}) ---")
2886  2695     ASCIIColors.cyan(prompt)
2887  2696
2888        -  response = self.generate_text(prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback) # Use main callback for streaming output
      2697  +  response = self.generate_text(prompt, n_predict=(self.llm.default_n_predict or 1024), streaming_callback=callback) # Use main callback for streaming output
2889  2698
2890  2699     if isinstance(response, dict): # Handle error
2891  2700     ASCIIColors.error(f"Chunk processing failed (Global {global_chunk_id}): {response.get('error')}")

@@ -2940,7 +2749,7 @@ Provide the final aggregated answer in {output_format} format, directly addressi
2940  2749     final_example_prompt = final_prompt.replace("{memory}", "<final_memory>")
2941  2750     try:
2942  2751     final_static_tokens = len(self.tokenize(final_example_prompt)) - len(self.tokenize("<final_memory>"))
2943        -  available_final_tokens = ctx_size - final_static_tokens - (self.default_n_predict or 1024)
      2752  +  available_final_tokens = ctx_size - final_static_tokens - (self.llm.default_n_predict or 1024)
2944  2753     except RuntimeError as e:
2945  2754     ASCIIColors.error(f"Tokenization failed during final setup: {e}")
2946  2755     return "Error: Could not calculate final prompt size."

@@ -2956,7 +2765,7 @@ Provide the final aggregated answer in {output_format} format, directly addressi
2956  2765     ASCIIColors.magenta("--- Final Aggregation Prompt ---")
2957  2766     ASCIIColors.cyan(final_prompt)
2958  2767
2959        -  final_output_raw = self.generate_text(final_prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback) # Use main callback
      2768  +  final_output_raw = self.generate_text(final_prompt, n_predict=(self.llm.default_n_predict or 1024), streaming_callback=callback) # Use main callback
2960  2769
2961  2770     if isinstance(final_output_raw, dict):
2962  2771     ASCIIColors.error(f"Final aggregation failed: {final_output_raw.get('error')}")

@@ -3031,9 +2840,9 @@ Provide the final aggregated answer in {output_format} format, directly addressi
3031  2840     tokens = []
3032  2841     else:
3033  2842     # Use the binding's tokenizer for accurate chunking
3034        -  tokens = self.
      2843  +  tokens = self.llm.tokenize(text_to_process)
3035  2844     if chunk_size_tokens is None:
3036        -  chunk_size_tokens = self.default_ctx_size//2
      2845  +  chunk_size_tokens = self.llm.default_ctx_size//2
3037  2846
3038  2847     if len(tokens) <= chunk_size_tokens:
3039  2848     if streaming_callback:

@@ -3064,7 +2873,7 @@ Provide the final aggregated answer in {output_format} format, directly addressi
3064  2873     step = chunk_size_tokens - overlap_tokens
3065  2874     for i in range(0, len(tokens), step):
3066  2875     chunk_tokens = tokens[i:i + chunk_size_tokens]
3067        -  chunk_text = self.
      2876  +  chunk_text = self.llm.detokenize(chunk_tokens)
3068  2877     chunks.append(chunk_text)
3069  2878
3070  2879     chunk_summaries = []
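The final two hunks route chunking through the binding's own tokenizer (self.llm.tokenize / self.llm.detokenize) so chunk sizes line up with the model's real token counts; the loop itself is a plain sliding window with overlap. A self-contained sketch of that loop, using a whitespace tokenizer as a stand-in for the real binding:

    # Stand-in tokenizer for illustration only; the library delegates to the
    # active LLM binding so token counts match the model's tokenizer.
    def tokenize(text): return text.split()
    def detokenize(tokens): return " ".join(tokens)

    def chunk_text(text, chunk_size_tokens=8, overlap_tokens=2):
        tokens = tokenize(text)
        if len(tokens) <= chunk_size_tokens:
            return [text]                      # small inputs pass through untouched
        chunks = []
        step = chunk_size_tokens - overlap_tokens
        for i in range(0, len(tokens), step):
            chunk_tokens = tokens[i:i + chunk_size_tokens]
            chunks.append(detokenize(chunk_tokens))
        return chunks

    print(chunk_text("one two three four five six seven eight nine ten eleven twelve"))
    # two overlapping chunks of at most 8 tokens each

The overlap keeps a little shared context between consecutive chunks so the per-chunk summaries (collected in chunk_summaries) do not lose information at the boundaries.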