lollms-client 0.33.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lollms-client might be problematic.

Files changed (74)
  1. lollms_client/__init__.py +1 -1
  2. lollms_client/llm_bindings/azure_openai/__init__.py +6 -10
  3. lollms_client/llm_bindings/claude/__init__.py +4 -7
  4. lollms_client/llm_bindings/gemini/__init__.py +3 -7
  5. lollms_client/llm_bindings/grok/__init__.py +3 -7
  6. lollms_client/llm_bindings/groq/__init__.py +4 -6
  7. lollms_client/llm_bindings/hugging_face_inference_api/__init__.py +4 -6
  8. lollms_client/llm_bindings/litellm/__init__.py +15 -6
  9. lollms_client/llm_bindings/llamacpp/__init__.py +27 -9
  10. lollms_client/llm_bindings/lollms/__init__.py +24 -14
  11. lollms_client/llm_bindings/lollms_webui/__init__.py +6 -12
  12. lollms_client/llm_bindings/mistral/__init__.py +3 -5
  13. lollms_client/llm_bindings/ollama/__init__.py +6 -11
  14. lollms_client/llm_bindings/open_router/__init__.py +4 -6
  15. lollms_client/llm_bindings/openai/__init__.py +7 -14
  16. lollms_client/llm_bindings/openllm/__init__.py +12 -12
  17. lollms_client/llm_bindings/pythonllamacpp/__init__.py +1 -1
  18. lollms_client/llm_bindings/tensor_rt/__init__.py +8 -13
  19. lollms_client/llm_bindings/transformers/__init__.py +14 -6
  20. lollms_client/llm_bindings/vllm/__init__.py +16 -12
  21. lollms_client/lollms_core.py +303 -490
  22. lollms_client/lollms_discussion.py +431 -78
  23. lollms_client/lollms_llm_binding.py +192 -381
  24. lollms_client/lollms_mcp_binding.py +33 -2
  25. lollms_client/lollms_tti_binding.py +107 -2
  26. lollms_client/mcp_bindings/local_mcp/__init__.py +3 -2
  27. lollms_client/mcp_bindings/remote_mcp/__init__.py +6 -5
  28. lollms_client/mcp_bindings/standard_mcp/__init__.py +3 -5
  29. lollms_client/stt_bindings/lollms/__init__.py +6 -8
  30. lollms_client/stt_bindings/whisper/__init__.py +2 -4
  31. lollms_client/stt_bindings/whispercpp/__init__.py +15 -16
  32. lollms_client/tti_bindings/dalle/__init__.py +50 -29
  33. lollms_client/tti_bindings/diffusers/__init__.py +227 -439
  34. lollms_client/tti_bindings/gemini/__init__.py +320 -0
  35. lollms_client/tti_bindings/lollms/__init__.py +8 -9
  36. lollms_client-1.1.0.dist-info/METADATA +1214 -0
  37. lollms_client-1.1.0.dist-info/RECORD +69 -0
  38. {lollms_client-0.33.0.dist-info → lollms_client-1.1.0.dist-info}/top_level.txt +0 -2
  39. examples/article_summary/article_summary.py +0 -58
  40. examples/console_discussion/console_app.py +0 -266
  41. examples/console_discussion.py +0 -448
  42. examples/deep_analyze/deep_analyse.py +0 -30
  43. examples/deep_analyze/deep_analyze_multiple_files.py +0 -32
  44. examples/function_calling_with_local_custom_mcp.py +0 -250
  45. examples/generate_a_benchmark_for_safe_store.py +0 -89
  46. examples/generate_and_speak/generate_and_speak.py +0 -251
  47. examples/generate_game_sfx/generate_game_fx.py +0 -240
  48. examples/generate_text_with_multihop_rag_example.py +0 -210
  49. examples/gradio_chat_app.py +0 -228
  50. examples/gradio_lollms_chat.py +0 -259
  51. examples/internet_search_with_rag.py +0 -226
  52. examples/lollms_chat/calculator.py +0 -59
  53. examples/lollms_chat/derivative.py +0 -48
  54. examples/lollms_chat/test_openai_compatible_with_lollms_chat.py +0 -12
  55. examples/lollms_discussions_test.py +0 -155
  56. examples/mcp_examples/external_mcp.py +0 -267
  57. examples/mcp_examples/local_mcp.py +0 -171
  58. examples/mcp_examples/openai_mcp.py +0 -203
  59. examples/mcp_examples/run_remote_mcp_example_v2.py +0 -290
  60. examples/mcp_examples/run_standard_mcp_example.py +0 -204
  61. examples/simple_text_gen_test.py +0 -173
  62. examples/simple_text_gen_with_image_test.py +0 -178
  63. examples/test_local_models/local_chat.py +0 -9
  64. examples/text_2_audio.py +0 -77
  65. examples/text_2_image.py +0 -144
  66. examples/text_2_image_diffusers.py +0 -274
  67. examples/text_and_image_2_audio.py +0 -59
  68. examples/text_gen.py +0 -30
  69. examples/text_gen_system_prompt.py +0 -29
  70. lollms_client-0.33.0.dist-info/METADATA +0 -854
  71. lollms_client-0.33.0.dist-info/RECORD +0 -101
  72. test/test_lollms_discussion.py +0 -368
  73. {lollms_client-0.33.0.dist-info → lollms_client-1.1.0.dist-info}/WHEEL +0 -0
  74. {lollms_client-0.33.0.dist-info → lollms_client-1.1.0.dist-info}/licenses/LICENSE +0 -0
@@ -30,15 +30,9 @@ class LollmsClient():
  Provides a unified interface to manage and use different bindings for various modalities.
  """
  def __init__(self,
- # LLM Binding Parameters
- binding_name: str = "lollms",
- host_address: Optional[str] = None, # Shared host address (for service based bindings) default for all bindings if not specified
- models_path: Optional[str] = None, # Shared models folder path (for local file based bindings) default for all bindings if not specified
- model_name: str = "",
- llm_bindings_dir: Path = Path(__file__).parent / "llm_bindings",
- llm_binding_config: Optional[Dict[str, any]] = None,

  # Optional Modality Binding Names
+ llm_binding_name: Optional[str] = None,
  tts_binding_name: Optional[str] = None,
  tti_binding_name: Optional[str] = None,
  stt_binding_name: Optional[str] = None,
@@ -47,6 +41,7 @@ class LollmsClient():
  mcp_binding_name: Optional[str] = None,

  # Modality Binding Directories
+ llm_bindings_dir: Path = Path(__file__).parent / "llm_bindings",
  tts_bindings_dir: Path = Path(__file__).parent / "tts_bindings",
  tti_bindings_dir: Path = Path(__file__).parent / "tti_bindings",
  stt_bindings_dir: Path = Path(__file__).parent / "stt_bindings",
@@ -55,28 +50,13 @@ class LollmsClient():
  mcp_bindings_dir: Path = Path(__file__).parent / "mcp_bindings",

  # Configurations
+ llm_binding_config: Optional[Dict[str, any]] = None,
  tts_binding_config: Optional[Dict[str, any]] = None,
  tti_binding_config: Optional[Dict[str, any]] = None,
  stt_binding_config: Optional[Dict[str, any]] = None,
  ttv_binding_config: Optional[Dict[str, any]] = None,
  ttm_binding_config: Optional[Dict[str, any]] = None,
  mcp_binding_config: Optional[Dict[str, any]] = None,
-
- # General Parameters (mostly defaults for LLM generation)
- service_key: Optional[str] = None, # Shared service key/client_id
- verify_ssl_certificate: bool = True,
- ctx_size: Optional[int|None] = None,
- n_predict: Optional[int|None] = None,
- stream: bool = False,
- temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
- top_k: int = 40, # Ollama default is 40
- top_p: float = 0.9, # Ollama default is 0.9
- repeat_penalty: float = 1.1, # Ollama default is 1.1
- repeat_last_n: int = 64, # Ollama default is 64
-
- seed: Optional[int] = None,
- n_threads: int = 8,
- streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
  user_name ="user",
  ai_name = "assistant",
  **kwargs
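The constructor hunks above replace the old top-level LLM arguments (binding_name, host_address, model_name and the generation defaults) with a single llm_binding_name plus an llm_binding_config dict. A minimal sketch of the new call shape, assuming the "ollama" binding and that model_name/host_address are accepted as config keys (the accepted keys are binding-specific):

```python
from lollms_client import LollmsClient

# Hypothetical values; adjust to the bindings you actually have installed.
client = LollmsClient(
    llm_binding_name="ollama",                       # 0.33.0 used binding_name="..."
    llm_binding_config={
        "model_name": "mistral",                     # previously a top-level argument
        "host_address": "http://localhost:11434",    # previously a top-level argument
    },
)
```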
@@ -125,29 +105,9 @@ class LollmsClient():
  Raises:
  ValueError: If the primary LLM binding cannot be created.
  """
- self.host_address = host_address # Store initial preference
- self.models_path = models_path
- self.service_key = service_key
- self.verify_ssl_certificate = verify_ssl_certificate
-
  # --- LLM Binding Setup ---
- self.binding_manager = LollmsLLMBindingManager(llm_bindings_dir)
- self.binding = self.binding_manager.create_binding(
- binding_name=binding_name,
- host_address=host_address, # Pass initial host preference
- models_path=models_path,
- model_name=model_name,
- service_key=service_key,
- verify_ssl_certificate=verify_ssl_certificate,
- # Pass LLM specific config if needed
- **(llm_binding_config or {})
- )
-
- if self.binding is None:
- available = self.binding_manager.get_available_bindings()
- raise ValueError(f"Failed to create LLM binding: {binding_name}. Available: {available}")
-
  # --- Modality Binding Setup ---
+ self.llm_binding_manager = LollmsLLMBindingManager(llm_bindings_dir)
  self.tts_binding_manager = LollmsTTSBindingManager(tts_bindings_dir)
  self.tti_binding_manager = LollmsTTIBindingManager(tti_bindings_dir)
  self.stt_binding_manager = LollmsSTTBindingManager(stt_bindings_dir)
@@ -155,6 +115,8 @@ class LollmsClient():
  self.ttm_binding_manager = LollmsTTMBindingManager(ttm_bindings_dir)
  self.mcp_binding_manager = LollmsMCPBindingManager(mcp_bindings_dir)

+
+ self.llm: Optional[LollmsLLMBinding] = None
  self.tts: Optional[LollmsTTSBinding] = None
  self.tti: Optional[LollmsTTIBinding] = None
  self.stt: Optional[LollmsSTTBinding] = None
@@ -162,10 +124,29 @@ class LollmsClient():
  self.ttm: Optional[LollmsTTMBinding] = None
  self.mcp: Optional[LollmsMCPBinding] = None

+
+ if llm_binding_name:
+ self.llm = self.llm_binding_manager.create_binding(
+ binding_name=llm_binding_name,
+ **{
+ k: v
+ for k, v in (llm_binding_config or {}).items()
+ if k != "binding_name"
+ }
+ )
+
+ if self.llm is None:
+ available = self.llm_binding_manager.get_available_bindings()
+ ASCIIColors.warning(f"Failed to create LLM binding: {llm_binding_name}. Available: {available}")
+
  if tts_binding_name:
  self.tts = self.tts_binding_manager.create_binding(
  binding_name=tts_binding_name,
- **tts_binding_config
+ **{
+ k: v
+ for k, v in (tts_binding_config or {}).items()
+ if k != "binding_name"
+ }
  )
  if self.tts is None:
  ASCIIColors.warning(f"Failed to create TTS binding: {tts_binding_name}. Available: {self.tts_binding_manager.get_available_bindings()}")
@@ -174,7 +155,11 @@ class LollmsClient():
  if tti_binding_config:
  self.tti = self.tti_binding_manager.create_binding(
  binding_name=tti_binding_name,
- **tti_binding_config
+ **{
+ k: v
+ for k, v in (tti_binding_config or {}).items()
+ if k != "binding_name"
+ }
  )
  else:
  self.tti = self.tti_binding_manager.create_binding(
@@ -187,8 +172,13 @@ class LollmsClient():
  if stt_binding_config:
  self.stt = self.stt_binding_manager.create_binding(
  binding_name=stt_binding_name,
- **stt_binding_config
+ **{
+ k: v
+ for k, v in (stt_binding_config or {}).items()
+ if k != "binding_name"
+ }
  )
+
  else:
  self.stt = self.stt_binding_manager.create_binding(
  binding_name=stt_binding_name,
@@ -199,8 +189,13 @@ class LollmsClient():
  if ttv_binding_config:
  self.ttv = self.ttv_binding_manager.create_binding(
  binding_name=ttv_binding_name,
- **ttv_binding_config
+ **{
+ k: v
+ for k, v in ttv_binding_config.items()
+ if k != "binding_name"
+ }
  )
+
  else:
  self.ttv = self.ttv_binding_manager.create_binding(
  binding_name=ttv_binding_name
@@ -212,7 +207,11 @@ class LollmsClient():
  if ttm_binding_config:
  self.ttm = self.ttm_binding_manager.create_binding(
  binding_name=ttm_binding_name,
- **ttm_binding_config
+ **{
+ k: v
+ for k, v in (ttm_binding_config or {}).items()
+ if k != "binding_name"
+ }
  )
  else:
  self.ttm = self.ttm_binding_manager.create_binding(
@@ -224,8 +223,12 @@ class LollmsClient():
  if mcp_binding_name:
  if mcp_binding_config:
  self.mcp = self.mcp_binding_manager.create_binding(
- mcp_binding_name,
- **mcp_binding_config
+ binding_name=mcp_binding_name,
+ **{
+ k: v
+ for k, v in (mcp_binding_config or {}).items()
+ if k != "binding_name"
+ }
  )
  else:
  self.mcp = self.mcp_binding_manager.create_binding(
@@ -235,17 +238,6 @@ class LollmsClient():
  ASCIIColors.warning(f"Failed to create MCP binding: {mcp_binding_name}. Available: {self.mcp_binding_manager.get_available_bindings()}")

  # --- Store Default Generation Parameters ---
- self.default_ctx_size = ctx_size
- self.default_n_predict = n_predict
- self.default_stream = stream
- self.default_temperature = temperature
- self.default_top_k = top_k
- self.default_top_p = top_p
- self.default_repeat_penalty = repeat_penalty
- self.default_repeat_last_n = repeat_last_n
- self.default_seed = seed
- self.default_n_threads = n_threads
- self.default_streaming_callback = streaming_callback

  # --- Prompt Formatting Attributes ---
  self.user_name = user_name
@@ -264,35 +256,30 @@ class LollmsClient():
  #
  def update_llm_binding(self, binding_name: str, config: Optional[Dict[str, Any]] = None):
  """Update the LLM binding with a new configuration."""
- self.binding = self.binding_manager.create_binding(
+ self.llm = self.llm_binding_manager.create_binding(
  binding_name=binding_name,
- host_address=self.host_address,
- models_path=self.models_path,
- model_name=self.binding.model_name, # Keep the same model name
- service_key=self.service_key,
- verify_ssl_certificate=self.verify_ssl_certificate,
  **(config or {})
  )
- if self.binding is None:
- available = self.binding_manager.get_available_bindings()
+ if self.llm is None:
+ available = self.llm_binding_manager.get_available_bindings()
  raise ValueError(f"Failed to update LLM binding: {binding_name}. Available: {available}")

  def get_ctx_size(self, model_name:str|None=None):
- if self.binding:
- ctx_size = self.binding.get_ctx_size(model_name)
- return ctx_size if ctx_size else self.default_ctx_size
+ if self.llm:
+ ctx_size = self.llm.get_ctx_size(model_name)
+ return ctx_size if ctx_size else self.llm.default_ctx_size
  else:
  return None

  def get_model_name(self):
- if self.binding:
- return self.binding.model_name
+ if self.llm:
+ return self.llm.model_name
  else:
  return None

  def set_model_name(self, model_name)->bool:
- if self.binding:
- self.binding.model_name = model_name
+ if self.llm:
+ self.llm.model_name = model_name
  return True
  else:
  return False
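With the binding now stored on self.llm, the helpers above become thin delegations. A hedged usage sketch (the "openai" binding name and the config keys are assumptions; `client` is an existing LollmsClient instance):

```python
client.update_llm_binding("openai", {"model_name": "gpt-4o-mini"})  # raises ValueError on failure
print(client.get_model_name())   # reads self.llm.model_name, or None if no binding
client.set_model_name("gpt-4o")  # returns False when no LLM binding is loaded
print(client.get_ctx_size())     # falls back to self.llm.default_ctx_size
```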
@@ -400,8 +387,8 @@ class LollmsClient():
  Returns:
  list: List of tokens.
  """
- if self.binding:
- return self.binding.tokenize(text)
+ if self.llm:
+ return self.llm.tokenize(text)
  raise RuntimeError("LLM binding not initialized.")

  def detokenize(self, tokens: list) -> str:
@@ -414,8 +401,8 @@ class LollmsClient():
  Returns:
  str: Detokenized text.
  """
- if self.binding:
- return self.binding.detokenize(tokens)
+ if self.llm:
+ return self.llm.detokenize(tokens)
  raise RuntimeError("LLM binding not initialized.")
  def count_tokens(self, text: str) -> int:
  """
@@ -427,8 +414,8 @@ class LollmsClient():
  Returns:
  int: Number of tokens.
  """
- if self.binding:
- return self.binding.count_tokens(text)
+ if self.llm:
+ return self.llm.count_tokens(text)
  raise RuntimeError("LLM binding not initialized.")

  def count_image_tokens(self, image: str) -> int:
@@ -441,8 +428,8 @@ class LollmsClient():
  Returns:
  int: Estimated number of tokens for the image. Returns -1 on error.
  """
- if self.binding:
- return self.binding.count_image_tokens(image)
+ if self.llm:
+ return self.llm.count_image_tokens(image)
  raise RuntimeError("LLM binding not initialized.")

  def get_model_details(self) -> dict:
@@ -452,8 +439,8 @@ class LollmsClient():
  Returns:
  dict: Model information dictionary.
  """
- if self.binding:
- return self.binding.get_model_info()
+ if self.llm:
+ return self.llm.get_model_info()
  raise RuntimeError("LLM binding not initialized.")

  def switch_model(self, model_name: str) -> bool:
@@ -466,8 +453,8 @@ class LollmsClient():
  Returns:
  bool: True if model loaded successfully, False otherwise.
  """
- if self.binding:
- return self.binding.load_model(model_name)
+ if self.llm:
+ return self.llm.load_model(model_name)
  raise RuntimeError("LLM binding not initialized.")

  def get_available_llm_bindings(self) -> List[str]:
@@ -477,7 +464,7 @@ class LollmsClient():
  Returns:
  List[str]: List of binding names that can be used for LLMs.
  """
- return self.binding_manager.get_available_bindings()
+ return self.llm_binding_manager.get_available_bindings()

  def generate_text(self,
  prompt: str,
@@ -523,11 +510,11 @@ class LollmsClient():
  Returns:
  Union[str, dict]: Generated text or error dictionary if failed.
  """
- if self.binding:
+ if self.llm:

- ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size if self.default_ctx_size else None
+ ctx_size = ctx_size if ctx_size is not None else self.llm.default_ctx_size if self.llm.default_ctx_size else None
  if ctx_size is None:
- ctx_size = self.binding.get_ctx_size()
+ ctx_size = self.llm.get_ctx_size()
  if ctx_size is None:
  ctx_size = 1024*8 # 1028*8= 8192 tokens, a common default for many models
  nb_input_tokens = self.count_tokens(prompt)+ (sum([self.count_image_tokens(image) for image in images]) if images else 0)
@@ -536,21 +523,21 @@ class LollmsClient():
  ASCIIColors.magenta(f"ctx_size : {ctx_size}")
  ASCIIColors.magenta(f"nb_input_tokens : {nb_input_tokens}")

- return self.binding.generate_text(
+ return self.llm.generate_text(
  prompt=prompt,
  images=images,
  system_prompt=system_prompt,
- n_predict=n_predict if n_predict else self.default_n_predict if self.default_n_predict else ctx_size - nb_input_tokens,
- stream=stream if stream is not None else self.default_stream,
- temperature=temperature if temperature is not None else self.default_temperature,
- top_k=top_k if top_k is not None else self.default_top_k,
- top_p=top_p if top_p is not None else self.default_top_p,
- repeat_penalty=repeat_penalty if repeat_penalty is not None else self.default_repeat_penalty,
- repeat_last_n=repeat_last_n if repeat_last_n is not None else self.default_repeat_last_n,
- seed=seed if seed is not None else self.default_seed,
- n_threads=n_threads if n_threads is not None else self.default_n_threads,
- ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size,
- streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback,
+ n_predict=n_predict if n_predict else self.llm.default_n_predict if self.llm.default_n_predict else ctx_size - nb_input_tokens,
+ stream=stream if stream is not None else self.llm.default_stream,
+ temperature=temperature if temperature is not None else self.llm.default_temperature,
+ top_k=top_k if top_k is not None else self.llm.default_top_k,
+ top_p=top_p if top_p is not None else self.llm.default_top_p,
+ repeat_penalty=repeat_penalty if repeat_penalty is not None else self.llm.default_repeat_penalty,
+ repeat_last_n=repeat_last_n if repeat_last_n is not None else self.llm.default_repeat_last_n,
+ seed=seed if seed is not None else self.llm.default_seed,
+ n_threads=n_threads if n_threads is not None else self.llm.default_n_threads,
+ ctx_size = ctx_size if ctx_size is not None else self.llm.default_ctx_size,
+ streaming_callback=streaming_callback if streaming_callback is not None else self.llm.default_streaming_callback,
  split= split,
  user_keyword=user_keyword,
  ai_keyword=ai_keyword
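Note how every generation parameter now falls back to the binding's own defaults (self.llm.default_*) rather than the client-level defaults that 0.33.0 stored in the constructor. A sketch of calling generate_text() with explicit overrides; any parameter left unset follows the fallback chain shown above:

```python
# Sketch only; `client` must hold a working LLM binding.
text = client.generate_text(
    prompt="Summarize the 1.1.0 binding refactor in one sentence.",
    temperature=0.2,   # explicit value wins over self.llm.default_temperature
    n_predict=128,     # otherwise self.llm.default_n_predict or the remaining ctx budget
    stream=False,
)
print(text)
```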
@@ -592,20 +579,20 @@ class LollmsClient():
  Returns:
  Union[str, dict]: Generated text or error dictionary if failed.
  """
- if self.binding:
- return self.binding.generate_from_messages(
+ if self.llm:
+ return self.llm.generate_from_messages(
  messages=messages,
- n_predict=n_predict if n_predict is not None else self.default_n_predict,
- stream=stream if stream is not None else self.default_stream,
- temperature=temperature if temperature is not None else self.default_temperature,
- top_k=top_k if top_k is not None else self.default_top_k,
- top_p=top_p if top_p is not None else self.default_top_p,
- repeat_penalty=repeat_penalty if repeat_penalty is not None else self.default_repeat_penalty,
- repeat_last_n=repeat_last_n if repeat_last_n is not None else self.default_repeat_last_n,
- seed=seed if seed is not None else self.default_seed,
- n_threads=n_threads if n_threads is not None else self.default_n_threads,
- ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size,
- streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback,
+ n_predict=n_predict if n_predict is not None else self.llm.default_n_predict,
+ stream=stream if stream is not None else self.llm.default_stream,
+ temperature=temperature if temperature is not None else self.llm.default_temperature,
+ top_k=top_k if top_k is not None else self.llm.default_top_k,
+ top_p=top_p if top_p is not None else self.llm.default_top_p,
+ repeat_penalty=repeat_penalty if repeat_penalty is not None else self.llm.default_repeat_penalty,
+ repeat_last_n=repeat_last_n if repeat_last_n is not None else self.llm.default_repeat_last_n,
+ seed=seed if seed is not None else self.llm.default_seed,
+ n_threads=n_threads if n_threads is not None else self.llm.default_n_threads,
+ ctx_size = ctx_size if ctx_size is not None else self.llm.default_ctx_size,
+ streaming_callback=streaming_callback if streaming_callback is not None else self.llm.default_streaming_callback,
  )
  raise RuntimeError("LLM binding not initialized.")
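generate_from_messages() forwards the same parameter set. A hedged usage sketch; an OpenAI-style message list is assumed, since the hunk only shows how parameters are forwarded to the binding:

```python
messages = [
    {"role": "system", "content": "You are a terse assistant."},
    {"role": "user", "content": "What changed between 0.33.0 and 1.1.0?"},
]
reply = client.generate_from_messages(messages, n_predict=256, stream=False)
print(reply)
```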
 
@@ -650,21 +637,21 @@ class LollmsClient():
  Returns:
  Union[str, dict]: Generated text or an error dictionary if failed.
  """
- if self.binding:
- return self.binding.chat(
+ if self.llm:
+ return self.llm.chat(
  discussion=discussion,
  branch_tip_id=branch_tip_id,
- n_predict=n_predict if n_predict is not None else self.default_n_predict,
- stream=stream if stream is not None else True if streaming_callback is not None else self.default_stream,
- temperature=temperature if temperature is not None else self.default_temperature,
- top_k=top_k if top_k is not None else self.default_top_k,
- top_p=top_p if top_p is not None else self.default_top_p,
- repeat_penalty=repeat_penalty if repeat_penalty is not None else self.default_repeat_penalty,
- repeat_last_n=repeat_last_n if repeat_last_n is not None else self.default_repeat_last_n,
- seed=seed if seed is not None else self.default_seed,
- n_threads=n_threads if n_threads is not None else self.default_n_threads,
- ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size,
- streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback
+ n_predict=n_predict if n_predict is not None else self.llm.default_n_predict,
+ stream=stream if stream is not None else True if streaming_callback is not None else self.llm.default_stream,
+ temperature=temperature if temperature is not None else self.llm.default_temperature,
+ top_k=top_k if top_k is not None else self.llm.default_top_k,
+ top_p=top_p if top_p is not None else self.llm.default_top_p,
+ repeat_penalty=repeat_penalty if repeat_penalty is not None else self.llm.default_repeat_penalty,
+ repeat_last_n=repeat_last_n if repeat_last_n is not None else self.llm.default_repeat_last_n,
+ seed=seed if seed is not None else self.llm.default_seed,
+ n_threads=n_threads if n_threads is not None else self.llm.default_n_threads,
+ ctx_size = ctx_size if ctx_size is not None else self.llm.default_ctx_size,
+ streaming_callback=streaming_callback if streaming_callback is not None else self.llm.default_streaming_callback
  )
  raise RuntimeError("LLM binding not initialized.")
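chat() keeps the behaviour of turning streaming on whenever a callback is supplied. A hedged sketch of the discussion-based path; LollmsDiscussion comes from lollms_discussion.py (also reworked in this release), and the exact callback signature is an assumption:

```python
from lollms_client import MSG_TYPE  # assumed to be exported at package level

def on_event(token, msg_type, params=None, history=None):
    if msg_type == MSG_TYPE.MSG_TYPE_CHUNK:
        print(token, end="", flush=True)
    return True  # returning False is commonly used to stop generation

discussion = ...  # an existing LollmsDiscussion instance (creation not shown here)
answer = client.chat(discussion, streaming_callback=on_event)  # stream defaults to True with a callback
```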
 
@@ -679,15 +666,15 @@ class LollmsClient():
  Returns:
  list: List of embeddings.
  """
- if self.binding:
- return self.binding.embed(text, **kwargs)
+ if self.llm:
+ return self.llm.embed(text, **kwargs)
  raise RuntimeError("LLM binding not initialized.")


  def listModels(self):
  """Lists models available to the current LLM binding."""
- if self.binding:
- return self.binding.listModels()
+ if self.llm:
+ return self.llm.listModels()
  raise RuntimeError("LLM binding not initialized.")

  # --- Convenience Methods for Lollms LLM Binding Features ---
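embed() and listModels() are now plain delegations that raise RuntimeError when no LLM binding was created, instead of assuming one always exists. A small defensive usage sketch:

```python
try:
    print(client.listModels())
    vector = client.embed("release 1.1.0 reorganizes the bindings")
    print(len(vector))
except RuntimeError as err:
    print(f"No LLM binding loaded: {err}")
```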
@@ -698,8 +685,8 @@ class LollmsClient():
698
685
  Returns:
699
686
  Union[List[Dict], Dict]: List of personality dicts or error dict.
700
687
  """
701
- if self.binding and hasattr(self.binding, 'lollms_listMountedPersonalities'):
702
- return self.binding.lollms_listMountedPersonalities()
688
+ if self.llm and hasattr(self.llm, 'lollms_listMountedPersonalities'):
689
+ return self.llm.lollms_listMountedPersonalities()
703
690
  else:
704
691
  ASCIIColors.warning("listMountedPersonalities is only available for the 'lollms' LLM binding.")
705
692
  return {"status": False, "error": "Functionality not available for the current binding"}
@@ -910,7 +897,7 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
910
897
  streaming_callback: Optional[Callable[[str, int, Optional[Dict], Optional[List]], bool]] = None,
911
898
  **llm_generation_kwargs
912
899
  ) -> Dict[str, Any]:
913
- if not self.binding or not self.mcp:
900
+ if not self.llm or not self.mcp:
914
901
  return {"final_answer": "", "tool_calls": [], "error": "LLM or MCP binding not initialized."}
915
902
 
916
903
  turn_history: List[Dict[str, Any]] = []
@@ -1076,7 +1063,7 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
1076
1063
  "- Do not make up information. If the findings are insufficient to fully answer the request, state what you found and what remains unanswered.\n"
1077
1064
  "- Format your response clearly using markdown where appropriate.\n"
1078
1065
  )
1079
- final_answer_text = self.generate_text(prompt=final_answer_prompt, system_prompt=system_prompt, images=images, stream=streaming_callback is not None, streaming_callback=streaming_callback, temperature=final_answer_temperature if final_answer_temperature is not None else self.default_temperature, **(llm_generation_kwargs or {}))
1066
+ final_answer_text = self.generate_text(prompt=final_answer_prompt, system_prompt=system_prompt, images=images, stream=streaming_callback is not None, streaming_callback=streaming_callback, temperature=final_answer_temperature if final_answer_temperature is not None else self.llm.default_temperature, **(llm_generation_kwargs or {}))
1080
1067
 
1081
1068
  if streaming_callback:
1082
1069
  streaming_callback("Final answer generation complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "final_answer_synthesis"}, turn_history = turn_history)
@@ -1117,7 +1104,7 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
1117
1104
  """
1118
1105
  Enhanced RAG with dynamic objective refinement and a knowledge scratchpad.
1119
1106
  """
1120
- if not self.binding:
1107
+ if not self.llm:
1121
1108
  return {"final_answer": "", "rag_hops_history": [], "all_retrieved_sources": [], "error": "LLM binding not initialized."}
1122
1109
 
1123
1110
  effective_ctx_size = ctx_size or getattr(self, "default_ctx_size", 20000)
@@ -1456,394 +1443,220 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
1456
1443
  new_scratchpad_text = self.generate_text(prompt=synthesis_prompt, n_predict=1024, temperature=0.0)
1457
1444
  return self.remove_thinking_blocks(new_scratchpad_text).strip()
1458
1445
 
1459
-
1460
1446
  def generate_with_mcp_rag(
1461
1447
  self,
1462
1448
  prompt: str,
1449
+ context: Optional[str] = None,
1463
1450
  use_mcps: Union[None, bool, List[str]] = None,
1464
1451
  use_data_store: Union[None, Dict[str, Callable]] = None,
1465
1452
  system_prompt: str = None,
1466
1453
  reasoning_system_prompt: str = "You are a logical AI assistant. Your task is to achieve the user's goal by thinking step-by-step and using the available tools.",
1467
1454
  images: Optional[List[str]] = None,
1468
- max_reasoning_steps: int = None,
1469
- decision_temperature: float = None,
1470
- final_answer_temperature: float = None,
1455
+ max_reasoning_steps: int = 10,
1456
+ decision_temperature: float = 0.5,
1457
+ final_answer_temperature: float = 0.7,
1471
1458
  streaming_callback: Optional[Callable[[str, 'MSG_TYPE', Optional[Dict], Optional[List]], bool]] = None,
1472
- rag_top_k: int = None,
1473
- rag_min_similarity_percent: float = None,
1474
- output_summarization_threshold: int = None, # In tokens
1459
+ rag_top_k: int = 5,
1460
+ rag_min_similarity_percent: float = 50.0,
1461
+ output_summarization_threshold: int = 500, # In tokens
1462
+ force_mcp_use: bool = False,
1475
1463
  debug: bool = False,
1476
1464
  **llm_generation_kwargs
1477
1465
  ) -> Dict[str, Any]:
1478
- """Generates a response using a dynamic agent with stateful, ID-based step tracking.
1479
-
1480
- This method orchestrates a sophisticated agentic process where an AI
1481
- repeatedly observes its state, thinks about the next best action, and
1482
- acts. This "observe-think-act" loop allows the agent to adapt to new
1483
- information, recover from failures, and build a comprehensive
1484
- understanding of the problem before responding.
1485
-
1486
- A key feature is its stateful step notification system, designed for rich
1487
- UI integration. When a step starts, it sends a `step_start` message with
1488
- a unique ID and description. When it finishes, it sends a `step_end`
1489
- message with the same ID, allowing a user interface to track the
1490
- progress of specific, long-running tasks like tool calls.
1491
-
1466
+ """
1467
+ Orchestrates a sophisticated and robust agentic process to generate a response.
1468
+
1469
+ This method employs a dynamic "observe-think-act" loop with several advanced architectural
1470
+ patterns for improved robustness and efficiency, particularly when handling code.
1471
+
1472
+ Key Features:
1473
+ - **Context-Aware Asset Ingestion**: The agent automatically detects if the `context`
1474
+ parameter (representing the previous turn) contains code. If so, it registers that
1475
+ code as an asset with a UUID, preventing the LLM from trying to paste large code
1476
+ blocks into its prompts and avoiding JSON errors.
1477
+ - **Tool Perception Filtering**: Identifies tools that directly consume code and HIDES
1478
+ them from the LLM's view, forcing it to use the safer `generate_and_call` workflow.
1479
+ - **Forced Safe Workflow**: The `generate_and_call` meta-tool is the ONLY way the agent
1480
+ can execute code, ensuring a robust, error-free, and efficient process.
1481
+ - **Verbose Internal Logging**: The `generate_and_call` tool is now fully instrumented
1482
+ with detailed logging and robust error handling to ensure every failure is visible
1483
+ and diagnosable, preventing silent loops.
1484
+
1492
1485
  Args:
1493
- prompt: The user's initial prompt or question.
1486
+ prompt: The user's initial prompt or question for the current turn.
1487
+ context: An optional string containing the content of the previous turn.
1494
1488
  use_mcps: Controls MCP tool usage.
1495
1489
  use_data_store: Controls RAG usage.
1496
- system_prompt: The main system prompt for the final answer generation.
1497
- reasoning_system_prompt: The system prompt for the iterative
1498
- decision-making process.
1499
- images: A list of base64-encoded images provided by the user.
1500
- max_reasoning_steps: The maximum number of reasoning cycles.
1501
- decision_temperature: The temperature for the LLM's decision-making.
1502
- final_answer_temperature: The temperature for the final answer synthesis.
1503
- streaming_callback: A function for real-time output of tokens and steps.
1504
- rag_top_k: The number of top documents to retrieve during RAG.
1490
+ system_prompt: Main system prompt for the final answer.
1491
+ reasoning_system_prompt: System prompt for the decision-making process.
1492
+ images: A list of base64-encoded images provided by the user for the current turn.
1493
+ max_reasoning_steps: Maximum number of reasoning cycles.
1494
+ decision_temperature: Temperature for LLM's decision-making.
1495
+ final_answer_temperature: Temperature for final answer synthesis.
1496
+ streaming_callback: Function for real-time output of tokens and steps.
1497
+ rag_top_k: Number of top documents to retrieve during RAG.
1505
1498
  rag_min_similarity_percent: Minimum similarity for RAG results.
1506
- output_summarization_threshold: The token count that triggers automatic
1507
- summarization of a tool's text output.
1508
- debug : If true, we'll report the detailed promptin and response information
1499
+ output_summarization_threshold: Token count that triggers summarization.
1500
+ force_mcp_use: If True, bypasses the "fast answer" check.
1501
+ debug: If True, prints detailed prompting and response information.
1509
1502
  **llm_generation_kwargs: Additional keyword arguments for LLM calls.
1510
1503
 
1511
1504
  Returns:
1512
- A dictionary containing the agent's full run, including the final
1513
- answer, the complete internal scratchpad, a log of tool calls,
1514
- any retrieved RAG sources, and other metadata.
1505
+ A dictionary containing the agent's full run.
1515
1506
  """
1516
- reasoning_step_id = None
1517
- if not self.binding:
1507
+ if not self.llm:
1518
1508
  return {"final_answer": "", "tool_calls": [], "sources": [], "error": "LLM binding not initialized."}
1509
+ if max_reasoning_steps is None:
1510
+ max_reasoning_steps = 10
1511
+ # --- Helper Functions ---
1512
+ def log_event(desc, event_type=MSG_TYPE.MSG_TYPE_CHUNK, meta=None, event_id=None) -> Optional[str]:
1513
+ if not streaming_callback: return None
1514
+ is_start = event_type == MSG_TYPE.MSG_TYPE_STEP_START
1515
+ event_id = str(uuid.uuid4()) if is_start and not event_id else event_id
1516
+ params = {"type": event_type, "description": desc, **(meta or {})}
1517
+ if event_id: params["id"] = event_id
1518
+ streaming_callback(desc, event_type, params)
1519
+ return event_id
1519
1520
 
1520
- if not max_reasoning_steps:
1521
- max_reasoning_steps= 10
1522
- if not rag_min_similarity_percent:
1523
- rag_min_similarity_percent= 50
1524
- if not rag_top_k:
1525
- rag_top_k = 5
1526
- if not decision_temperature:
1527
- decision_temperature = 0.7
1528
- if not output_summarization_threshold:
1529
- output_summarization_threshold = 500
1530
-
1531
- events = []
1532
-
1533
-
1534
- # --- Initialize Agent State ---
1535
- sources_this_turn: List[Dict[str, Any]] = []
1536
- tool_calls_this_turn: List[Dict[str, Any]] = []
1537
- generated_code_store: Dict[str, str] = {} # NEW: Store for UUID -> code
1538
- original_user_prompt = prompt
1539
-
1540
- initial_state_parts = [
1541
- "### Initial State",
1542
- "- My goal is to address the user's request.",
1543
- "- I have not taken any actions yet."
1544
- ]
1545
- if images:
1546
- initial_state_parts.append(f"- The user has provided {len(images)} image(s) for context.")
1547
- current_scratchpad = "\n".join(initial_state_parts)
1548
-
1549
- def log_prompt(prompt, type="prompt"):
1550
- ASCIIColors.cyan(f"** DEBUG: {type} **")
1551
- ASCIIColors.magenta(prompt[-15000:])
1552
- prompt_size = self.count_tokens(prompt)
1553
- ASCIIColors.red(f"Prompt size:{prompt_size}/{self.default_ctx_size}")
1521
+ def log_prompt(title: str, prompt_text: str):
1522
+ if not debug: return
1523
+ ASCIIColors.cyan(f"** DEBUG: {title} **")
1524
+ ASCIIColors.magenta(prompt_text[-15000:])
1525
+ prompt_size = self.count_tokens(prompt_text)
1526
+ ASCIIColors.red(f"Prompt size:{prompt_size}/{self.llm.default_ctx_size}")
1554
1527
  ASCIIColors.cyan(f"** DEBUG: DONE **")
1555
1528
 
1556
- # --- Define Inner Helper Functions ---
1557
- def log_event(
1558
- description: str,
1559
- event_type: MSG_TYPE = MSG_TYPE.MSG_TYPE_CHUNK,
1560
- metadata: Optional[Dict] = None,
1561
- event_id=None
1562
- ) -> Optional[str]:
1563
- if not streaming_callback: return None
1564
- event_id = str(uuid.uuid4()) if event_type==MSG_TYPE.MSG_TYPE_STEP_START else event_id
1565
- params = {"type": event_type, "description": description, **(metadata or {})}
1566
- params["id"] = event_id
1567
- streaming_callback(description, event_type, params)
1568
- return event_id
1529
+ # --- 1. Initialize State & Context-Aware Asset Ingestion ---
1530
+ original_user_prompt, tool_calls_this_turn, sources_this_turn = prompt, [], []
1531
+ asset_store: Dict[str, Dict] = {}
1532
+ initial_state_parts = ["### Initial State", "- My goal is to address the user's request comprehensively."]
1533
+ if images:
1534
+ for img_b64 in images:
1535
+ img_uuid = str(uuid.uuid4())
1536
+ asset_store[img_uuid] = {"type": "image", "content": img_b64}
1537
+ initial_state_parts.append(f"- User provided image, asset ID: {img_uuid}")
1538
+ if context:
1539
+ code_blocks = re.findall(r"```(?:\w+)?\n([\s\S]+?)\n```", context)
1540
+ if code_blocks:
1541
+ last_code_block = code_blocks[-1]
1542
+ code_uuid = str(uuid.uuid4())
1543
+ asset_store[code_uuid] = {"type": "code", "content": last_code_block}
1544
+ initial_state_parts.append(f"- The user's request likely refers to a code block from the previous turn's context. It has been registered as asset ID: {code_uuid}")
1545
+ current_scratchpad = "\n".join(initial_state_parts)
1569
1546
 
1570
- def _substitute_code_uuids_recursive(data: Any, code_store: Dict[str, str]):
1571
- """Recursively finds and replaces code UUIDs in tool parameters."""
1572
- if isinstance(data, dict):
1573
- for key, value in data.items():
1574
- if isinstance(value, str) and value in code_store:
1575
- data[key] = code_store[value]
1576
- else:
1577
- _substitute_code_uuids_recursive(value, code_store)
1578
- elif isinstance(data, list):
1579
- for i, item in enumerate(data):
1580
- if isinstance(item, str) and item in code_store:
1581
- data[i] = code_store[item]
1582
- else:
1583
- _substitute_code_uuids_recursive(item, code_store)
1584
-
1585
- discovery_step_id = log_event("**Discovering tools**",MSG_TYPE.MSG_TYPE_STEP_START)
1586
- # --- 1. Discover Available Tools ---
1587
- available_tools = []
1588
- if use_mcps and self.mcp:
1589
- discovered_tools = self.mcp.discover_tools(force_refresh=True)
1590
- if isinstance(use_mcps, list):
1591
- available_tools.extend([t for t in discovered_tools if t["name"] in use_mcps])
1592
-
1547
+ # --- 2. Tool Discovery and Filtering ---
1548
+ discovery_step_id = log_event("Discovering and filtering tools...", MSG_TYPE.MSG_TYPE_STEP_START)
1549
+ all_discovered_tools, visible_tools, code_consuming_tools = [], [], set()
1550
+ if use_mcps and hasattr(self, 'mcp'):
1551
+ mcp_tools = self.mcp.discover_tools(force_refresh=True)
1552
+ if isinstance(use_mcps, list): all_discovered_tools.extend([t for t in mcp_tools if t["name"] in use_mcps])
1553
+ elif use_mcps is True: all_discovered_tools.extend(mcp_tools)
1554
+ code_param_keywords = {'code', 'script', 'python_code', 'javascript', 'html', 'css'}
1555
+ for tool in all_discovered_tools:
1556
+ if any(p in code_param_keywords for p in tool.get("input_schema", {}).get("properties", {})): code_consuming_tools.add(tool['name'])
1557
+ else: visible_tools.append(tool)
1593
1558
  if use_data_store:
1594
- for store_name in use_data_store:
1595
- available_tools.append({
1596
- "name": f"research::{store_name}",
1597
- "description": f"Queries the '{store_name}' knowledge base for relevant information.",
1598
- "input_schema": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}
1599
- })
1559
+ for name, info in use_data_store.items(): visible_tools.append({"name": f"research::{name}", "description": info.get("description", f"Queries '{name}'."), "input_schema": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}})
1560
+ log_event(f"Made {len(visible_tools)} tools visible (hid {len(code_consuming_tools)} code tools).", MSG_TYPE.MSG_TYPE_STEP_END, meta={"visible": len(visible_tools), "hidden": len(code_consuming_tools), "hidden_list": list(code_consuming_tools)}, event_id=discovery_step_id)
1561
+
1562
+ # --- 3. Fast Answer Path (Not shown for brevity, but retained) ---
1600
1563
 
1601
- # Add the new prepare_code tool definition
1602
- available_tools.append({
1603
- "name": "local_tools::prepare_code",
1604
- "description": """Generates and stores code into a buffer to be used by another tool. Never put code into a tool directly, first call this to generate the code and then paste the uuid in the tool that requires code. Only use this for generating code to be sent to another tool. You can put the uuid of the generated code into the fields that require long code among the tools. If no tool requires code as input do not use prepare_code. prepare_code do not execute the code nor does it audit it.""",
1605
- "input_schema": {"type": "object", "properties": {"prompt": {"type": "string", "description": "A detailed natural language description of the code's purpose and requirements."}, "language": {"type": "string", "description": "The programming language of the generated code. By default it uses python."}}, "required": ["prompt"]}
1606
- })
1607
- available_tools.append({
1608
- "name": "local_tools::view_generated_code",
1609
- "description": """Views the code that was generated and stored to the buffer. You need to have a valid uuid of the generated code.""",
1610
- "input_schema": {"type": "object", "properties": {"code_id": {"type": "string", "description": "The case sensitive uuid of the generated code."}}, "required": ["uuid"]}
1611
- })
1612
- # Add the new refactor_scratchpad tool definition
1613
- available_tools.append({
1614
- "name": "local_tools::refactor_scratchpad",
1615
- "description": "Rewrites the scratchpad content to clean it and reorganize it. Only use if the scratchpad is messy or contains too much information compared to what you need.",
1616
- "input_schema": {"type": "object", "properties": {}}
1617
- })
1618
-
1619
- formatted_tools_list = "\n".join([f"**{t['name']}**:\n{t['description']}\ninput schema:\n{json.dumps(t['input_schema'])}" for t in available_tools])
1620
- formatted_tools_list += "\n**local_tools::request_clarification**:\nUse if the user's request is ambiguous and you can not infer a clear idea of his intent. this tool has no parameters."
1621
- formatted_tools_list += "\n**local_tools::final_answer**:\nUse when you are ready to respond to the user. this tool has no parameters."
1622
-
1623
- if discovery_step_id: log_event(f"**Discovering tools** found {len(available_tools)} tools",MSG_TYPE.MSG_TYPE_STEP_END, event_id=discovery_step_id)
1624
-
1625
- # --- 2. Dynamic Reasoning Loop ---
1564
+ # --- 4. Format Tools for Main Loop ---
1565
+ CODE_PLACEHOLDER = "{GENERATED_CODE}"
1566
+ built_in_tools = [{"name": "local_tools::generate_and_call", "description": f"CRITICAL: To run or modify code, you MUST use this tool. It generates code (e.g., to fix code from an asset) and then calls a tool with it. Refer to existing code using its asset ID. Use '{CODE_PLACEHOLDER}' in `next_tool_params` for the NEWLY generated code.", "input_schema": { "type": "object", "properties": { "code_generation_prompt": {"type": "string"}, "language": {"type": "string"}, "next_tool_name": {"type": "string"}, "next_tool_params": {"type": "object"}}, "required": ["code_generation_prompt", "next_tool_name", "next_tool_params"]}}, {"name": "local_tools::refactor_scratchpad", "description": "Rewrites the scratchpad.", "input_schema": {}}, {"name": "local_tools::request_clarification", "description": "Asks the user for more information.", "input_schema": {"type": "object", "properties": {"question_to_user": {"type": "string"}}, "required": ["question_to_user"]}}, {"name": "local_tools::final_answer", "description": "Provides the final answer.", "input_schema": {}}]
1567
+ all_visible_tools = visible_tools + built_in_tools
1568
+ formatted_tools_list = "\n".join([f"**{t['name']}**:\n- Description: {t['description']}" for t in all_visible_tools])
1569
+
1570
+ # --- 5. Dynamic Reasoning Loop ---
1626
1571
  for i in range(max_reasoning_steps):
1572
+ reasoning_step_id = log_event(f"Reasoning Step {i+1}/{max_reasoning_steps}", MSG_TYPE.MSG_TYPE_STEP_START)
1627
1573
  try:
1628
- reasoning_step_id = log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**", MSG_TYPE.MSG_TYPE_STEP_START)
1629
- user_context = f'Original User Request: "{original_user_prompt}"'
1630
- if images: user_context += f'\n(Note: {len(images)} image(s) were provided with this request.)'
1574
+ reasoning_prompt = f"""--- AVAILABLE ACTIONS ---\n{formatted_tools_list}\n\n--- YOUR INTERNAL SCRATCHPAD ---\n{current_scratchpad}\n--- END SCRATCHPAD ---\n\n**INSTRUCTIONS:**\n1. **OBSERVE:** Review your scratchpad, especially available asset IDs.\n2. **THINK:** Based on '{original_user_prompt}', what is the single next logical action using ONLY the available actions?\n3. **ACT:** Formulate your decision as a JSON object. Do NOT paste large code blocks into parameters; use their asset IDs instead."""
1575
+ action_schema = {"thought": "My reasoning.", "action": {"tool_name": "string", "tool_params": "object"}}
1576
+ action_data = self.generate_structured_content(prompt=reasoning_prompt, schema=action_schema, system_prompt=reasoning_system_prompt, temperature=decision_temperature, **llm_generation_kwargs)
1631
1577
 
1632
- reasoning_prompt_template = f"""
1633
- --- AVAILABLE TOOLS ---
1634
- {formatted_tools_list}
1635
- --- CONTEXT ---
1636
- {user_context}
1637
- --- YOUR INTERNAL SCRATCHPAD (Work History & Analysis) ---
1638
- {current_scratchpad}
1639
- --- END OF SCRATCHPAD ---
1640
-
1641
- **INSTRUCTIONS:**
1642
- 1. **OBSERVE:** Review the `Observation` from your most recent step in the scratchpad.
1643
- 2. **THINK:**
1644
- - Does the latest observation completely fulfill the user's original request?
1645
- - If YES, your next action MUST be to use the `final_answer` tool.
1646
- - If NO, what is the single next logical step needed? This may involve writing code first with `prepare_code`, then using another tool.
1647
- - If you are stuck or the request is ambiguous, use `local_tools::request_clarification`.
1648
- 3. **ACT:** Formulate your decision as a JSON object.
1649
- ** Important ** Always use this format alias::tool_name to call the tool
1650
- """
1651
- action_template = {
1652
- "thought": "My detailed analysis of the last observation and my reasoning for the next action and how it integrates with my global plan.",
1653
- "action": {
1654
- "tool_name": "The single tool to use (e.g., 'local_tools::prepare_code', 'local_tools::final_answer').",
1655
- "tool_params": {"param1": "value1"},
1656
- "clarification_question": "(string, ONLY if tool_name is 'local_tools::request_clarification')"
1657
- }
1658
- }
1659
- if debug: log_prompt(reasoning_prompt_template, f"REASONING PROMPT (Step {i+1})")
1660
- structured_action_response = self.generate_code(
1661
- prompt=reasoning_prompt_template, template=json.dumps(action_template, indent=2),
1662
- system_prompt=reasoning_system_prompt, temperature=decision_temperature,
1663
- images=images if i == 0 else None
1664
- )
1665
- if structured_action_response is None:
1666
- log_event("**Error generating thought.** Retrying..", MSG_TYPE.MSG_TYPE_EXCEPTION)
1578
+ if not action_data or not isinstance(action_data.get("action"), dict):
1579
+ log_event("Failed to generate a valid JSON action. Will retry.", MSG_TYPE.MSG_TYPE_WARNING, event_id=reasoning_step_id)
1580
+ current_scratchpad += "\n\n### Step Failure\n- **Error:** Failed to produce a valid JSON action."
1667
1581
  continue
1668
- if debug: log_prompt(structured_action_response, f"RAW REASONING RESPONSE (Step {i+1})")
1669
-
1670
- try:
1671
- action_data = robust_json_parser(structured_action_response)
1672
- thought = action_data.get("thought", "No thought was generated.")
1673
- action = action_data.get("action", {})
1674
- if isinstance(action,str):
1675
- tool_name = action
1676
- tool_params = {}
1677
- else:
1678
- tool_name = action.get("tool_name")
1679
- tool_params = action.get("tool_params", {})
1680
- except (json.JSONDecodeError, TypeError) as e:
1681
- current_scratchpad += f"\n\n### Step {i+1} Failure\n- **Error:** Failed to generate a valid JSON action: {e}"
1682
- log_event(f"Step Failure: Invalid JSON action.", MSG_TYPE.MSG_TYPE_EXCEPTION, metadata={"details": str(e)})
1683
- if reasoning_step_id: log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**", MSG_TYPE.MSG_TYPE_STEP_END, metadata={"error": str(e)}, event_id=reasoning_step_id)
1684
-
1685
1582
 
1583
+ thought, action = action_data.get("thought", ""), action_data.get("action", {})
1584
+ tool_name, tool_params = action.get("tool_name"), action.get("tool_params", {})
1686
1585
  current_scratchpad += f"\n\n### Step {i+1}: Thought\n{thought}"
1687
- log_event(f"{thought}", MSG_TYPE.MSG_TYPE_THOUGHT_CONTENT)
1586
+ log_event(thought, MSG_TYPE.MSG_TYPE_THOUGHT_CONTENT)
1688
1587
 
1689
- if not tool_name:
1690
- # Handle error...
1691
- break
1692
-
1693
- # --- Handle special, non-executing tools ---
1588
+ if tool_name == "local_tools::final_answer": break
1694
1589
  if tool_name == "local_tools::request_clarification":
1695
- # Handle clarification...
1696
- if isinstance(action, dict):
1697
- return {"final_answer": action.get("clarification_question", "Could you please provide more details?"), "final_scratchpad": current_scratchpad, "tool_calls": tool_calls_this_turn, "sources": sources_this_turn, "clarification_required": True, "error": None}
1698
- elif isinstance(action, str):
1699
- return {"final_answer": action, "final_scratchpad": current_scratchpad, "tool_calls": tool_calls_this_turn, "sources": sources_this_turn, "clarification_required": True, "error": None}
1700
- else:
1701
- return {"final_answer": "Could you please provide more details?", "final_scratchpad": current_scratchpad, "tool_calls": tool_calls_this_turn, "sources": sources_this_turn, "clarification_required": True, "error": None}
1702
- if tool_name == "local_tools::final_answer":
1703
- current_scratchpad += f"\n\n### Step {i+1}: Action\n- **Action:** Decided to formulate the final answer."
1704
- log_event("**Action**: Formulate final answer.", MSG_TYPE.MSG_TYPE_THOUGHT_CHUNK)
1705
- if reasoning_step_id: log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**",MSG_TYPE.MSG_TYPE_STEP_END, event_id=reasoning_step_id)
1706
- break
1590
+ return {"final_answer": tool_params.get("question_to_user", "?"), "final_scratchpad": current_scratchpad, "tool_calls": tool_calls_this_turn, "sources": sources_this_turn, "clarification_required": True, "error": None}
1707
1591
 
1708
- # --- Handle the `prepare_code` tool specifically ---
1709
- if tool_name == 'local_tools::prepare_code':
1710
- code_gen_id = log_event(f"Generating code...", MSG_TYPE.MSG_TYPE_STEP_START, metadata={"name": "prepare_code", "id": "gencode"})
1711
- code_prompt = tool_params.get("prompt", "Generate the requested code.")
1712
-
1713
- # Use a specific system prompt to get raw code
1714
- code_generation_system_prompt = "You are a code generation assistant. Generate ONLY the raw code based on the user's request. Do not add any explanations, markdown code fences, or other text outside of the code itself."
1715
- generated_code = self.generate_code(prompt=code_prompt, system_prompt=code_generation_system_prompt, **llm_generation_kwargs)
-
- code_uuid = str(uuid.uuid4())
- generated_code_store[code_uuid] = generated_code
-
- tool_result = {"status": "success", "code_id": code_uuid, "summary": f"Code generated successfully. Use this ID in the next tool call that requires code."}
- tool_calls_this_turn.append({"name": "prepare_code", "params": tool_params, "result": tool_result})
- observation_text = f"```json\n{json.dumps(tool_result, indent=2)}\n```"
- current_scratchpad += f"\n\n### Step {i+1}: Observation\n- **Action:** Called `{tool_name}`\n- **Result:**\n{observation_text}"
- log_event(f"Code generated with ID: {code_uuid}", MSG_TYPE.MSG_TYPE_OBSERVATION)
- if code_gen_id: log_event(f"Generating code...", MSG_TYPE.MSG_TYPE_TOOL_CALL, metadata={"id": code_gen_id, "result": tool_result})
- if reasoning_step_id: log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**", MSG_TYPE.MSG_TYPE_STEP_END, event_id= reasoning_step_id)
- continue # Go to the next reasoning step immediately
- if tool_name == 'local_tools::view_generated_code':
- code_id = tool_params.get("code_id")
- if code_id:
- tool_result = {"status": "success", "code_id": code_id, "generated_code":generated_code_store[code_uuid]}
- else:
- tool_result = {"status": "error", "code_id": code_id, "error":"Unknown uuid"}
- observation_text = f"```json\n{json.dumps(tool_result, indent=2)}\n```"
- current_scratchpad += f"\n\n### Step {i+1}: Observation\n- **Action:** Called `{tool_name}`\n- **Result:**\n{observation_text}"
- log_event(f"Result from `{tool_name}`:\n```\n{generated_code_store[code_uuid]}\n```\n", MSG_TYPE.MSG_TYPE_TOOL_CALL, metadata={"id": code_gen_id, "result": tool_result})
- continue
- if tool_name == 'local_tools::refactor_scratchpad':
- scratchpad_cleaning_prompt = f"""Enhance this scratchpad content to be more organized and comprehensive. Keep relevant experience information and remove any useless redundancies. Try to log learned things from the context so that you won't make the same mistakes again. Do not remove the main objective information or any crucial information that may be useful for the next iterations. Answer directly with the new scratchpad content without any comments.
- --- YOUR INTERNAL SCRATCHPAD (Work History & Analysis) ---
- {current_scratchpad}
- --- END OF SCRATCHPAD ---"""
- current_scratchpad = self.generate_text(scratchpad_cleaning_prompt)
- log_event(f"**New scratchpad**:\n{current_scratchpad}", MSG_TYPE.MSG_TYPE_SCRATCHPAD)
-
- # --- Substitute UUIDs and Execute Standard Tools ---
- log_event(f"**Calling tool**: `{tool_name}` with params:\n{dict_to_markdown(tool_params)}", MSG_TYPE.MSG_TYPE_TOOL_CALL)
- _substitute_code_uuids_recursive(tool_params, generated_code_store)
-
- tool_call_id = log_event(f"**Executing tool**: {tool_name}",MSG_TYPE.MSG_TYPE_STEP_START, metadata={"name": tool_name, "parameters": tool_params, "id":"executing tool"})
- tool_result = None
- try:
- if tool_name.startswith("research::") and use_data_store:
- store_name = tool_name.split("::")[1]
- rag_callable = use_data_store.get(store_name, {}).get("callable")
- query = tool_params.get("query", "")
- retrieved_chunks = rag_callable(query, rag_top_k=rag_top_k, rag_min_similarity_percent=rag_min_similarity_percent)
- if retrieved_chunks:
- sources_this_turn.extend(retrieved_chunks)
- tool_result = {"status": "success", "summary": f"Found {len(retrieved_chunks)} relevant chunks.", "chunks": retrieved_chunks}
+ tool_result = {"status": "failure", "error": f"Tool '{tool_name}' was called but did not execute properly."} # Default error
+ if tool_name == "local_tools::generate_and_call":
+ chain_id = log_event(f"Starting chained tool call...", MSG_TYPE.MSG_TYPE_STEP_START)
+ try:
+ code_gen_prompt, lang = tool_params.get("code_generation_prompt", ""), tool_params.get("language", "python")
+ next_tool_name, next_tool_params = tool_params.get("next_tool_name"), tool_params.get("next_tool_params", {})
+ log_event("Received parameters for chain", MSG_TYPE.MSG_TYPE_STEP, meta={"parent_id": chain_id, "params": tool_params})
+
+ if not (use_mcps and hasattr(self, 'mcp')):
+ tool_result = {"status": "failure", "error": "MCPs are not enabled, cannot execute tools."}
+ elif next_tool_name not in code_consuming_tools:
+ tool_result = {"status": "failure", "error": f"Tool '{next_tool_name}' is not a valid code-consuming tool. Valid options are: {list(code_consuming_tools)}"}
  else:
- tool_result = {"status": "success", "summary": "No relevant documents found."}
- elif use_mcps and self.mcp:
- mcp_result = self.mcp.execute_tool(tool_name, tool_params, lollms_client_instance=self)
- tool_result = {"status": "success", "output": mcp_result} if not (isinstance(mcp_result, dict) and "error" in mcp_result) else {"status": "failure", **mcp_result}
- else:
- tool_result = {"status": "failure", "error": f"Tool '{tool_name}' not found."}
- except Exception as e:
- trace_exception(e)
- tool_result = {"status": "failure", "error": f"Exception executing tool: {str(e)}"}
-
- if tool_call_id: log_event(f"**Executing tool**: {tool_name}", MSG_TYPE.MSG_TYPE_STEP_END, metadata={"result": tool_result}, event_id= tool_call_id)
-
- observation_text = ""
+ def _hydrate(text: str, store: Dict) -> str:
+ for k, v in store.items(): text = text.replace(k, v.get('content',''))
+ return text
+ hydrated_prompt = _hydrate(code_gen_prompt, asset_store)
+ log_event(f"Generating {lang} code for {next_tool_name}", MSG_TYPE.MSG_TYPE_STEP, meta={"parent_id": chain_id, "hydrated_prompt": hydrated_prompt})
+ generated_code = self.generate_code(prompt=hydrated_prompt, system_prompt=f"Generate ONLY raw {lang} code.", **llm_generation_kwargs)
+
+ def _substitute(data: Any) -> Any:
+ if isinstance(data, dict): return {k: _substitute(v) for k, v in data.items()}
+ if isinstance(data, list): return [_substitute(item) for item in data]
+ if isinstance(data, str) and data == CODE_PLACEHOLDER: return generated_code
+ return data
+ hydrated_params = _substitute(next_tool_params)
+
+ log_event(f"Calling tool: {next_tool_name}", MSG_TYPE.MSG_TYPE_TOOL_CALL, meta={"parent_id": chain_id, "name": next_tool_name, "parameters": hydrated_params})
+ tool_result = self.mcp.execute_tool(next_tool_name, hydrated_params, lollms_client_instance=self)
+ except Exception as e:
+ tool_result = {"status": "failure", "error": f"Exception in chained tool logic: {str(e)}"}
+ log_event(f"Finished chained tool call.", MSG_TYPE.MSG_TYPE_STEP_END, event_id=chain_id)
+ # ... other non-code tool handlers ...
+
+ # --- Process and Sanitize ALL Tool Outputs for the Scratchpad ---
  sanitized_result = {}
  if isinstance(tool_result, dict):
  sanitized_result = tool_result.copy()
- summarized_fields = {}
  for key, value in tool_result.items():
- if isinstance(value, str) and key.endswith("_base64") and len(value) > 256:
- sanitized_result[key] = f"[Image was generated. Size: {len(value)} bytes]"
- continue
- if isinstance(value, str) and len(self.tokenize(value)) > output_summarization_threshold:
- if streaming_callback: streaming_callback(f"Summarizing long output from field '{key}'...", MSG_TYPE.MSG_TYPE_STEP, {"type": "summarization"})
- summary = self.sequential_summarize(text=value, chunk_processing_prompt=f"Summarize key info from this chunk of '{key}'.", callback=streaming_callback)
- summarized_fields[key] = summary
- sanitized_result[key] = f"[Content summarized, see summary below. Original length: {len(value)} chars]"
- observation_text = f"```json\n{json.dumps(sanitized_result, indent=2)}\n```"
- if summarized_fields:
- observation_text += "\n\n**Summaries of Long Outputs:**"
- for key, summary in summarized_fields.items():
- observation_text += f"\n- **Summary of '{key}':**\n{summary}"
+ if isinstance(value, str) and value.startswith("data:image"):
+ img_uuid = str(uuid.uuid4())
+ asset_store[img_uuid] = {"type": "image", "content": value}
+ sanitized_result[key] = f"[Image asset generated: {img_uuid}]"
  else:
- observation_text = f"Tool returned non-dictionary output: {str(tool_result)}"
+ sanitized_result = {"raw_output": str(tool_result)}

+ observation_text = f"```json\n{json.dumps(sanitized_result, indent=2)}\n```"
+ log_event(f"Received output from: {tool_name}", MSG_TYPE.MSG_TYPE_TOOL_OUTPUT, meta={"name": tool_name, "result": sanitized_result})
  tool_calls_this_turn.append({"name": tool_name, "params": tool_params, "result": tool_result})
  current_scratchpad += f"\n\n### Step {i+1}: Observation\n- **Action:** Called `{tool_name}`\n- **Result:**\n{observation_text}"
- log_event(f"Result from `{tool_name}`:\n{dict_to_markdown(sanitized_result)}", MSG_TYPE.MSG_TYPE_OBSERVATION)
-
- if reasoning_step_id: log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**", MSG_TYPE.MSG_TYPE_STEP_END, event_id = reasoning_step_id)
+ log_event(f"Finished reasoning step {i+1}", MSG_TYPE.MSG_TYPE_STEP_END, event_id=reasoning_step_id)
+
  except Exception as ex:
  trace_exception(ex)
- current_scratchpad += f"\n\n### Error : {ex}"
- if reasoning_step_id: log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**", MSG_TYPE.MSG_TYPE_STEP_END, event_id = reasoning_step_id)
-
- # --- Final Answer Synthesis ---
- synthesis_id = log_event("Synthesizing final answer...", MSG_TYPE.MSG_TYPE_STEP_START)
+ log_event(f"Error in reasoning loop: {str(ex)}", MSG_TYPE.MSG_TYPE_EXCEPTION, event_id=reasoning_step_id)

- final_answer_prompt = f"""
- --- Original User Request ---
- "{original_user_prompt}"
- --- Your Internal Scratchpad (Actions Taken & Findings) ---
- {current_scratchpad}
- --- INSTRUCTIONS ---
- - Synthesize a clear and friendly answer for the user based ONLY on your scratchpad.
- - If images were provided by the user, incorporate your analysis of them into the answer.
- - Do not talk about your internal process unless it's necessary to explain why you couldn't find an answer.
- """
- if debug: log_prompt(final_answer_prompt, "FINAL ANSWER SYNTHESIS PROMPT")
-
-
- final_answer_text = self.generate_text(prompt=final_answer_prompt, system_prompt=system_prompt, images=images, stream=streaming_callback is not None, streaming_callback=streaming_callback, temperature=final_answer_temperature, **llm_generation_kwargs)
- if type(final_answer_text) is dict:
- if streaming_callback:
- streaming_callback(final_answer_text["error"], MSG_TYPE.MSG_TYPE_EXCEPTION)
- return {
- "final_answer": "",
- "final_scratchpad": current_scratchpad,
- "tool_calls": tool_calls_this_turn,
- "sources": sources_this_turn,
- "clarification_required": False,
- "error": final_answer_text["error"]
- }
+ # --- 6. Final Answer Synthesis ---
+ synthesis_id = log_event("Synthesizing final answer...", MSG_TYPE.MSG_TYPE_STEP_START)
+ final_answer_prompt = f"""--- Original User Request ---\n"{original_user_prompt}"\n\n--- Your Internal Scratchpad ---\n{current_scratchpad}\n\n--- INSTRUCTIONS ---\nSynthesize a clear, comprehensive, and friendly answer for the user based ONLY on your scratchpad."""
+ final_synthesis_images = [img for img in (images or [])] + [asset['content'] for asset in asset_store.values() if asset['type'] == 'image']
+ final_answer_text = self.generate_text(prompt=final_answer_prompt, system_prompt=system_prompt, images=final_synthesis_images, stream=streaming_callback is not None, streaming_callback=streaming_callback, temperature=final_answer_temperature, **llm_generation_kwargs)
+ if isinstance(final_answer_text, dict) and "error" in final_answer_text:
+ return {"final_answer": "", "final_scratchpad": current_scratchpad, "tool_calls": tool_calls_this_turn, "sources": sources_this_turn, "clarification_required": False, "error": final_answer_text["error"]}
  final_answer = self.remove_thinking_blocks(final_answer_text)
- if debug: log_prompt(final_answer_text, "FINAL ANSWER RESPONSE")
+ log_event("Finished synthesizing answer.", MSG_TYPE.MSG_TYPE_STEP_END, event_id=synthesis_id)

- if synthesis_id: log_event("Synthesizing final answer...", MSG_TYPE.MSG_TYPE_STEP_END, event_id= synthesis_id)
+ return {"final_answer": final_answer, "final_scratchpad": current_scratchpad, "tool_calls": tool_calls_this_turn, "sources": sources_this_turn, "clarification_required": False, "error": None}

- return {
- "final_answer": final_answer,
- "final_scratchpad": current_scratchpad,
- "tool_calls": tool_calls_this_turn,
- "sources": sources_this_turn,
- "clarification_required": False,
- "error": None
- }
  def generate_code(
  self,
  prompt:str,
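The new `local_tools::generate_and_call` handler above generates code from a prompt and then substitutes it into the parameters of a follow-up tool call wherever a placeholder value appears. Below is a minimal standalone sketch of that substitution pattern; the `{{GENERATED_CODE}}` placeholder value and the helper names are assumptions for illustration only (the actual `CODE_PLACEHOLDER` constant is defined elsewhere in `lollms_core.py` and is not shown in this diff).

```python
# Illustrative sketch of the generate-and-call substitution pattern.
from typing import Any, Dict

CODE_PLACEHOLDER = "{{GENERATED_CODE}}"  # assumed value, for the example only

def hydrate(text: str, store: Dict[str, Dict[str, str]]) -> str:
    """Replace asset ids embedded in a prompt with the stored asset content."""
    for asset_id, asset in store.items():
        text = text.replace(asset_id, asset.get("content", ""))
    return text

def substitute(data: Any, generated_code: str) -> Any:
    """Recursively replace the placeholder with the generated code in tool params."""
    if isinstance(data, dict):
        return {k: substitute(v, generated_code) for k, v in data.items()}
    if isinstance(data, list):
        return [substitute(item, generated_code) for item in data]
    if isinstance(data, str) and data == CODE_PLACEHOLDER:
        return generated_code
    return data

# Example: route freshly generated code into a code-consuming tool's parameters.
params = {"script": CODE_PLACEHOLDER, "timeout": 30}
print(substitute(params, "print('hello')"))
# {'script': "print('hello')", 'timeout': 30}
```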
@@ -2497,7 +2310,7 @@ Do not split the code in multiple tags.
  callback = self.sink

  if ctx_size is None:
- ctx_size = self.default_ctx_size or 8192 # Provide a fallback default
+ ctx_size = self.llm.default_ctx_size or 8192 # Provide a fallback default
  if chunk_size is None:
  chunk_size = ctx_size // 4
  if overlap is None:
@@ -2573,7 +2386,7 @@ Current document analysis memory:
  # Process text in chunks
  while start_token_idx < total_tokens:
  # Calculate available tokens for chunk + memory
- available_tokens_for_dynamic_content = ctx_size - static_tokens - (self.default_n_predict or 1024) # Reserve space for output
+ available_tokens_for_dynamic_content = ctx_size - static_tokens - (self.llm.default_n_predict or 1024) # Reserve space for output
  if available_tokens_for_dynamic_content <= 100: # Need some minimum space
  ASCIIColors.error("Context size too small for summarization with current settings.")
  return "Error: Context size too small."
@@ -2610,7 +2423,7 @@ Current document analysis memory:
  ASCIIColors.magenta(f"--- Chunk {chunk_id} Prompt ---")
  ASCIIColors.cyan(prompt)

- response = self.generate_text(prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback)
+ response = self.generate_text(prompt, n_predict=(self.llm.default_n_predict or 1024), streaming_callback=callback)

  if isinstance(response, dict): # Handle generation error
  ASCIIColors.error(f"Chunk {chunk_id} processing failed: {response.get('error')}")
@@ -2669,7 +2482,7 @@ The final output must be put inside a {final_output_format} markdown tag.
  final_example_prompt = final_prompt_template.format(memory="<final_memory>")
  try:
  final_static_tokens = len(self.tokenize(final_example_prompt)) - len(self.tokenize("<final_memory>"))
- available_final_tokens = ctx_size - final_static_tokens - (self.default_n_predict or 1024) # Reserve space for output
+ available_final_tokens = ctx_size - final_static_tokens - (self.llm.default_n_predict or 1024) # Reserve space for output
  except RuntimeError as e:
  ASCIIColors.error(f"Tokenization failed during final setup: {e}")
  return "Error: Could not calculate final prompt size."
@@ -2686,7 +2499,7 @@ The final output must be put inside a {final_output_format} markdown tag.
  ASCIIColors.magenta("--- Final Aggregation Prompt ---")
  ASCIIColors.cyan(final_prompt)

- final_summary_raw = self.generate_text(final_prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback)
+ final_summary_raw = self.generate_text(final_prompt, n_predict=(self.llm.default_n_predict or 1024), streaming_callback=callback)

  if isinstance(final_summary_raw, dict):
  ASCIIColors.error(f"Final aggregation failed: {final_summary_raw.get('error')}")
@@ -2742,7 +2555,7 @@ The final output must be put inside a {final_output_format} markdown tag.

  # Set defaults and validate input
  if ctx_size is None:
- ctx_size = self.default_ctx_size or 8192
+ ctx_size = self.llm.default_ctx_size or 8192
  if chunk_size is None:
  chunk_size = ctx_size // 4
  if overlap is None:
@@ -2845,7 +2658,7 @@ Task: Update the markdown memory by adding new information from this chunk relev

  while start_token_idx < len(file_tokens):
  # Calculate available space dynamically
- available_tokens_for_dynamic_content = ctx_size - static_tokens - (self.default_n_predict or 1024)
+ available_tokens_for_dynamic_content = ctx_size - static_tokens - (self.llm.default_n_predict or 1024)
  if available_tokens_for_dynamic_content <= 100:
  ASCIIColors.error(f"Context window too small during analysis of {file_name}.")
  # Option: try truncating memory drastically or break
@@ -2885,7 +2698,7 @@ Task: Update the markdown memory by adding new information from this chunk relev
  ASCIIColors.magenta(f"--- Deep Analysis Prompt (Global Chunk {global_chunk_id}) ---")
  ASCIIColors.cyan(prompt)

- response = self.generate_text(prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback) # Use main callback for streaming output
+ response = self.generate_text(prompt, n_predict=(self.llm.default_n_predict or 1024), streaming_callback=callback) # Use main callback for streaming output

  if isinstance(response, dict): # Handle error
  ASCIIColors.error(f"Chunk processing failed (Global {global_chunk_id}): {response.get('error')}")
@@ -2940,7 +2753,7 @@ Provide the final aggregated answer in {output_format} format, directly addressi
  final_example_prompt = final_prompt.replace("{memory}", "<final_memory>")
  try:
  final_static_tokens = len(self.tokenize(final_example_prompt)) - len(self.tokenize("<final_memory>"))
- available_final_tokens = ctx_size - final_static_tokens - (self.default_n_predict or 1024)
+ available_final_tokens = ctx_size - final_static_tokens - (self.llm.default_n_predict or 1024)
  except RuntimeError as e:
  ASCIIColors.error(f"Tokenization failed during final setup: {e}")
  return "Error: Could not calculate final prompt size."
@@ -2956,7 +2769,7 @@ Provide the final aggregated answer in {output_format} format, directly addressi
  ASCIIColors.magenta("--- Final Aggregation Prompt ---")
  ASCIIColors.cyan(final_prompt)

- final_output_raw = self.generate_text(final_prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback) # Use main callback
+ final_output_raw = self.generate_text(final_prompt, n_predict=(self.llm.default_n_predict or 1024), streaming_callback=callback) # Use main callback

  if isinstance(final_output_raw, dict):
  ASCIIColors.error(f"Final aggregation failed: {final_output_raw.get('error')}")
@@ -3031,9 +2844,9 @@ Provide the final aggregated answer in {output_format} format, directly addressi
  tokens = []
  else:
  # Use the binding's tokenizer for accurate chunking
- tokens = self.binding.tokenize(text_to_process)
+ tokens = self.llm.tokenize(text_to_process)
  if chunk_size_tokens is None:
- chunk_size_tokens = self.default_ctx_size//2
+ chunk_size_tokens = self.llm.default_ctx_size//2

  if len(tokens) <= chunk_size_tokens:
  if streaming_callback:
@@ -3064,7 +2877,7 @@ Provide the final aggregated answer in {output_format} format, directly addressi
  step = chunk_size_tokens - overlap_tokens
  for i in range(0, len(tokens), step):
  chunk_tokens = tokens[i:i + chunk_size_tokens]
- chunk_text = self.binding.detokenize(chunk_tokens)
+ chunk_text = self.llm.detokenize(chunk_tokens)
  chunks.append(chunk_text)

  chunk_summaries = []
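The chunking loop in this last hunk walks the token list with a stride of `chunk_size_tokens - overlap_tokens`, so consecutive chunks share an overlap before being summarized. A self-contained sketch of that pattern follows; the whitespace tokenizer is only a stand-in for the binding's `tokenize`/`detokenize` pair, and the sizes are example values:

```python
# Overlapping chunking over a token list; a whitespace tokenizer stands in for
# the LLM binding's tokenize/detokenize pair used above.
def tokenize(text: str) -> list[str]:
    return text.split()

def detokenize(tokens: list[str]) -> str:
    return " ".join(tokens)

def chunk_with_overlap(text: str, chunk_size_tokens: int = 8, overlap_tokens: int = 2) -> list[str]:
    tokens = tokenize(text)
    if len(tokens) <= chunk_size_tokens:
        return [text]  # short inputs are processed as a single chunk
    step = chunk_size_tokens - overlap_tokens
    chunks = []
    for i in range(0, len(tokens), step):
        chunk_tokens = tokens[i:i + chunk_size_tokens]
        chunks.append(detokenize(chunk_tokens))
    return chunks

print(chunk_with_overlap("one two three four five six seven eight nine ten eleven twelve"))
# ['one two three four five six seven eight', 'seven eight nine ten eleven twelve']
```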