lollms-client 0.32.1__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lollms-client might be problematic.

Files changed (73)
  1. lollms_client/__init__.py +1 -1
  2. lollms_client/llm_bindings/azure_openai/__init__.py +6 -10
  3. lollms_client/llm_bindings/claude/__init__.py +4 -7
  4. lollms_client/llm_bindings/gemini/__init__.py +3 -7
  5. lollms_client/llm_bindings/grok/__init__.py +3 -7
  6. lollms_client/llm_bindings/groq/__init__.py +4 -7
  7. lollms_client/llm_bindings/hugging_face_inference_api/__init__.py +4 -6
  8. lollms_client/llm_bindings/litellm/__init__.py +15 -6
  9. lollms_client/llm_bindings/llamacpp/__init__.py +214 -388
  10. lollms_client/llm_bindings/lollms/__init__.py +24 -14
  11. lollms_client/llm_bindings/lollms_webui/__init__.py +6 -12
  12. lollms_client/llm_bindings/mistral/__init__.py +58 -29
  13. lollms_client/llm_bindings/ollama/__init__.py +6 -11
  14. lollms_client/llm_bindings/open_router/__init__.py +45 -14
  15. lollms_client/llm_bindings/openai/__init__.py +7 -14
  16. lollms_client/llm_bindings/openllm/__init__.py +12 -12
  17. lollms_client/llm_bindings/pythonllamacpp/__init__.py +1 -1
  18. lollms_client/llm_bindings/tensor_rt/__init__.py +8 -13
  19. lollms_client/llm_bindings/transformers/__init__.py +14 -6
  20. lollms_client/llm_bindings/vllm/__init__.py +16 -12
  21. lollms_client/lollms_core.py +296 -487
  22. lollms_client/lollms_discussion.py +436 -78
  23. lollms_client/lollms_llm_binding.py +223 -11
  24. lollms_client/lollms_mcp_binding.py +33 -2
  25. lollms_client/mcp_bindings/local_mcp/__init__.py +3 -2
  26. lollms_client/mcp_bindings/remote_mcp/__init__.py +6 -5
  27. lollms_client/mcp_bindings/standard_mcp/__init__.py +3 -5
  28. lollms_client/stt_bindings/lollms/__init__.py +6 -8
  29. lollms_client/stt_bindings/whisper/__init__.py +2 -4
  30. lollms_client/stt_bindings/whispercpp/__init__.py +15 -16
  31. lollms_client/tti_bindings/dalle/__init__.py +29 -28
  32. lollms_client/tti_bindings/diffusers/__init__.py +25 -21
  33. lollms_client/tti_bindings/gemini/__init__.py +215 -0
  34. lollms_client/tti_bindings/lollms/__init__.py +8 -9
  35. lollms_client-1.0.0.dist-info/METADATA +1214 -0
  36. lollms_client-1.0.0.dist-info/RECORD +69 -0
  37. {lollms_client-0.32.1.dist-info → lollms_client-1.0.0.dist-info}/top_level.txt +0 -2
  38. examples/article_summary/article_summary.py +0 -58
  39. examples/console_discussion/console_app.py +0 -266
  40. examples/console_discussion.py +0 -448
  41. examples/deep_analyze/deep_analyse.py +0 -30
  42. examples/deep_analyze/deep_analyze_multiple_files.py +0 -32
  43. examples/function_calling_with_local_custom_mcp.py +0 -250
  44. examples/generate_a_benchmark_for_safe_store.py +0 -89
  45. examples/generate_and_speak/generate_and_speak.py +0 -251
  46. examples/generate_game_sfx/generate_game_fx.py +0 -240
  47. examples/generate_text_with_multihop_rag_example.py +0 -210
  48. examples/gradio_chat_app.py +0 -228
  49. examples/gradio_lollms_chat.py +0 -259
  50. examples/internet_search_with_rag.py +0 -226
  51. examples/lollms_chat/calculator.py +0 -59
  52. examples/lollms_chat/derivative.py +0 -48
  53. examples/lollms_chat/test_openai_compatible_with_lollms_chat.py +0 -12
  54. examples/lollms_discussions_test.py +0 -155
  55. examples/mcp_examples/external_mcp.py +0 -267
  56. examples/mcp_examples/local_mcp.py +0 -171
  57. examples/mcp_examples/openai_mcp.py +0 -203
  58. examples/mcp_examples/run_remote_mcp_example_v2.py +0 -290
  59. examples/mcp_examples/run_standard_mcp_example.py +0 -204
  60. examples/simple_text_gen_test.py +0 -173
  61. examples/simple_text_gen_with_image_test.py +0 -178
  62. examples/test_local_models/local_chat.py +0 -9
  63. examples/text_2_audio.py +0 -77
  64. examples/text_2_image.py +0 -144
  65. examples/text_2_image_diffusers.py +0 -274
  66. examples/text_and_image_2_audio.py +0 -59
  67. examples/text_gen.py +0 -30
  68. examples/text_gen_system_prompt.py +0 -29
  69. lollms_client-0.32.1.dist-info/METADATA +0 -854
  70. lollms_client-0.32.1.dist-info/RECORD +0 -101
  71. test/test_lollms_discussion.py +0 -368
  72. {lollms_client-0.32.1.dist-info → lollms_client-1.0.0.dist-info}/WHEEL +0 -0
  73. {lollms_client-0.32.1.dist-info → lollms_client-1.0.0.dist-info}/licenses/LICENSE +0 -0
@@ -30,15 +30,9 @@ class LollmsClient():
  Provides a unified interface to manage and use different bindings for various modalities.
  """
  def __init__(self,
- # LLM Binding Parameters
- binding_name: str = "lollms",
- host_address: Optional[str] = None, # Shared host address (for service based bindings) default for all bindings if not specified
- models_path: Optional[str] = None, # Shared models folder path (for local file based bindings) default for all bindings if not specified
- model_name: str = "",
- llm_bindings_dir: Path = Path(__file__).parent / "llm_bindings",
- llm_binding_config: Optional[Dict[str, any]] = None,

  # Optional Modality Binding Names
+ llm_binding_name: str = "lollms",
  tts_binding_name: Optional[str] = None,
  tti_binding_name: Optional[str] = None,
  stt_binding_name: Optional[str] = None,
@@ -47,6 +41,7 @@ class LollmsClient():
  mcp_binding_name: Optional[str] = None,

  # Modality Binding Directories
+ llm_bindings_dir: Path = Path(__file__).parent / "llm_bindings",
  tts_bindings_dir: Path = Path(__file__).parent / "tts_bindings",
  tti_bindings_dir: Path = Path(__file__).parent / "tti_bindings",
  stt_bindings_dir: Path = Path(__file__).parent / "stt_bindings",
@@ -55,28 +50,13 @@ class LollmsClient():
  mcp_bindings_dir: Path = Path(__file__).parent / "mcp_bindings",

  # Configurations
+ llm_binding_config: Optional[Dict[str, any]] = None,
  tts_binding_config: Optional[Dict[str, any]] = None,
  tti_binding_config: Optional[Dict[str, any]] = None,
  stt_binding_config: Optional[Dict[str, any]] = None,
  ttv_binding_config: Optional[Dict[str, any]] = None,
  ttm_binding_config: Optional[Dict[str, any]] = None,
  mcp_binding_config: Optional[Dict[str, any]] = None,
-
- # General Parameters (mostly defaults for LLM generation)
- service_key: Optional[str] = None, # Shared service key/client_id
- verify_ssl_certificate: bool = True,
- ctx_size: Optional[int|None] = None,
- n_predict: Optional[int|None] = None,
- stream: bool = False,
- temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
- top_k: int = 40, # Ollama default is 40
- top_p: float = 0.9, # Ollama default is 0.9
- repeat_penalty: float = 1.1, # Ollama default is 1.1
- repeat_last_n: int = 64, # Ollama default is 64
-
- seed: Optional[int] = None,
- n_threads: int = 8,
- streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
  user_name ="user",
  ai_name = "assistant",
  **kwargs
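
Taken together, the three hunks above remove the shared `host_address`, `models_path`, `service_key` and generation defaults from `LollmsClient.__init__`; in 1.0.0 everything binding-specific travels through `llm_binding_config` alongside the renamed `llm_binding_name`. A minimal, hedged sketch of the new construction (the keys inside the config dict are illustrative assumptions; they are simply forwarded to the selected binding):

```python
# Sketch only: 1.0.0-style construction. Keys inside llm_binding_config are
# forwarded to the binding's create_binding call (minus "binding_name"), so the
# exact keys depend on the chosen binding; host_address/model_name here are
# assumptions for a service-style binding such as ollama.
from lollms_client import LollmsClient

client = LollmsClient(
    llm_binding_name="ollama",                      # replaces binding_name= from 0.32.1
    llm_binding_config={
        "host_address": "http://localhost:11434",   # assumed binding-level key
        "model_name": "mistral",                    # assumed binding-level key
    },
)
```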
@@ -125,27 +105,20 @@ class LollmsClient():
  Raises:
  ValueError: If the primary LLM binding cannot be created.
  """
- self.host_address = host_address # Store initial preference
- self.models_path = models_path
- self.service_key = service_key
- self.verify_ssl_certificate = verify_ssl_certificate
-
  # --- LLM Binding Setup ---
- self.binding_manager = LollmsLLMBindingManager(llm_bindings_dir)
- self.binding = self.binding_manager.create_binding(
- binding_name=binding_name,
- host_address=host_address, # Pass initial host preference
- models_path=models_path,
- model_name=model_name,
- service_key=service_key,
- verify_ssl_certificate=verify_ssl_certificate,
- # Pass LLM specific config if needed
- **(llm_binding_config or {})
+ self.llm_binding_manager = LollmsLLMBindingManager(llm_bindings_dir)
+ self.llm = self.llm_binding_manager.create_binding(
+ binding_name=llm_binding_name,
+ **{
+ k: v
+ for k, v in (llm_binding_config or {}).items()
+ if k != "binding_name"
+ }
  )

- if self.binding is None:
- available = self.binding_manager.get_available_bindings()
- raise ValueError(f"Failed to create LLM binding: {binding_name}. Available: {available}")
+ if self.llm is None:
+ available = self.llm_binding_manager.get_available_bindings()
+ raise ValueError(f"Failed to create LLM binding: {llm_binding_name}. Available: {available}")

  # --- Modality Binding Setup ---
  self.tts_binding_manager = LollmsTTSBindingManager(tts_bindings_dir)
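
The same filtering idiom reappears for every modality binding below: the config dict is splatted into `create_binding` with any stray `binding_name` key removed so it cannot collide with the explicit keyword argument. A self-contained sketch of the idiom:

```python
# Minimal, self-contained sketch of the keyword-filtering idiom used in this diff.
def filtered(config: dict | None) -> dict:
    """Drop 'binding_name' so it is not passed twice to create_binding()."""
    return {k: v for k, v in (config or {}).items() if k != "binding_name"}

cfg = {"binding_name": "ollama", "host_address": "http://localhost:11434"}
print(filtered(cfg))   # {'host_address': 'http://localhost:11434'}
print(filtered(None))  # {} -- None is tolerated, matching the (config or {}) guard
```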
@@ -165,7 +138,11 @@ class LollmsClient():
  if tts_binding_name:
  self.tts = self.tts_binding_manager.create_binding(
  binding_name=tts_binding_name,
- **tts_binding_config
+ **{
+ k: v
+ for k, v in (tts_binding_config or {}).items()
+ if k != "binding_name"
+ }
  )
  if self.tts is None:
  ASCIIColors.warning(f"Failed to create TTS binding: {tts_binding_name}. Available: {self.tts_binding_manager.get_available_bindings()}")
@@ -174,7 +151,11 @@ class LollmsClient():
  if tti_binding_config:
  self.tti = self.tti_binding_manager.create_binding(
  binding_name=tti_binding_name,
- **tti_binding_config
+ **{
+ k: v
+ for k, v in (tti_binding_config or {}).items()
+ if k != "binding_name"
+ }
  )
  else:
  self.tti = self.tti_binding_manager.create_binding(
@@ -187,8 +168,13 @@ class LollmsClient():
  if stt_binding_config:
  self.stt = self.stt_binding_manager.create_binding(
  binding_name=stt_binding_name,
- **stt_binding_config
+ **{
+ k: v
+ for k, v in (stt_binding_config or {}).items()
+ if k != "binding_name"
+ }
  )
+
  else:
  self.stt = self.stt_binding_manager.create_binding(
  binding_name=stt_binding_name,
@@ -199,8 +185,13 @@ class LollmsClient():
  if ttv_binding_config:
  self.ttv = self.ttv_binding_manager.create_binding(
  binding_name=ttv_binding_name,
- **ttv_binding_config
+ **{
+ k: v
+ for k, v in ttv_binding_config.items()
+ if k != "binding_name"
+ }
  )
+
  else:
  self.ttv = self.ttv_binding_manager.create_binding(
  binding_name=ttv_binding_name
@@ -212,7 +203,11 @@ class LollmsClient():
  if ttm_binding_config:
  self.ttm = self.ttm_binding_manager.create_binding(
  binding_name=ttm_binding_name,
- **ttm_binding_config
+ **{
+ k: v
+ for k, v in (ttm_binding_config or {}).items()
+ if k != "binding_name"
+ }
  )
  else:
  self.ttm = self.ttm_binding_manager.create_binding(
@@ -224,8 +219,12 @@ class LollmsClient():
  if mcp_binding_name:
  if mcp_binding_config:
  self.mcp = self.mcp_binding_manager.create_binding(
- mcp_binding_name,
- **mcp_binding_config
+ binding_name=mcp_binding_name,
+ **{
+ k: v
+ for k, v in (mcp_binding_config or {}).items()
+ if k != "binding_name"
+ }
  )
  else:
  self.mcp = self.mcp_binding_manager.create_binding(
@@ -235,17 +234,6 @@ class LollmsClient():
  ASCIIColors.warning(f"Failed to create MCP binding: {mcp_binding_name}. Available: {self.mcp_binding_manager.get_available_bindings()}")

  # --- Store Default Generation Parameters ---
- self.default_ctx_size = ctx_size
- self.default_n_predict = n_predict
- self.default_stream = stream
- self.default_temperature = temperature
- self.default_top_k = top_k
- self.default_top_p = top_p
- self.default_repeat_penalty = repeat_penalty
- self.default_repeat_last_n = repeat_last_n
- self.default_seed = seed
- self.default_n_threads = n_threads
- self.default_streaming_callback = streaming_callback

  # --- Prompt Formatting Attributes ---
  self.user_name = user_name
@@ -264,35 +252,30 @@ class LollmsClient():
  #
  def update_llm_binding(self, binding_name: str, config: Optional[Dict[str, Any]] = None):
  """Update the LLM binding with a new configuration."""
- self.binding = self.binding_manager.create_binding(
+ self.llm = self.llm_binding_manager.create_binding(
  binding_name=binding_name,
- host_address=self.host_address,
- models_path=self.models_path,
- model_name=self.binding.model_name, # Keep the same model name
- service_key=self.service_key,
- verify_ssl_certificate=self.verify_ssl_certificate,
  **(config or {})
  )
- if self.binding is None:
- available = self.binding_manager.get_available_bindings()
+ if self.llm is None:
+ available = self.llm_binding_manager.get_available_bindings()
  raise ValueError(f"Failed to update LLM binding: {binding_name}. Available: {available}")

  def get_ctx_size(self, model_name:str|None=None):
- if self.binding:
- ctx_size = self.binding.get_ctx_size(model_name)
- return ctx_size if ctx_size else self.default_ctx_size
+ if self.llm:
+ ctx_size = self.llm.get_ctx_size(model_name)
+ return ctx_size if ctx_size else self.llm.default_ctx_size
  else:
  return None

  def get_model_name(self):
- if self.binding:
- return self.binding.model_name
+ if self.llm:
+ return self.llm.model_name
  else:
  return None

  def set_model_name(self, model_name)->bool:
- if self.binding:
- self.binding.model_name = model_name
+ if self.llm:
+ self.llm.model_name = model_name
  return True
  else:
  return False
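
Because the client no longer caches `host_address`, `service_key` or a model name, `update_llm_binding` now rebuilds the binding purely from the supplied config, and `get_ctx_size`/`get_model_name` read from `self.llm`. A hedged usage sketch (the config keys are binding-specific assumptions, not confirmed by this diff), continuing with the `client` built in the earlier sketch:

```python
# Sketch: swapping the LLM binding at runtime in 1.0.0. Nothing is inherited
# from the client anymore, so the config must carry everything the new binding
# needs. The model_name/service_key keys below are illustrative assumptions.
client.update_llm_binding(
    binding_name="openai",
    config={
        "model_name": "gpt-4o-mini",      # assumed binding-level key
        "service_key": "YOUR_API_KEY",    # assumed binding-level key
    },
)
print(client.get_model_name())  # reads self.llm.model_name
print(client.get_ctx_size())    # binding value, else self.llm.default_ctx_size
```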
@@ -400,8 +383,8 @@ class LollmsClient():
  Returns:
  list: List of tokens.
  """
- if self.binding:
- return self.binding.tokenize(text)
+ if self.llm:
+ return self.llm.tokenize(text)
  raise RuntimeError("LLM binding not initialized.")

  def detokenize(self, tokens: list) -> str:
@@ -414,8 +397,8 @@ class LollmsClient():
  Returns:
  str: Detokenized text.
  """
- if self.binding:
- return self.binding.detokenize(tokens)
+ if self.llm:
+ return self.llm.detokenize(tokens)
  raise RuntimeError("LLM binding not initialized.")
  def count_tokens(self, text: str) -> int:
  """
@@ -427,8 +410,8 @@ class LollmsClient():
  Returns:
  int: Number of tokens.
  """
- if self.binding:
- return self.binding.count_tokens(text)
+ if self.llm:
+ return self.llm.count_tokens(text)
  raise RuntimeError("LLM binding not initialized.")

  def count_image_tokens(self, image: str) -> int:
@@ -441,8 +424,8 @@ class LollmsClient():
  Returns:
  int: Estimated number of tokens for the image. Returns -1 on error.
  """
- if self.binding:
- return self.binding.count_image_tokens(image)
+ if self.llm:
+ return self.llm.count_image_tokens(image)
  raise RuntimeError("LLM binding not initialized.")

  def get_model_details(self) -> dict:
@@ -452,8 +435,8 @@ class LollmsClient():
  Returns:
  dict: Model information dictionary.
  """
- if self.binding:
- return self.binding.get_model_info()
+ if self.llm:
+ return self.llm.get_model_info()
  raise RuntimeError("LLM binding not initialized.")

  def switch_model(self, model_name: str) -> bool:
@@ -466,8 +449,8 @@ class LollmsClient():
  Returns:
  bool: True if model loaded successfully, False otherwise.
  """
- if self.binding:
- return self.binding.load_model(model_name)
+ if self.llm:
+ return self.llm.load_model(model_name)
  raise RuntimeError("LLM binding not initialized.")

  def get_available_llm_bindings(self) -> List[str]:
@@ -477,7 +460,7 @@ class LollmsClient():
  Returns:
  List[str]: List of binding names that can be used for LLMs.
  """
- return self.binding_manager.get_available_bindings()
+ return self.llm_binding_manager.get_available_bindings()

  def generate_text(self,
  prompt: str,
@@ -523,11 +506,11 @@ class LollmsClient():
  Returns:
  Union[str, dict]: Generated text or error dictionary if failed.
  """
- if self.binding:
+ if self.llm:

- ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size if self.default_ctx_size else None
+ ctx_size = ctx_size if ctx_size is not None else self.llm.default_ctx_size if self.llm.default_ctx_size else None
  if ctx_size is None:
- ctx_size = self.binding.get_ctx_size()
+ ctx_size = self.llm.get_ctx_size()
  if ctx_size is None:
  ctx_size = 1024*8 # 1028*8= 8192 tokens, a common default for many models
  nb_input_tokens = self.count_tokens(prompt)+ (sum([self.count_image_tokens(image) for image in images]) if images else 0)
@@ -536,21 +519,21 @@ class LollmsClient():
  ASCIIColors.magenta(f"ctx_size : {ctx_size}")
  ASCIIColors.magenta(f"nb_input_tokens : {nb_input_tokens}")

- return self.binding.generate_text(
+ return self.llm.generate_text(
  prompt=prompt,
  images=images,
  system_prompt=system_prompt,
- n_predict=n_predict if n_predict else self.default_n_predict if self.default_n_predict else ctx_size - nb_input_tokens,
- stream=stream if stream is not None else self.default_stream,
- temperature=temperature if temperature is not None else self.default_temperature,
- top_k=top_k if top_k is not None else self.default_top_k,
- top_p=top_p if top_p is not None else self.default_top_p,
- repeat_penalty=repeat_penalty if repeat_penalty is not None else self.default_repeat_penalty,
- repeat_last_n=repeat_last_n if repeat_last_n is not None else self.default_repeat_last_n,
- seed=seed if seed is not None else self.default_seed,
- n_threads=n_threads if n_threads is not None else self.default_n_threads,
- ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size,
- streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback,
+ n_predict=n_predict if n_predict else self.llm.default_n_predict if self.llm.default_n_predict else ctx_size - nb_input_tokens,
+ stream=stream if stream is not None else self.llm.default_stream,
+ temperature=temperature if temperature is not None else self.llm.default_temperature,
+ top_k=top_k if top_k is not None else self.llm.default_top_k,
+ top_p=top_p if top_p is not None else self.llm.default_top_p,
+ repeat_penalty=repeat_penalty if repeat_penalty is not None else self.llm.default_repeat_penalty,
+ repeat_last_n=repeat_last_n if repeat_last_n is not None else self.llm.default_repeat_last_n,
+ seed=seed if seed is not None else self.llm.default_seed,
+ n_threads=n_threads if n_threads is not None else self.llm.default_n_threads,
+ ctx_size = ctx_size if ctx_size is not None else self.llm.default_ctx_size,
+ streaming_callback=streaming_callback if streaming_callback is not None else self.llm.default_streaming_callback,
  split= split,
  user_keyword=user_keyword,
  ai_keyword=ai_keyword
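
Note that every per-call fallback in `generate_text` (and in `generate_from_messages` and `chat` below) now reads `self.llm.default_*` rather than client-level attributes; call sites are unchanged, only the source of the defaults moved. A short usage sketch, again assuming the `client` from the earlier example:

```python
# Sketch: explicit arguments still win; anything left unset falls back to the
# defaults stored on the LLM binding (self.llm.default_temperature, ...),
# which in 0.32.1 lived on the LollmsClient itself.
text = client.generate_text(
    prompt="Summarize the 1.0.0 binding changes in one sentence.",
    temperature=0.2,   # overrides self.llm.default_temperature for this call
    n_predict=128,     # overrides self.llm.default_n_predict for this call
)
print(text)
```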
@@ -592,20 +575,20 @@ class LollmsClient():
  Returns:
  Union[str, dict]: Generated text or error dictionary if failed.
  """
- if self.binding:
- return self.binding.generate_from_messages(
+ if self.llm:
+ return self.llm.generate_from_messages(
  messages=messages,
- n_predict=n_predict if n_predict is not None else self.default_n_predict,
- stream=stream if stream is not None else self.default_stream,
- temperature=temperature if temperature is not None else self.default_temperature,
- top_k=top_k if top_k is not None else self.default_top_k,
- top_p=top_p if top_p is not None else self.default_top_p,
- repeat_penalty=repeat_penalty if repeat_penalty is not None else self.default_repeat_penalty,
- repeat_last_n=repeat_last_n if repeat_last_n is not None else self.default_repeat_last_n,
- seed=seed if seed is not None else self.default_seed,
- n_threads=n_threads if n_threads is not None else self.default_n_threads,
- ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size,
- streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback,
+ n_predict=n_predict if n_predict is not None else self.llm.default_n_predict,
+ stream=stream if stream is not None else self.llm.default_stream,
+ temperature=temperature if temperature is not None else self.llm.default_temperature,
+ top_k=top_k if top_k is not None else self.llm.default_top_k,
+ top_p=top_p if top_p is not None else self.llm.default_top_p,
+ repeat_penalty=repeat_penalty if repeat_penalty is not None else self.llm.default_repeat_penalty,
+ repeat_last_n=repeat_last_n if repeat_last_n is not None else self.llm.default_repeat_last_n,
+ seed=seed if seed is not None else self.llm.default_seed,
+ n_threads=n_threads if n_threads is not None else self.llm.default_n_threads,
+ ctx_size = ctx_size if ctx_size is not None else self.llm.default_ctx_size,
+ streaming_callback=streaming_callback if streaming_callback is not None else self.llm.default_streaming_callback,
  )
  raise RuntimeError("LLM binding not initialized.")

@@ -650,21 +633,21 @@ class LollmsClient():
  Returns:
  Union[str, dict]: Generated text or an error dictionary if failed.
  """
- if self.binding:
- return self.binding.chat(
+ if self.llm:
+ return self.llm.chat(
  discussion=discussion,
  branch_tip_id=branch_tip_id,
- n_predict=n_predict if n_predict is not None else self.default_n_predict,
- stream=stream if stream is not None else True if streaming_callback is not None else self.default_stream,
- temperature=temperature if temperature is not None else self.default_temperature,
- top_k=top_k if top_k is not None else self.default_top_k,
- top_p=top_p if top_p is not None else self.default_top_p,
- repeat_penalty=repeat_penalty if repeat_penalty is not None else self.default_repeat_penalty,
- repeat_last_n=repeat_last_n if repeat_last_n is not None else self.default_repeat_last_n,
- seed=seed if seed is not None else self.default_seed,
- n_threads=n_threads if n_threads is not None else self.default_n_threads,
- ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size,
- streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback
+ n_predict=n_predict if n_predict is not None else self.llm.default_n_predict,
+ stream=stream if stream is not None else True if streaming_callback is not None else self.llm.default_stream,
+ temperature=temperature if temperature is not None else self.llm.default_temperature,
+ top_k=top_k if top_k is not None else self.llm.default_top_k,
+ top_p=top_p if top_p is not None else self.llm.default_top_p,
+ repeat_penalty=repeat_penalty if repeat_penalty is not None else self.llm.default_repeat_penalty,
+ repeat_last_n=repeat_last_n if repeat_last_n is not None else self.llm.default_repeat_last_n,
+ seed=seed if seed is not None else self.llm.default_seed,
+ n_threads=n_threads if n_threads is not None else self.llm.default_n_threads,
+ ctx_size = ctx_size if ctx_size is not None else self.llm.default_ctx_size,
+ streaming_callback=streaming_callback if streaming_callback is not None else self.llm.default_streaming_callback
  )
  raise RuntimeError("LLM binding not initialized.")

@@ -679,15 +662,15 @@ class LollmsClient():
  Returns:
  list: List of embeddings.
  """
- if self.binding:
- return self.binding.embed(text, **kwargs)
+ if self.llm:
+ return self.llm.embed(text, **kwargs)
  raise RuntimeError("LLM binding not initialized.")


  def listModels(self):
  """Lists models available to the current LLM binding."""
- if self.binding:
- return self.binding.listModels()
+ if self.llm:
+ return self.llm.listModels()
  raise RuntimeError("LLM binding not initialized.")

  # --- Convenience Methods for Lollms LLM Binding Features ---
@@ -698,8 +681,8 @@ class LollmsClient():
  Returns:
  Union[List[Dict], Dict]: List of personality dicts or error dict.
  """
- if self.binding and hasattr(self.binding, 'lollms_listMountedPersonalities'):
- return self.binding.lollms_listMountedPersonalities()
+ if self.llm and hasattr(self.llm, 'lollms_listMountedPersonalities'):
+ return self.llm.lollms_listMountedPersonalities()
  else:
  ASCIIColors.warning("listMountedPersonalities is only available for the 'lollms' LLM binding.")
  return {"status": False, "error": "Functionality not available for the current binding"}
@@ -910,7 +893,7 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
  streaming_callback: Optional[Callable[[str, int, Optional[Dict], Optional[List]], bool]] = None,
  **llm_generation_kwargs
  ) -> Dict[str, Any]:
- if not self.binding or not self.mcp:
+ if not self.llm or not self.mcp:
  return {"final_answer": "", "tool_calls": [], "error": "LLM or MCP binding not initialized."}

  turn_history: List[Dict[str, Any]] = []
@@ -1076,7 +1059,7 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
  "- Do not make up information. If the findings are insufficient to fully answer the request, state what you found and what remains unanswered.\n"
  "- Format your response clearly using markdown where appropriate.\n"
  )
- final_answer_text = self.generate_text(prompt=final_answer_prompt, system_prompt=system_prompt, images=images, stream=streaming_callback is not None, streaming_callback=streaming_callback, temperature=final_answer_temperature if final_answer_temperature is not None else self.default_temperature, **(llm_generation_kwargs or {}))
+ final_answer_text = self.generate_text(prompt=final_answer_prompt, system_prompt=system_prompt, images=images, stream=streaming_callback is not None, streaming_callback=streaming_callback, temperature=final_answer_temperature if final_answer_temperature is not None else self.llm.default_temperature, **(llm_generation_kwargs or {}))

  if streaming_callback:
  streaming_callback("Final answer generation complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "final_answer_synthesis"}, turn_history = turn_history)
@@ -1117,7 +1100,7 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
  """
  Enhanced RAG with dynamic objective refinement and a knowledge scratchpad.
  """
- if not self.binding:
+ if not self.llm:
  return {"final_answer": "", "rag_hops_history": [], "all_retrieved_sources": [], "error": "LLM binding not initialized."}

  effective_ctx_size = ctx_size or getattr(self, "default_ctx_size", 20000)
@@ -1456,394 +1439,220 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
1456
1439
  new_scratchpad_text = self.generate_text(prompt=synthesis_prompt, n_predict=1024, temperature=0.0)
1457
1440
  return self.remove_thinking_blocks(new_scratchpad_text).strip()
1458
1441
 
1459
-
1460
1442
  def generate_with_mcp_rag(
1461
1443
  self,
1462
1444
  prompt: str,
1445
+ context: Optional[str] = None,
1463
1446
  use_mcps: Union[None, bool, List[str]] = None,
1464
1447
  use_data_store: Union[None, Dict[str, Callable]] = None,
1465
1448
  system_prompt: str = None,
1466
1449
  reasoning_system_prompt: str = "You are a logical AI assistant. Your task is to achieve the user's goal by thinking step-by-step and using the available tools.",
1467
1450
  images: Optional[List[str]] = None,
1468
- max_reasoning_steps: int = None,
1469
- decision_temperature: float = None,
1470
- final_answer_temperature: float = None,
1451
+ max_reasoning_steps: int = 10,
1452
+ decision_temperature: float = 0.5,
1453
+ final_answer_temperature: float = 0.7,
1471
1454
  streaming_callback: Optional[Callable[[str, 'MSG_TYPE', Optional[Dict], Optional[List]], bool]] = None,
1472
- rag_top_k: int = None,
1473
- rag_min_similarity_percent: float = None,
1474
- output_summarization_threshold: int = None, # In tokens
1455
+ rag_top_k: int = 5,
1456
+ rag_min_similarity_percent: float = 50.0,
1457
+ output_summarization_threshold: int = 500, # In tokens
1458
+ force_mcp_use: bool = False,
1475
1459
  debug: bool = False,
1476
1460
  **llm_generation_kwargs
1477
1461
  ) -> Dict[str, Any]:
1478
- """Generates a response using a dynamic agent with stateful, ID-based step tracking.
1479
-
1480
- This method orchestrates a sophisticated agentic process where an AI
1481
- repeatedly observes its state, thinks about the next best action, and
1482
- acts. This "observe-think-act" loop allows the agent to adapt to new
1483
- information, recover from failures, and build a comprehensive
1484
- understanding of the problem before responding.
1485
-
1486
- A key feature is its stateful step notification system, designed for rich
1487
- UI integration. When a step starts, it sends a `step_start` message with
1488
- a unique ID and description. When it finishes, it sends a `step_end`
1489
- message with the same ID, allowing a user interface to track the
1490
- progress of specific, long-running tasks like tool calls.
1491
-
1462
+ """
1463
+ Orchestrates a sophisticated and robust agentic process to generate a response.
1464
+
1465
+ This method employs a dynamic "observe-think-act" loop with several advanced architectural
1466
+ patterns for improved robustness and efficiency, particularly when handling code.
1467
+
1468
+ Key Features:
1469
+ - **Context-Aware Asset Ingestion**: The agent automatically detects if the `context`
1470
+ parameter (representing the previous turn) contains code. If so, it registers that
1471
+ code as an asset with a UUID, preventing the LLM from trying to paste large code
1472
+ blocks into its prompts and avoiding JSON errors.
1473
+ - **Tool Perception Filtering**: Identifies tools that directly consume code and HIDES
1474
+ them from the LLM's view, forcing it to use the safer `generate_and_call` workflow.
1475
+ - **Forced Safe Workflow**: The `generate_and_call` meta-tool is the ONLY way the agent
1476
+ can execute code, ensuring a robust, error-free, and efficient process.
1477
+ - **Verbose Internal Logging**: The `generate_and_call` tool is now fully instrumented
1478
+ with detailed logging and robust error handling to ensure every failure is visible
1479
+ and diagnosable, preventing silent loops.
1480
+
1492
1481
  Args:
1493
- prompt: The user's initial prompt or question.
1482
+ prompt: The user's initial prompt or question for the current turn.
1483
+ context: An optional string containing the content of the previous turn.
1494
1484
  use_mcps: Controls MCP tool usage.
1495
1485
  use_data_store: Controls RAG usage.
1496
- system_prompt: The main system prompt for the final answer generation.
1497
- reasoning_system_prompt: The system prompt for the iterative
1498
- decision-making process.
1499
- images: A list of base64-encoded images provided by the user.
1500
- max_reasoning_steps: The maximum number of reasoning cycles.
1501
- decision_temperature: The temperature for the LLM's decision-making.
1502
- final_answer_temperature: The temperature for the final answer synthesis.
1503
- streaming_callback: A function for real-time output of tokens and steps.
1504
- rag_top_k: The number of top documents to retrieve during RAG.
1486
+ system_prompt: Main system prompt for the final answer.
1487
+ reasoning_system_prompt: System prompt for the decision-making process.
1488
+ images: A list of base64-encoded images provided by the user for the current turn.
1489
+ max_reasoning_steps: Maximum number of reasoning cycles.
1490
+ decision_temperature: Temperature for LLM's decision-making.
1491
+ final_answer_temperature: Temperature for final answer synthesis.
1492
+ streaming_callback: Function for real-time output of tokens and steps.
1493
+ rag_top_k: Number of top documents to retrieve during RAG.
1505
1494
  rag_min_similarity_percent: Minimum similarity for RAG results.
1506
- output_summarization_threshold: The token count that triggers automatic
1507
- summarization of a tool's text output.
1508
- debug : If true, we'll report the detailed promptin and response information
1495
+ output_summarization_threshold: Token count that triggers summarization.
1496
+ force_mcp_use: If True, bypasses the "fast answer" check.
1497
+ debug: If True, prints detailed prompting and response information.
1509
1498
  **llm_generation_kwargs: Additional keyword arguments for LLM calls.
1510
1499
 
1511
1500
  Returns:
1512
- A dictionary containing the agent's full run, including the final
1513
- answer, the complete internal scratchpad, a log of tool calls,
1514
- any retrieved RAG sources, and other metadata.
1501
+ A dictionary containing the agent's full run.
1515
1502
  """
1516
- reasoning_step_id = None
1517
- if not self.binding:
1503
+ if not self.llm:
1518
1504
  return {"final_answer": "", "tool_calls": [], "sources": [], "error": "LLM binding not initialized."}
1505
+ if max_reasoning_steps is None:
1506
+ max_reasoning_steps = 10
1507
+ # --- Helper Functions ---
1508
+ def log_event(desc, event_type=MSG_TYPE.MSG_TYPE_CHUNK, meta=None, event_id=None) -> Optional[str]:
1509
+ if not streaming_callback: return None
1510
+ is_start = event_type == MSG_TYPE.MSG_TYPE_STEP_START
1511
+ event_id = str(uuid.uuid4()) if is_start and not event_id else event_id
1512
+ params = {"type": event_type, "description": desc, **(meta or {})}
1513
+ if event_id: params["id"] = event_id
1514
+ streaming_callback(desc, event_type, params)
1515
+ return event_id
1519
1516
 
1520
- if not max_reasoning_steps:
1521
- max_reasoning_steps= 10
1522
- if not rag_min_similarity_percent:
1523
- rag_min_similarity_percent= 50
1524
- if not rag_top_k:
1525
- rag_top_k = 5
1526
- if not decision_temperature:
1527
- decision_temperature = 0.7
1528
- if not output_summarization_threshold:
1529
- output_summarization_threshold = 500
1530
-
1531
- events = []
1532
-
1533
-
1534
- # --- Initialize Agent State ---
1535
- sources_this_turn: List[Dict[str, Any]] = []
1536
- tool_calls_this_turn: List[Dict[str, Any]] = []
1537
- generated_code_store: Dict[str, str] = {} # NEW: Store for UUID -> code
1538
- original_user_prompt = prompt
1539
-
1540
- initial_state_parts = [
1541
- "### Initial State",
1542
- "- My goal is to address the user's request.",
1543
- "- I have not taken any actions yet."
1544
- ]
1545
- if images:
1546
- initial_state_parts.append(f"- The user has provided {len(images)} image(s) for context.")
1547
- current_scratchpad = "\n".join(initial_state_parts)
1548
-
1549
- def log_prompt(prompt, type="prompt"):
1550
- ASCIIColors.cyan(f"** DEBUG: {type} **")
1551
- ASCIIColors.magenta(prompt[-15000:])
1552
- prompt_size = self.count_tokens(prompt)
1553
- ASCIIColors.red(f"Prompt size:{prompt_size}/{self.default_ctx_size}")
1517
+ def log_prompt(title: str, prompt_text: str):
1518
+ if not debug: return
1519
+ ASCIIColors.cyan(f"** DEBUG: {title} **")
1520
+ ASCIIColors.magenta(prompt_text[-15000:])
1521
+ prompt_size = self.count_tokens(prompt_text)
1522
+ ASCIIColors.red(f"Prompt size:{prompt_size}/{self.llm.default_ctx_size}")
1554
1523
  ASCIIColors.cyan(f"** DEBUG: DONE **")
1555
1524
 
1556
- # --- Define Inner Helper Functions ---
1557
- def log_event(
1558
- description: str,
1559
- event_type: MSG_TYPE = MSG_TYPE.MSG_TYPE_CHUNK,
1560
- metadata: Optional[Dict] = None,
1561
- event_id=None
1562
- ) -> Optional[str]:
1563
- if not streaming_callback: return None
1564
- event_id = str(uuid.uuid4()) if event_type==MSG_TYPE.MSG_TYPE_STEP_START else event_id
1565
- params = {"type": event_type, "description": description, **(metadata or {})}
1566
- params["id"] = event_id
1567
- streaming_callback(description, event_type, params)
1568
- return event_id
1525
+ # --- 1. Initialize State & Context-Aware Asset Ingestion ---
1526
+ original_user_prompt, tool_calls_this_turn, sources_this_turn = prompt, [], []
1527
+ asset_store: Dict[str, Dict] = {}
1528
+ initial_state_parts = ["### Initial State", "- My goal is to address the user's request comprehensively."]
1529
+ if images:
1530
+ for img_b64 in images:
1531
+ img_uuid = str(uuid.uuid4())
1532
+ asset_store[img_uuid] = {"type": "image", "content": img_b64}
1533
+ initial_state_parts.append(f"- User provided image, asset ID: {img_uuid}")
1534
+ if context:
1535
+ code_blocks = re.findall(r"```(?:\w+)?\n([\s\S]+?)\n```", context)
1536
+ if code_blocks:
1537
+ last_code_block = code_blocks[-1]
1538
+ code_uuid = str(uuid.uuid4())
1539
+ asset_store[code_uuid] = {"type": "code", "content": last_code_block}
1540
+ initial_state_parts.append(f"- The user's request likely refers to a code block from the previous turn's context. It has been registered as asset ID: {code_uuid}")
1541
+ current_scratchpad = "\n".join(initial_state_parts)
1569
1542
 
1570
- def _substitute_code_uuids_recursive(data: Any, code_store: Dict[str, str]):
1571
- """Recursively finds and replaces code UUIDs in tool parameters."""
1572
- if isinstance(data, dict):
1573
- for key, value in data.items():
1574
- if isinstance(value, str) and value in code_store:
1575
- data[key] = code_store[value]
1576
- else:
1577
- _substitute_code_uuids_recursive(value, code_store)
1578
- elif isinstance(data, list):
1579
- for i, item in enumerate(data):
1580
- if isinstance(item, str) and item in code_store:
1581
- data[i] = code_store[item]
1582
- else:
1583
- _substitute_code_uuids_recursive(item, code_store)
1584
-
1585
- discovery_step_id = log_event("**Discovering tools**",MSG_TYPE.MSG_TYPE_STEP_START)
1586
- # --- 1. Discover Available Tools ---
1587
- available_tools = []
1588
- if use_mcps and self.mcp:
1589
- discovered_tools = self.mcp.discover_tools(force_refresh=True)
1590
- if isinstance(use_mcps, list):
1591
- available_tools.extend([t for t in discovered_tools if t["name"] in use_mcps])
1592
-
1543
+ # --- 2. Tool Discovery and Filtering ---
1544
+ discovery_step_id = log_event("Discovering and filtering tools...", MSG_TYPE.MSG_TYPE_STEP_START)
1545
+ all_discovered_tools, visible_tools, code_consuming_tools = [], [], set()
1546
+ if use_mcps and hasattr(self, 'mcp'):
1547
+ mcp_tools = self.mcp.discover_tools(force_refresh=True)
1548
+ if isinstance(use_mcps, list): all_discovered_tools.extend([t for t in mcp_tools if t["name"] in use_mcps])
1549
+ elif use_mcps is True: all_discovered_tools.extend(mcp_tools)
1550
+ code_param_keywords = {'code', 'script', 'python_code', 'javascript', 'html', 'css'}
1551
+ for tool in all_discovered_tools:
1552
+ if any(p in code_param_keywords for p in tool.get("input_schema", {}).get("properties", {})): code_consuming_tools.add(tool['name'])
1553
+ else: visible_tools.append(tool)
1593
1554
  if use_data_store:
1594
- for store_name in use_data_store:
1595
- available_tools.append({
1596
- "name": f"research::{store_name}",
1597
- "description": f"Queries the '{store_name}' knowledge base for relevant information.",
1598
- "input_schema": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}
1599
- })
1555
+ for name, info in use_data_store.items(): visible_tools.append({"name": f"research::{name}", "description": info.get("description", f"Queries '{name}'."), "input_schema": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}})
1556
+ log_event(f"Made {len(visible_tools)} tools visible (hid {len(code_consuming_tools)} code tools).", MSG_TYPE.MSG_TYPE_STEP_END, meta={"visible": len(visible_tools), "hidden": len(code_consuming_tools), "hidden_list": list(code_consuming_tools)}, event_id=discovery_step_id)
1557
+
1558
+ # --- 3. Fast Answer Path (Not shown for brevity, but retained) ---
1600
1559
 
1601
- # Add the new prepare_code tool definition
1602
- available_tools.append({
1603
- "name": "local_tools::prepare_code",
1604
- "description": """Generates and stores code into a buffer to be used by another tool. Never put code into a tool directly, first call this to generate the code and then paste the uuid in the tool that requires code. Only use this for generating code to be sent to another tool. You can put the uuid of the generated code into the fields that require long code among the tools. If no tool requires code as input do not use prepare_code. prepare_code do not execute the code nor does it audit it.""",
1605
- "input_schema": {"type": "object", "properties": {"prompt": {"type": "string", "description": "A detailed natural language description of the code's purpose and requirements."}, "language": {"type": "string", "description": "The programming language of the generated code. By default it uses python."}}, "required": ["prompt"]}
1606
- })
1607
- available_tools.append({
1608
- "name": "local_tools::view_generated_code",
1609
- "description": """Views the code that was generated and stored to the buffer. You need to have a valid uuid of the generated code.""",
1610
- "input_schema": {"type": "object", "properties": {"code_id": {"type": "string", "description": "The case sensitive uuid of the generated code."}}, "required": ["uuid"]}
1611
- })
1612
- # Add the new refactor_scratchpad tool definition
1613
- available_tools.append({
1614
- "name": "local_tools::refactor_scratchpad",
1615
- "description": "Rewrites the scratchpad content to clean it and reorganize it. Only use if the scratchpad is messy or contains too much information compared to what you need.",
1616
- "input_schema": {"type": "object", "properties": {}}
1617
- })
1618
-
1619
- formatted_tools_list = "\n".join([f"**{t['name']}**:\n{t['description']}\ninput schema:\n{json.dumps(t['input_schema'])}" for t in available_tools])
1620
- formatted_tools_list += "\n**local_tools::request_clarification**:\nUse if the user's request is ambiguous and you can not infer a clear idea of his intent. this tool has no parameters."
1621
- formatted_tools_list += "\n**local_tools::final_answer**:\nUse when you are ready to respond to the user. this tool has no parameters."
1622
-
1623
- if discovery_step_id: log_event(f"**Discovering tools** found {len(available_tools)} tools",MSG_TYPE.MSG_TYPE_STEP_END, event_id=discovery_step_id)
1624
-
1625
- # --- 2. Dynamic Reasoning Loop ---
1560
+ # --- 4. Format Tools for Main Loop ---
1561
+ CODE_PLACEHOLDER = "{GENERATED_CODE}"
1562
+ built_in_tools = [{"name": "local_tools::generate_and_call", "description": f"CRITICAL: To run or modify code, you MUST use this tool. It generates code (e.g., to fix code from an asset) and then calls a tool with it. Refer to existing code using its asset ID. Use '{CODE_PLACEHOLDER}' in `next_tool_params` for the NEWLY generated code.", "input_schema": { "type": "object", "properties": { "code_generation_prompt": {"type": "string"}, "language": {"type": "string"}, "next_tool_name": {"type": "string"}, "next_tool_params": {"type": "object"}}, "required": ["code_generation_prompt", "next_tool_name", "next_tool_params"]}}, {"name": "local_tools::refactor_scratchpad", "description": "Rewrites the scratchpad.", "input_schema": {}}, {"name": "local_tools::request_clarification", "description": "Asks the user for more information.", "input_schema": {"type": "object", "properties": {"question_to_user": {"type": "string"}}, "required": ["question_to_user"]}}, {"name": "local_tools::final_answer", "description": "Provides the final answer.", "input_schema": {}}]
1563
+ all_visible_tools = visible_tools + built_in_tools
1564
+ formatted_tools_list = "\n".join([f"**{t['name']}**:\n- Description: {t['description']}" for t in all_visible_tools])
1565
+
1566
+ # --- 5. Dynamic Reasoning Loop ---
1626
1567
  for i in range(max_reasoning_steps):
1568
+ reasoning_step_id = log_event(f"Reasoning Step {i+1}/{max_reasoning_steps}", MSG_TYPE.MSG_TYPE_STEP_START)
1627
1569
  try:
1628
- reasoning_step_id = log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**", MSG_TYPE.MSG_TYPE_STEP_START)
1629
- user_context = f'Original User Request: "{original_user_prompt}"'
1630
- if images: user_context += f'\n(Note: {len(images)} image(s) were provided with this request.)'
1570
+ reasoning_prompt = f"""--- AVAILABLE ACTIONS ---\n{formatted_tools_list}\n\n--- YOUR INTERNAL SCRATCHPAD ---\n{current_scratchpad}\n--- END SCRATCHPAD ---\n\n**INSTRUCTIONS:**\n1. **OBSERVE:** Review your scratchpad, especially available asset IDs.\n2. **THINK:** Based on '{original_user_prompt}', what is the single next logical action using ONLY the available actions?\n3. **ACT:** Formulate your decision as a JSON object. Do NOT paste large code blocks into parameters; use their asset IDs instead."""
1571
+ action_schema = {"thought": "My reasoning.", "action": {"tool_name": "string", "tool_params": "object"}}
1572
+ action_data = self.generate_structured_content(prompt=reasoning_prompt, schema=action_schema, system_prompt=reasoning_system_prompt, temperature=decision_temperature, **llm_generation_kwargs)
1631
1573
 
1632
- reasoning_prompt_template = f"""
1633
- --- AVAILABLE TOOLS ---
1634
- {formatted_tools_list}
1635
- --- CONTEXT ---
1636
- {user_context}
1637
- --- YOUR INTERNAL SCRATCHPAD (Work History & Analysis) ---
1638
- {current_scratchpad}
1639
- --- END OF SCRATCHPAD ---
1640
-
1641
- **INSTRUCTIONS:**
1642
- 1. **OBSERVE:** Review the `Observation` from your most recent step in the scratchpad.
1643
- 2. **THINK:**
1644
- - Does the latest observation completely fulfill the user's original request?
1645
- - If YES, your next action MUST be to use the `final_answer` tool.
1646
- - If NO, what is the single next logical step needed? This may involve writing code first with `prepare_code`, then using another tool.
1647
- - If you are stuck or the request is ambiguous, use `local_tools::request_clarification`.
1648
- 3. **ACT:** Formulate your decision as a JSON object.
1649
- ** Important ** Always use this format alias::tool_name to call the tool
1650
- """
1651
- action_template = {
1652
- "thought": "My detailed analysis of the last observation and my reasoning for the next action and how it integrates with my global plan.",
1653
- "action": {
1654
- "tool_name": "The single tool to use (e.g., 'local_tools::prepare_code', 'local_tools::final_answer').",
1655
- "tool_params": {"param1": "value1"},
1656
- "clarification_question": "(string, ONLY if tool_name is 'local_tools::request_clarification')"
1657
- }
1658
- }
1659
- if debug: log_prompt(reasoning_prompt_template, f"REASONING PROMPT (Step {i+1})")
1660
- structured_action_response = self.generate_code(
1661
- prompt=reasoning_prompt_template, template=json.dumps(action_template, indent=2),
1662
- system_prompt=reasoning_system_prompt, temperature=decision_temperature,
1663
- images=images if i == 0 else None
1664
- )
1665
- if structured_action_response is None:
1666
- log_event("**Error generating thought.** Retrying..", MSG_TYPE.MSG_TYPE_EXCEPTION)
1574
+ if not action_data or not isinstance(action_data.get("action"), dict):
1575
+ log_event("Failed to generate a valid JSON action. Will retry.", MSG_TYPE.MSG_TYPE_WARNING, event_id=reasoning_step_id)
1576
+ current_scratchpad += "\n\n### Step Failure\n- **Error:** Failed to produce a valid JSON action."
1667
1577
  continue
1668
- if debug: log_prompt(structured_action_response, f"RAW REASONING RESPONSE (Step {i+1})")
1669
-
1670
- try:
1671
- action_data = robust_json_parser(structured_action_response)
1672
- thought = action_data.get("thought", "No thought was generated.")
1673
- action = action_data.get("action", {})
1674
- if isinstance(action,str):
1675
- tool_name = action
1676
- tool_params = {}
1677
- else:
1678
- tool_name = action.get("tool_name")
1679
- tool_params = action.get("tool_params", {})
1680
- except (json.JSONDecodeError, TypeError) as e:
1681
- current_scratchpad += f"\n\n### Step {i+1} Failure\n- **Error:** Failed to generate a valid JSON action: {e}"
1682
- log_event(f"Step Failure: Invalid JSON action.", MSG_TYPE.MSG_TYPE_EXCEPTION, metadata={"details": str(e)})
1683
- if reasoning_step_id: log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**", MSG_TYPE.MSG_TYPE_STEP_END, metadata={"error": str(e)}, event_id=reasoning_step_id)
1684
-
1685
1578
 
1579
+ thought, action = action_data.get("thought", ""), action_data.get("action", {})
1580
+ tool_name, tool_params = action.get("tool_name"), action.get("tool_params", {})
1686
1581
  current_scratchpad += f"\n\n### Step {i+1}: Thought\n{thought}"
1687
- log_event(f"{thought}", MSG_TYPE.MSG_TYPE_THOUGHT_CONTENT)
1582
+ log_event(thought, MSG_TYPE.MSG_TYPE_THOUGHT_CONTENT)
1688
1583
 
1689
- if not tool_name:
1690
- # Handle error...
1691
- break
1692
-
1693
- # --- Handle special, non-executing tools ---
1584
+ if tool_name == "local_tools::final_answer": break
1694
1585
  if tool_name == "local_tools::request_clarification":
1695
- # Handle clarification...
1696
- if isinstance(action, dict):
1697
- return {"final_answer": action.get("clarification_question", "Could you please provide more details?"), "final_scratchpad": current_scratchpad, "tool_calls": tool_calls_this_turn, "sources": sources_this_turn, "clarification_required": True, "error": None}
1698
- elif isinstance(action, str):
1699
- return {"final_answer": action, "final_scratchpad": current_scratchpad, "tool_calls": tool_calls_this_turn, "sources": sources_this_turn, "clarification_required": True, "error": None}
1700
- else:
1701
- return {"final_answer": "Could you please provide more details?", "final_scratchpad": current_scratchpad, "tool_calls": tool_calls_this_turn, "sources": sources_this_turn, "clarification_required": True, "error": None}
1702
- if tool_name == "local_tools::final_answer":
1703
- current_scratchpad += f"\n\n### Step {i+1}: Action\n- **Action:** Decided to formulate the final answer."
1704
- log_event("**Action**: Formulate final answer.", MSG_TYPE.MSG_TYPE_THOUGHT_CHUNK)
1705
- if reasoning_step_id: log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**",MSG_TYPE.MSG_TYPE_STEP_END, event_id=reasoning_step_id)
1706
- break
1586
+ return {"final_answer": tool_params.get("question_to_user", "?"), "final_scratchpad": current_scratchpad, "tool_calls": tool_calls_this_turn, "sources": sources_this_turn, "clarification_required": True, "error": None}
1707
1587
 
1708
- # --- Handle the `prepare_code` tool specifically ---
1709
- if tool_name == 'local_tools::prepare_code':
1710
- code_gen_id = log_event(f"Generating code...", MSG_TYPE.MSG_TYPE_STEP_START, metadata={"name": "prepare_code", "id": "gencode"})
1711
- code_prompt = tool_params.get("prompt", "Generate the requested code.")
1712
-
1713
- # Use a specific system prompt to get raw code
1714
- code_generation_system_prompt = "You are a code generation assistant. Generate ONLY the raw code based on the user's request. Do not add any explanations, markdown code fences, or other text outside of the code itself."
1715
- generated_code = self.generate_code(prompt=code_prompt, system_prompt=code_generation_system_prompt, **llm_generation_kwargs)
1716
-
1717
- code_uuid = str(uuid.uuid4())
1718
- generated_code_store[code_uuid] = generated_code
1719
-
1720
- tool_result = {"status": "success", "code_id": code_uuid, "summary": f"Code generated successfully. Use this ID in the next tool call that requires code."}
1721
- tool_calls_this_turn.append({"name": "prepare_code", "params": tool_params, "result": tool_result})
1722
- observation_text = f"```json\n{json.dumps(tool_result, indent=2)}\n```"
1723
- current_scratchpad += f"\n\n### Step {i+1}: Observation\n- **Action:** Called `{tool_name}`\n- **Result:**\n{observation_text}"
1724
- log_event(f"Code generated with ID: {code_uuid}", MSG_TYPE.MSG_TYPE_OBSERVATION)
1725
- if code_gen_id: log_event(f"Generating code...", MSG_TYPE.MSG_TYPE_TOOL_CALL, metadata={"id": code_gen_id, "result": tool_result})
1726
- if reasoning_step_id: log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**", MSG_TYPE.MSG_TYPE_STEP_END, event_id= reasoning_step_id)
1727
- continue # Go to the next reasoning step immediately
1728
- if tool_name == 'local_tools::view_generated_code':
1729
- code_id = tool_params.get("code_id")
1730
- if code_id:
1731
- tool_result = {"status": "success", "code_id": code_id, "generated_code":generated_code_store[code_uuid]}
1732
- else:
1733
- tool_result = {"status": "error", "code_id": code_id, "error":"Unknown uuid"}
1734
- observation_text = f"```json\n{json.dumps(tool_result, indent=2)}\n```"
1735
- current_scratchpad += f"\n\n### Step {i+1}: Observation\n- **Action:** Called `{tool_name}`\n- **Result:**\n{observation_text}"
1736
- log_event(f"Result from `{tool_name}`:\n```\n{generated_code_store[code_uuid]}\n```\n", MSG_TYPE.MSG_TYPE_TOOL_CALL, metadata={"id": code_gen_id, "result": tool_result})
1737
- continue
1738
- if tool_name == 'local_tools::refactor_scratchpad':
1739
- scratchpad_cleaning_prompt = f"""Enhance this scratchpad content to be more organized and comprehensive. Keep relevant experience information and remove any useless redundancies. Try to log learned things from the context so that you won't make the same mistakes again. Do not remove the main objective information or any crucial information that may be useful for the next iterations. Answer directly with the new scratchpad content without any comments.
1740
- --- YOUR INTERNAL SCRATCHPAD (Work History & Analysis) ---
1741
- {current_scratchpad}
1742
- --- END OF SCRATCHPAD ---"""
1743
- current_scratchpad = self.generate_text(scratchpad_cleaning_prompt)
1744
- log_event(f"**New scratchpad**:\n{current_scratchpad}", MSG_TYPE.MSG_TYPE_SCRATCHPAD)
1745
-
- # --- Substitute UUIDs and Execute Standard Tools ---
- log_event(f"**Calling tool**: `{tool_name}` with params:\n{dict_to_markdown(tool_params)}", MSG_TYPE.MSG_TYPE_TOOL_CALL)
- _substitute_code_uuids_recursive(tool_params, generated_code_store)
-
- tool_call_id = log_event(f"**Executing tool**: {tool_name}",MSG_TYPE.MSG_TYPE_STEP_START, metadata={"name": tool_name, "parameters": tool_params, "id":"executing tool"})
- tool_result = None
- try:
- if tool_name.startswith("research::") and use_data_store:
- store_name = tool_name.split("::")[1]
- rag_callable = use_data_store.get(store_name, {}).get("callable")
- query = tool_params.get("query", "")
- retrieved_chunks = rag_callable(query, rag_top_k=rag_top_k, rag_min_similarity_percent=rag_min_similarity_percent)
- if retrieved_chunks:
- sources_this_turn.extend(retrieved_chunks)
- tool_result = {"status": "success", "summary": f"Found {len(retrieved_chunks)} relevant chunks.", "chunks": retrieved_chunks}
+ tool_result = {"status": "failure", "error": f"Tool '{tool_name}' was called but did not execute properly."} # Default error
+ if tool_name == "local_tools::generate_and_call":
+ chain_id = log_event(f"Starting chained tool call...", MSG_TYPE.MSG_TYPE_STEP_START)
+ try:
+ code_gen_prompt, lang = tool_params.get("code_generation_prompt", ""), tool_params.get("language", "python")
+ next_tool_name, next_tool_params = tool_params.get("next_tool_name"), tool_params.get("next_tool_params", {})
+ log_event("Received parameters for chain", MSG_TYPE.MSG_TYPE_STEP, meta={"parent_id": chain_id, "params": tool_params})
+
+ if not (use_mcps and hasattr(self, 'mcp')):
+ tool_result = {"status": "failure", "error": "MCPs are not enabled, cannot execute tools."}
+ elif next_tool_name not in code_consuming_tools:
+ tool_result = {"status": "failure", "error": f"Tool '{next_tool_name}' is not a valid code-consuming tool. Valid options are: {list(code_consuming_tools)}"}
  else:
- tool_result = {"status": "success", "summary": "No relevant documents found."}
- elif use_mcps and self.mcp:
- mcp_result = self.mcp.execute_tool(tool_name, tool_params, lollms_client_instance=self)
- tool_result = {"status": "success", "output": mcp_result} if not (isinstance(mcp_result, dict) and "error" in mcp_result) else {"status": "failure", **mcp_result}
- else:
- tool_result = {"status": "failure", "error": f"Tool '{tool_name}' not found."}
- except Exception as e:
- trace_exception(e)
- tool_result = {"status": "failure", "error": f"Exception executing tool: {str(e)}"}
-
- if tool_call_id: log_event(f"**Executing tool**: {tool_name}", MSG_TYPE.MSG_TYPE_STEP_END, metadata={"result": tool_result}, event_id= tool_call_id)
-
- observation_text = ""
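Editor's note: in the removed `research::<store>` branch above, `use_data_store` maps a store name to a dict whose `"callable"` takes a query plus `rag_top_k` and `rag_min_similarity_percent` and returns retrieved chunks. A minimal in-memory sketch of such a callable; the toy substring scoring and the field names in the returned chunks are illustrative, not the library's contract:

```python
from typing import Dict, List

DOCS = [
    {"title": "readme", "content": "lollms-client exposes LLM, TTI and MCP bindings."},
    {"title": "notes", "content": "Tools are namespaced like research::my_store."},
]

def my_store_search(query: str, rag_top_k: int = 5,
                    rag_min_similarity_percent: float = 0.0) -> List[Dict]:
    """Toy retriever: keep documents containing the query, capped at rag_top_k chunks."""
    hits = [
        {"document": d["title"], "content": d["content"], "similarity": 100.0}
        for d in DOCS if query.lower() in d["content"].lower()
    ]
    hits = [h for h in hits if h["similarity"] >= rag_min_similarity_percent]
    return hits[:rag_top_k]

# Shape the removed branch expects: store name -> {"callable": ...}
use_data_store = {"my_store": {"callable": my_store_search}}
```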
+ def _hydrate(text: str, store: Dict) -> str:
+ for k, v in store.items(): text = text.replace(k, v.get('content',''))
+ return text
+ hydrated_prompt = _hydrate(code_gen_prompt, asset_store)
+ log_event(f"Generating {lang} code for {next_tool_name}", MSG_TYPE.MSG_TYPE_STEP, meta={"parent_id": chain_id, "hydrated_prompt": hydrated_prompt})
+ generated_code = self.generate_code(prompt=hydrated_prompt, system_prompt=f"Generate ONLY raw {lang} code.", **llm_generation_kwargs)
+
+ def _substitute(data: Any) -> Any:
+ if isinstance(data, dict): return {k: _substitute(v) for k, v in data.items()}
+ if isinstance(data, list): return [_substitute(item) for item in data]
+ if isinstance(data, str) and data == CODE_PLACEHOLDER: return generated_code
+ return data
+ hydrated_params = _substitute(next_tool_params)
+
+ log_event(f"Calling tool: {next_tool_name}", MSG_TYPE.MSG_TYPE_TOOL_CALL, meta={"parent_id": chain_id, "name": next_tool_name, "parameters": hydrated_params})
+ tool_result = self.mcp.execute_tool(next_tool_name, hydrated_params, lollms_client_instance=self)
+ except Exception as e:
+ tool_result = {"status": "failure", "error": f"Exception in chained tool logic: {str(e)}"}
+ log_event(f"Finished chained tool call.", MSG_TYPE.MSG_TYPE_STEP_END, event_id=chain_id)
+ # ... other non-code tool handlers ...
+
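Editor's note: the added `generate_and_call` handler first hydrates the code-generation prompt from an asset store, then walks the next tool's parameters and swaps a placeholder token for the freshly generated code. A self-contained sketch of that substitution; the `CODE_PLACEHOLDER` sentinel value and the sample parameters are chosen here for illustration:

```python
from typing import Any, Dict

CODE_PLACEHOLDER = "<GENERATED_CODE>"  # sentinel value assumed for this sketch

def hydrate(text: str, store: Dict[str, Dict[str, str]]) -> str:
    """Replace asset ids embedded in a prompt with the stored asset content."""
    for key, asset in store.items():
        text = text.replace(key, asset.get("content", ""))
    return text

def substitute(data: Any, generated_code: str) -> Any:
    """Recursively replace the placeholder in nested dicts/lists with the real code."""
    if isinstance(data, dict):
        return {k: substitute(v, generated_code) for k, v in data.items()}
    if isinstance(data, list):
        return [substitute(item, generated_code) for item in data]
    if isinstance(data, str) and data == CODE_PLACEHOLDER:
        return generated_code
    return data

params = {"language": "python", "files": [{"name": "main.py", "content": CODE_PLACEHOLDER}]}
print(substitute(params, "print('hello')"))
# {'language': 'python', 'files': [{'name': 'main.py', 'content': "print('hello')"}]}
```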
+ # --- Process and Sanitize ALL Tool Outputs for the Scratchpad ---
  sanitized_result = {}
  if isinstance(tool_result, dict):
  sanitized_result = tool_result.copy()
- summarized_fields = {}
  for key, value in tool_result.items():
- if isinstance(value, str) and key.endswith("_base64") and len(value) > 256:
- sanitized_result[key] = f"[Image was generated. Size: {len(value)} bytes]"
- continue
- if isinstance(value, str) and len(self.tokenize(value)) > output_summarization_threshold:
- if streaming_callback: streaming_callback(f"Summarizing long output from field '{key}'...", MSG_TYPE.MSG_TYPE_STEP, {"type": "summarization"})
- summary = self.sequential_summarize(text=value, chunk_processing_prompt=f"Summarize key info from this chunk of '{key}'.", callback=streaming_callback)
- summarized_fields[key] = summary
- sanitized_result[key] = f"[Content summarized, see summary below. Original length: {len(value)} chars]"
- observation_text = f"```json\n{json.dumps(sanitized_result, indent=2)}\n```"
- if summarized_fields:
- observation_text += "\n\n**Summaries of Long Outputs:**"
- for key, summary in summarized_fields.items():
- observation_text += f"\n- **Summary of '{key}':**\n{summary}"
+ if isinstance(value, str) and value.startswith("data:image"):
+ img_uuid = str(uuid.uuid4())
+ asset_store[img_uuid] = {"type": "image", "content": value}
+ sanitized_result[key] = f"[Image asset generated: {img_uuid}]"
  else:
- observation_text = f"Tool returned non-dictionary output: {str(tool_result)}"
+ sanitized_result = {"raw_output": str(tool_result)}

+ observation_text = f"```json\n{json.dumps(sanitized_result, indent=2)}\n```"
+ log_event(f"Received output from: {tool_name}", MSG_TYPE.MSG_TYPE_TOOL_OUTPUT, meta={"name": tool_name, "result": sanitized_result})
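Editor's note: the sanitization pass above keeps the scratchpad small by moving large `data:image` payloads into an asset store and replacing them with a short marker before the result is serialized. A minimal sketch; the example result dict is invented, and the real code wraps the observation in a json code fence:

```python
import json
import uuid
from typing import Any, Dict, Tuple

def sanitize_tool_result(tool_result: Any, asset_store: Dict[str, Dict]) -> Tuple[Dict, str]:
    """Return (sanitized dict, observation text) with image payloads swapped for markers."""
    if not isinstance(tool_result, dict):
        sanitized = {"raw_output": str(tool_result)}
    else:
        sanitized = tool_result.copy()
        for key, value in tool_result.items():
            if isinstance(value, str) and value.startswith("data:image"):
                img_uuid = str(uuid.uuid4())
                asset_store[img_uuid] = {"type": "image", "content": value}
                sanitized[key] = f"[Image asset generated: {img_uuid}]"
    observation = json.dumps(sanitized, indent=2)  # fenced as json in the real code
    return sanitized, observation

store: Dict[str, Dict] = {}
result = {"status": "success", "image": "data:image/png;base64,iVBORw0..."}
print(sanitize_tool_result(result, store)[0]["image"])  # -> "[Image asset generated: <uuid>]"
```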
  tool_calls_this_turn.append({"name": tool_name, "params": tool_params, "result": tool_result})
  current_scratchpad += f"\n\n### Step {i+1}: Observation\n- **Action:** Called `{tool_name}`\n- **Result:**\n{observation_text}"
- log_event(f"Result from `{tool_name}`:\n{dict_to_markdown(sanitized_result)}", MSG_TYPE.MSG_TYPE_OBSERVATION)
-
- if reasoning_step_id: log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**", MSG_TYPE.MSG_TYPE_STEP_END, event_id = reasoning_step_id)
+ log_event(f"Finished reasoning step {i+1}", MSG_TYPE.MSG_TYPE_STEP_END, event_id=reasoning_step_id)
+
  except Exception as ex:
  trace_exception(ex)
- current_scratchpad += f"\n\n### Error : {ex}"
- if reasoning_step_id: log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**", MSG_TYPE.MSG_TYPE_STEP_END, event_id = reasoning_step_id)
-
- # --- Final Answer Synthesis ---
- synthesis_id = log_event("Synthesizing final answer...", MSG_TYPE.MSG_TYPE_STEP_START)
+ log_event(f"Error in reasoning loop: {str(ex)}", MSG_TYPE.MSG_TYPE_EXCEPTION, event_id=reasoning_step_id)

- final_answer_prompt = f"""
- --- Original User Request ---
- "{original_user_prompt}"
- --- Your Internal Scratchpad (Actions Taken & Findings) ---
- {current_scratchpad}
- --- INSTRUCTIONS ---
- - Synthesize a clear and friendly answer for the user based ONLY on your scratchpad.
- - If images were provided by the user, incorporate your analysis of them into the answer.
- - Do not talk about your internal process unless it's necessary to explain why you couldn't find an answer.
- """
- if debug: log_prompt(final_answer_prompt, "FINAL ANSWER SYNTHESIS PROMPT")
-
-
- final_answer_text = self.generate_text(prompt=final_answer_prompt, system_prompt=system_prompt, images=images, stream=streaming_callback is not None, streaming_callback=streaming_callback, temperature=final_answer_temperature, **llm_generation_kwargs)
- if type(final_answer_text) is dict:
- if streaming_callback:
- streaming_callback(final_answer_text["error"], MSG_TYPE.MSG_TYPE_EXCEPTION)
- return {
- "final_answer": "",
- "final_scratchpad": current_scratchpad,
- "tool_calls": tool_calls_this_turn,
- "sources": sources_this_turn,
- "clarification_required": False,
- "error": final_answer_text["error"]
- }
+ # --- 6. Final Answer Synthesis ---
+ synthesis_id = log_event("Synthesizing final answer...", MSG_TYPE.MSG_TYPE_STEP_START)
+ final_answer_prompt = f"""--- Original User Request ---\n"{original_user_prompt}"\n\n--- Your Internal Scratchpad ---\n{current_scratchpad}\n\n--- INSTRUCTIONS ---\nSynthesize a clear, comprehensive, and friendly answer for the user based ONLY on your scratchpad."""
+ final_synthesis_images = [img for img in (images or [])] + [asset['content'] for asset in asset_store.values() if asset['type'] == 'image']
+ final_answer_text = self.generate_text(prompt=final_answer_prompt, system_prompt=system_prompt, images=final_synthesis_images, stream=streaming_callback is not None, streaming_callback=streaming_callback, temperature=final_answer_temperature, **llm_generation_kwargs)
+ if isinstance(final_answer_text, dict) and "error" in final_answer_text:
+ return {"final_answer": "", "final_scratchpad": current_scratchpad, "tool_calls": tool_calls_this_turn, "sources": sources_this_turn, "clarification_required": False, "error": final_answer_text["error"]}
  final_answer = self.remove_thinking_blocks(final_answer_text)
- if debug: log_prompt(final_answer_text, "FINAL ANSWER RESPONSE")
+ log_event("Finished synthesizing answer.", MSG_TYPE.MSG_TYPE_STEP_END, event_id=synthesis_id)

- if synthesis_id: log_event("Synthesizing final answer...", MSG_TYPE.MSG_TYPE_STEP_END, event_id= synthesis_id)
+ return {"final_answer": final_answer, "final_scratchpad": current_scratchpad, "tool_calls": tool_calls_this_turn, "sources": sources_this_turn, "clarification_required": False, "error": None}

- return {
- "final_answer": final_answer,
- "final_scratchpad": current_scratchpad,
- "tool_calls": tool_calls_this_turn,
- "sources": sources_this_turn,
- "clarification_required": False,
- "error": None
- }
1656
  def generate_code(
  self,
  prompt:str,
@@ -2497,7 +2306,7 @@ Do not split the code in multiple tags.
  callback = self.sink

  if ctx_size is None:
- ctx_size = self.default_ctx_size or 8192 # Provide a fallback default
+ ctx_size = self.llm.default_ctx_size or 8192 # Provide a fallback default
  if chunk_size is None:
  chunk_size = ctx_size // 4
  if overlap is None:
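Editor's note: this hunk only changes where the fallback context size comes from (`self.llm.default_ctx_size` instead of the client itself); the resolution order stays the same: explicit argument, then binding default, then a hard-coded 8192, with the chunk size derived as a quarter of the context. A sketch of that resolution; the overlap default here is an assumption, since it is not shown in the hunk:

```python
from typing import Optional, Tuple

def resolve_chunking_defaults(ctx_size: Optional[int], chunk_size: Optional[int],
                              overlap: Optional[int],
                              binding_default_ctx: Optional[int]) -> Tuple[int, int, int]:
    """Mirror the fallback chain: argument -> binding default -> 8192."""
    if ctx_size is None:
        ctx_size = binding_default_ctx or 8192
    if chunk_size is None:
        chunk_size = ctx_size // 4
    if overlap is None:
        overlap = chunk_size // 10  # assumption: the real default is not visible in this hunk
    return ctx_size, chunk_size, overlap

print(resolve_chunking_defaults(None, None, None, None))   # (8192, 2048, 204)
print(resolve_chunking_defaults(None, None, None, 32768))  # (32768, 8192, 819)
```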
@@ -2573,7 +2382,7 @@ Current document analysis memory:
  # Process text in chunks
  while start_token_idx < total_tokens:
  # Calculate available tokens for chunk + memory
- available_tokens_for_dynamic_content = ctx_size - static_tokens - (self.default_n_predict or 1024) # Reserve space for output
+ available_tokens_for_dynamic_content = ctx_size - static_tokens - (self.llm.default_n_predict or 1024) # Reserve space for output
  if available_tokens_for_dynamic_content <= 100: # Need some minimum space
  ASCIIColors.error("Context size too small for summarization with current settings.")
  return "Error: Context size too small."
@@ -2610,7 +2419,7 @@ Current document analysis memory:
  ASCIIColors.magenta(f"--- Chunk {chunk_id} Prompt ---")
  ASCIIColors.cyan(prompt)

- response = self.generate_text(prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback)
+ response = self.generate_text(prompt, n_predict=(self.llm.default_n_predict or 1024), streaming_callback=callback)

  if isinstance(response, dict): # Handle generation error
  ASCIIColors.error(f"Chunk {chunk_id} processing failed: {response.get('error')}")
@@ -2669,7 +2478,7 @@ The final output must be put inside a {final_output_format} markdown tag.
  final_example_prompt = final_prompt_template.format(memory="<final_memory>")
  try:
  final_static_tokens = len(self.tokenize(final_example_prompt)) - len(self.tokenize("<final_memory>"))
- available_final_tokens = ctx_size - final_static_tokens - (self.default_n_predict or 1024) # Reserve space for output
+ available_final_tokens = ctx_size - final_static_tokens - (self.llm.default_n_predict or 1024) # Reserve space for output
  except RuntimeError as e:
  ASCIIColors.error(f"Tokenization failed during final setup: {e}")
  return "Error: Could not calculate final prompt size."
@@ -2686,7 +2495,7 @@ The final output must be put inside a {final_output_format} markdown tag.
  ASCIIColors.magenta("--- Final Aggregation Prompt ---")
  ASCIIColors.cyan(final_prompt)

- final_summary_raw = self.generate_text(final_prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback)
+ final_summary_raw = self.generate_text(final_prompt, n_predict=(self.llm.default_n_predict or 1024), streaming_callback=callback)

  if isinstance(final_summary_raw, dict):
  ASCIIColors.error(f"Final aggregation failed: {final_summary_raw.get('error')}")
@@ -2742,7 +2551,7 @@ The final output must be put inside a {final_output_format} markdown tag.

  # Set defaults and validate input
  if ctx_size is None:
- ctx_size = self.default_ctx_size or 8192
+ ctx_size = self.llm.default_ctx_size or 8192
  if chunk_size is None:
  chunk_size = ctx_size // 4
  if overlap is None:
@@ -2845,7 +2654,7 @@ Task: Update the markdown memory by adding new information from this chunk relev

  while start_token_idx < len(file_tokens):
  # Calculate available space dynamically
- available_tokens_for_dynamic_content = ctx_size - static_tokens - (self.default_n_predict or 1024)
+ available_tokens_for_dynamic_content = ctx_size - static_tokens - (self.llm.default_n_predict or 1024)
  if available_tokens_for_dynamic_content <= 100:
  ASCIIColors.error(f"Context window too small during analysis of {file_name}.")
  # Option: try truncating memory drastically or break
@@ -2885,7 +2694,7 @@ Task: Update the markdown memory by adding new information from this chunk relev
  ASCIIColors.magenta(f"--- Deep Analysis Prompt (Global Chunk {global_chunk_id}) ---")
  ASCIIColors.cyan(prompt)

- response = self.generate_text(prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback) # Use main callback for streaming output
+ response = self.generate_text(prompt, n_predict=(self.llm.default_n_predict or 1024), streaming_callback=callback) # Use main callback for streaming output

  if isinstance(response, dict): # Handle error
  ASCIIColors.error(f"Chunk processing failed (Global {global_chunk_id}): {response.get('error')}")
@@ -2940,7 +2749,7 @@ Provide the final aggregated answer in {output_format} format, directly addressi
  final_example_prompt = final_prompt.replace("{memory}", "<final_memory>")
  try:
  final_static_tokens = len(self.tokenize(final_example_prompt)) - len(self.tokenize("<final_memory>"))
- available_final_tokens = ctx_size - final_static_tokens - (self.default_n_predict or 1024)
+ available_final_tokens = ctx_size - final_static_tokens - (self.llm.default_n_predict or 1024)
  except RuntimeError as e:
  ASCIIColors.error(f"Tokenization failed during final setup: {e}")
  return "Error: Could not calculate final prompt size."
@@ -2956,7 +2765,7 @@ Provide the final aggregated answer in {output_format} format, directly addressi
  ASCIIColors.magenta("--- Final Aggregation Prompt ---")
  ASCIIColors.cyan(final_prompt)

- final_output_raw = self.generate_text(final_prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback) # Use main callback
+ final_output_raw = self.generate_text(final_prompt, n_predict=(self.llm.default_n_predict or 1024), streaming_callback=callback) # Use main callback

  if isinstance(final_output_raw, dict):
  ASCIIColors.error(f"Final aggregation failed: {final_output_raw.get('error')}")
@@ -3031,9 +2840,9 @@ Provide the final aggregated answer in {output_format} format, directly addressi
  tokens = []
  else:
  # Use the binding's tokenizer for accurate chunking
- tokens = self.binding.tokenize(text_to_process)
+ tokens = self.llm.tokenize(text_to_process)
  if chunk_size_tokens is None:
- chunk_size_tokens = self.default_ctx_size//2
+ chunk_size_tokens = self.llm.default_ctx_size//2

  if len(tokens) <= chunk_size_tokens:
  if streaming_callback:
@@ -3064,7 +2873,7 @@ Provide the final aggregated answer in {output_format} format, directly addressi
  step = chunk_size_tokens - overlap_tokens
  for i in range(0, len(tokens), step):
  chunk_tokens = tokens[i:i + chunk_size_tokens]
- chunk_text = self.binding.detokenize(chunk_tokens)
+ chunk_text = self.llm.detokenize(chunk_tokens)
  chunks.append(chunk_text)

  chunk_summaries = []
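Editor's note: the last two hunks switch chunking from `self.binding` to `self.llm` while keeping the same sliding-window logic: tokenize once, step through the token list with `chunk_size - overlap`, and detokenize each window. A self-contained sketch using a character-level stand-in for tokenize/detokenize:

```python
from typing import List

def tokenize(text: str) -> List[str]:
    """Character-level stand-in; the library delegates to self.llm.tokenize."""
    return list(text)

def detokenize(tokens: List[str]) -> str:
    return "".join(tokens)

def chunk_with_overlap(text: str, chunk_size_tokens: int, overlap_tokens: int) -> List[str]:
    """Slide a window of chunk_size_tokens over the token list, advancing by size - overlap."""
    tokens = tokenize(text)
    if len(tokens) <= chunk_size_tokens:
        return [text]
    step = chunk_size_tokens - overlap_tokens
    return [detokenize(tokens[i:i + chunk_size_tokens]) for i in range(0, len(tokens), step)]

print(chunk_with_overlap("abcdefghij", chunk_size_tokens=4, overlap_tokens=1))
# ['abcd', 'defg', 'ghij', 'j']
```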