lollms-client 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,31 +1,56 @@
1
+ # lollms_client/lollms_core.py
1
2
  import requests
2
3
  from ascii_colors import ASCIIColors, trace_exception
3
4
  from lollms_client.lollms_types import MSG_TYPE, ELF_COMPLETION_FORMAT
4
- from lollms_client.lollms_utilities import encode_image
5
- from lollms_client.lollms_llm_binding import LollmsLLMBindingManager
5
+ from lollms_client.lollms_utilities import encode_image # Keep utilities needed by core
6
+ from lollms_client.lollms_llm_binding import LollmsLLMBinding, LollmsLLMBindingManager
7
+ # Import new Abstract Base Classes and Managers
8
+ from lollms_client.lollms_tts_binding import LollmsTTSBinding, LollmsTTSBindingManager
9
+ from lollms_client.lollms_tti_binding import LollmsTTIBinding, LollmsTTIBindingManager
10
+ from lollms_client.lollms_stt_binding import LollmsSTTBinding, LollmsSTTBindingManager
11
+ from lollms_client.lollms_ttv_binding import LollmsTTVBinding, LollmsTTVBindingManager
12
+ from lollms_client.lollms_ttm_binding import LollmsTTMBinding, LollmsTTMBindingManager
13
+
6
14
  import json
7
15
  from enum import Enum
8
16
  import base64
9
17
  import requests
10
- import pipmaster as pm
11
18
  from typing import List, Optional, Callable, Union, Dict
12
19
  import numpy as np
13
- import pipmaster as pm
14
20
  from pathlib import Path
15
21
  import os
16
22
 
17
-
18
23
  class LollmsClient():
19
- """Core class for interacting with LOLLMS bindings"""
20
- def __init__(self,
24
+ """
25
+ Core client class for interacting with LOLLMS services, including LLM, TTS, TTI, STT, TTV, and TTM.
26
+ Provides a unified interface to manage and use different bindings for various modalities.
27
+ """
28
+ def __init__(self,
29
+ # LLM Binding Parameters
21
30
  binding_name: str = "lollms",
22
- host_address: Optional[str] = None,
31
+ host_address: Optional[str] = None, # Shared host address default for all bindings if not specified
23
32
  model_name: str = "",
24
- service_key: Optional[str] = None,
25
- verify_ssl_certificate: bool = True,
26
- personality: Optional[int] = None,
27
33
  llm_bindings_dir: Path = Path(__file__).parent / "llm_bindings",
28
- binding_config: Optional[Dict[str, any]] = None,
34
+ llm_binding_config: Optional[Dict[str, any]] = None, # Renamed for clarity
35
+ personality: Optional[int] = None, # Specific to LLM lollms binding
36
+
37
+ # Optional Modality Binding Names
38
+ tts_binding_name: Optional[str] = None,
39
+ tti_binding_name: Optional[str] = None,
40
+ stt_binding_name: Optional[str] = None,
41
+ ttv_binding_name: Optional[str] = None,
42
+ ttm_binding_name: Optional[str] = None,
43
+
44
+ # Modality Binding Directories
45
+ tts_bindings_dir: Path = Path(__file__).parent / "tts_bindings",
46
+ tti_bindings_dir: Path = Path(__file__).parent / "tti_bindings",
47
+ stt_bindings_dir: Path = Path(__file__).parent / "stt_bindings",
48
+ ttv_bindings_dir: Path = Path(__file__).parent / "ttv_bindings",
49
+ ttm_bindings_dir: Path = Path(__file__).parent / "ttm_bindings",
50
+
51
+ # General Parameters (mostly defaults for LLM generation)
52
+ service_key: Optional[str] = None, # Shared service key/client_id
53
+ verify_ssl_certificate: bool = True,
29
54
  ctx_size: Optional[int] = 8192,
30
55
  n_predict: Optional[int] = 4096,
31
56
  stream: bool = False,
@@ -40,37 +65,135 @@ class LollmsClient():
40
65
  user_name ="user",
41
66
  ai_name = "assistant"):
42
67
  """
43
- Initialize the LollmsCore with a binding and generation parameters.
68
+ Initialize the LollmsClient with LLM and optional modality bindings.
44
69
 
45
70
  Args:
46
- binding_name (str): Name of the binding to use (e.g., "lollms", "ollama").
47
- host_address (Optional[str]): Host address for the service. Overrides binding default if provided.
48
- model_name (str): Name of the model to use. Defaults to empty string.
49
- service_key (Optional[str]): Authentication key for the service.
50
- verify_ssl_certificate (bool): Whether to verify SSL certificates. Defaults to True.
51
- personality (Optional[int]): Personality ID (used only by LOLLMS binding).
52
- llm_bindings_dir (Path): Directory containing binding implementations.
53
- Defaults to the "bindings" subdirectory relative to this file's location.
54
- binding_config (Optional[Dict[str, any]]): Additional configuration for the binding.
55
- n_predict (Optional[int]): Maximum number of tokens to generate. Default for generate_text.
56
- stream (bool): Whether to stream the output. Defaults to False for generate_text.
57
- temperature (float): Sampling temperature. Defaults to 0.1 for generate_text.
58
- top_k (int): Top-k sampling parameter. Defaults to 50 for generate_text.
59
- top_p (float): Top-p sampling parameter. Defaults to 0.95 for generate_text.
60
- repeat_penalty (float): Penalty for repeated tokens. Defaults to 0.8 for generate_text.
61
- repeat_last_n (int): Number of previous tokens to consider for repeat penalty. Defaults to 40.
62
- seed (Optional[int]): Random seed for generation. Default for generate_text.
63
- n_threads (int): Number of threads to use. Defaults to 8 for generate_text.
64
- streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
65
- Default for generate_text. Takes a string chunk and an MSG_TYPE enum value.
71
+ binding_name (str): Name of the primary LLM binding (e.g., "lollms", "ollama").
72
+ host_address (Optional[str]): Default host address shared by all services; if None, each binding uses its own default.
73
+ model_name (str): Default model name for the LLM binding.
74
+ llm_bindings_dir (Path): Directory for LLM binding implementations.
75
+ llm_binding_config (Optional[Dict]): Additional config for the LLM binding.
76
+ personality (Optional[int]): Personality ID (used by LLM 'lollms' binding).
77
+ tts_binding_name (Optional[str]): Name of the TTS binding to use (e.g., "lollms").
78
+ tti_binding_name (Optional[str]): Name of the TTI binding to use (e.g., "lollms").
79
+ stt_binding_name (Optional[str]): Name of the STT binding to use (e.g., "lollms").
80
+ ttv_binding_name (Optional[str]): Name of the TTV binding to use (e.g., "lollms").
81
+ ttm_binding_name (Optional[str]): Name of the TTM binding to use (e.g., "lollms").
82
+ tts_bindings_dir (Path): Directory for TTS bindings.
83
+ tti_bindings_dir (Path): Directory for TTI bindings.
84
+ stt_bindings_dir (Path): Directory for STT bindings.
85
+ ttv_bindings_dir (Path): Directory for TTV bindings.
86
+ ttm_bindings_dir (Path): Directory for TTM bindings.
87
+ service_key (Optional[str]): Shared authentication key or client_id.
88
+ verify_ssl_certificate (bool): Whether to verify SSL certificates.
89
+ ctx_size (Optional[int]): Default context size for LLM.
90
+ n_predict (Optional[int]): Default max tokens for LLM.
91
+ stream (bool): Default streaming mode for LLM.
92
+ temperature (float): Default temperature for LLM.
93
+ top_k (int): Default top_k for LLM.
94
+ top_p (float): Default top_p for LLM.
95
+ repeat_penalty (float): Default repeat penalty for LLM.
96
+ repeat_last_n (int): Default repeat last n for LLM.
97
+ seed (Optional[int]): Default seed for LLM.
98
+ n_threads (int): Default threads for LLM.
99
+ streaming_callback (Optional[Callable]): Default streaming callback for LLM.
100
+ user_name (str): Default user name for prompts.
101
+ ai_name (str): Default AI name for prompts.
66
102
 
67
103
  Raises:
68
- ValueError: If the specified binding cannot be created.
104
+ ValueError: If the primary LLM binding cannot be created.
69
105
  """
106
+ self.host_address = host_address # Store initial preference
107
+ self.service_key = service_key
108
+ self.verify_ssl_certificate = verify_ssl_certificate
109
+
110
+ # --- LLM Binding Setup ---
70
111
  self.binding_manager = LollmsLLMBindingManager(llm_bindings_dir)
71
- self.binding_config = binding_config or {}
72
-
73
- # Store generation parameters as instance variables
112
+ self.binding = self.binding_manager.create_binding(
113
+ binding_name=binding_name,
114
+ host_address=host_address, # Pass initial host preference
115
+ model_name=model_name,
116
+ service_key=service_key,
117
+ verify_ssl_certificate=verify_ssl_certificate,
118
+ personality=personality,
119
+ # Pass LLM specific config if needed
120
+ **(llm_binding_config or {})
121
+ )
122
+
123
+ if self.binding is None:
124
+ available = self.binding_manager.get_available_bindings()
125
+ raise ValueError(f"Failed to create LLM binding: {binding_name}. Available: {available}")
126
+
127
+ # Determine the effective host address (use LLM binding's if initial was None)
128
+ effective_host_address = self.host_address
129
+ if effective_host_address is None and self.binding:
130
+ effective_host_address = self.binding.host_address
131
+
132
+ # --- Modality Binding Setup ---
133
+ self.tts_binding_manager = LollmsTTSBindingManager(tts_bindings_dir)
134
+ self.tti_binding_manager = LollmsTTIBindingManager(tti_bindings_dir)
135
+ self.stt_binding_manager = LollmsSTTBindingManager(stt_bindings_dir)
136
+ self.ttv_binding_manager = LollmsTTVBindingManager(ttv_bindings_dir)
137
+ self.ttm_binding_manager = LollmsTTMBindingManager(ttm_bindings_dir)
138
+
139
+ self.tts: Optional[LollmsTTSBinding] = None
140
+ self.tti: Optional[LollmsTTIBinding] = None
141
+ self.stt: Optional[LollmsSTTBinding] = None
142
+ self.ttv: Optional[LollmsTTVBinding] = None
143
+ self.ttm: Optional[LollmsTTMBinding] = None
144
+
145
+ if tts_binding_name:
146
+ self.tts = self.tts_binding_manager.create_binding(
147
+ binding_name=tts_binding_name,
148
+ host_address=effective_host_address,
149
+ service_key=self.service_key,
150
+ verify_ssl_certificate=self.verify_ssl_certificate
151
+ )
152
+ if self.tts is None:
153
+ ASCIIColors.warning(f"Failed to create TTS binding: {tts_binding_name}. Available: {self.tts_binding_manager.get_available_bindings()}")
154
+
155
+ if tti_binding_name:
156
+ self.tti = self.tti_binding_manager.create_binding(
157
+ binding_name=tti_binding_name,
158
+ host_address=effective_host_address,
159
+ service_key=self.service_key, # Passed as service_key, used as client_id by lollms TTI binding
160
+ verify_ssl_certificate=self.verify_ssl_certificate
161
+ )
162
+ if self.tti is None:
163
+ ASCIIColors.warning(f"Failed to create TTI binding: {tti_binding_name}. Available: {self.tti_binding_manager.get_available_bindings()}")
164
+
165
+ if stt_binding_name:
166
+ self.stt = self.stt_binding_manager.create_binding(
167
+ binding_name=stt_binding_name,
168
+ host_address=effective_host_address,
169
+ service_key=self.service_key,
170
+ verify_ssl_certificate=self.verify_ssl_certificate
171
+ )
172
+ if self.stt is None:
173
+ ASCIIColors.warning(f"Failed to create STT binding: {stt_binding_name}. Available: {self.stt_binding_manager.get_available_bindings()}")
174
+
175
+ if ttv_binding_name:
176
+ self.ttv = self.ttv_binding_manager.create_binding(
177
+ binding_name=ttv_binding_name,
178
+ host_address=effective_host_address,
179
+ service_key=self.service_key,
180
+ verify_ssl_certificate=self.verify_ssl_certificate
181
+ )
182
+ if self.ttv is None:
183
+ ASCIIColors.warning(f"Failed to create TTV binding: {ttv_binding_name}. Available: {self.ttv_binding_manager.get_available_bindings()}")
184
+
185
+ if ttm_binding_name:
186
+ self.ttm = self.ttm_binding_manager.create_binding(
187
+ binding_name=ttm_binding_name,
188
+ host_address=effective_host_address,
189
+ service_key=self.service_key,
190
+ verify_ssl_certificate=self.verify_ssl_certificate
191
+ )
192
+ if self.ttm is None:
193
+ ASCIIColors.warning(f"Failed to create TTM binding: {ttm_binding_name}. Available: {self.ttm_binding_manager.get_available_bindings()}")
194
+
195
+
196
+ # --- Store Default Generation Parameters ---
74
197
  self.default_ctx_size = ctx_size
75
198
  self.default_n_predict = n_predict
76
199
  self.default_stream = stream
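The constructor shown above creates one mandatory LLM binding plus any requested TTS/TTI/STT/TTV/TTM bindings, all sharing the same host address and service key, then stores per-call generation defaults. A minimal usage sketch, assuming the module path given at the top of the diff (lollms_client/lollms_core.py) and a purely hypothetical local server URL:

```python
from lollms_client.lollms_core import LollmsClient

# Hypothetical host and binding choices; substitute your own deployment details.
client = LollmsClient(
    binding_name="lollms",                # primary LLM binding (mandatory)
    host_address="http://localhost:9600",
    model_name="",                        # empty string: use whatever model the server has loaded
    tts_binding_name="lollms",            # optional: populates client.tts
    tti_binding_name="lollms",            # optional: populates client.tti
)

# Modality attributes stay None when no name was given or when creation failed
# (a warning is printed); a failed LLM binding raises ValueError instead.
print(client.binding is not None)
print(client.tts, client.tti, client.stt, client.ttv, client.ttm)
```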
@@ -82,29 +205,10 @@ class LollmsClient():
82
205
  self.default_seed = seed
83
206
  self.default_n_threads = n_threads
84
207
  self.default_streaming_callback = streaming_callback
85
-
86
- # Create the binding instance
87
- self.binding = self.binding_manager.create_binding(
88
- binding_name=binding_name,
89
- host_address=host_address,
90
- model_name=model_name,
91
- service_key=service_key,
92
- verify_ssl_certificate=verify_ssl_certificate,
93
- personality=personality
94
- )
95
-
96
- if self.binding is None:
97
- raise ValueError(f"Failed to create binding: {binding_name}. Available bindings: {self.binding_manager.get_available_bindings()}")
98
-
99
- # Apply additional configuration if provided
100
- if binding_config:
101
- for key, value in binding_config.items():
102
- setattr(self.binding, key, value)
208
+
209
+ # --- Prompt Formatting Attributes ---
103
210
  self.user_name = user_name
104
211
  self.ai_name = ai_name
105
- self.service_key = service_key
106
-
107
- self.verify_ssl_certificate = verify_ssl_certificate
108
212
  self.start_header_id_template ="!@>"
109
213
  self.end_header_id_template =": "
110
214
  self.system_message_template ="system"
@@ -117,24 +221,25 @@ class LollmsClient():
117
221
  self.end_ai_message_id_template =""
118
222
 
119
223
 
224
+ # --- Prompt Formatting Properties ---
120
225
  @property
121
226
  def system_full_header(self) -> str:
122
227
  """Get the start_header_id_template."""
123
228
  return f"{self.start_header_id_template}{self.system_message_template}{self.end_header_id_template}"
124
-
229
+
125
230
  def system_custom_header(self, ai_name) -> str:
126
231
  """Get the start_header_id_template."""
127
232
  return f"{self.start_header_id_template}{ai_name}{self.end_header_id_template}"
128
-
233
+
129
234
  @property
130
235
  def user_full_header(self) -> str:
131
236
  """Get the start_header_id_template."""
132
237
  return f"{self.start_user_header_id_template}{self.user_name}{self.end_user_header_id_template}"
133
-
238
+
134
239
  def user_custom_header(self, user_name="user") -> str:
135
240
  """Get the start_header_id_template."""
136
241
  return f"{self.start_user_header_id_template}{user_name}{self.end_user_header_id_template}"
137
-
242
+
138
243
  @property
139
244
  def ai_full_header(self) -> str:
140
245
  """Get the start_header_id_template."""
@@ -145,10 +250,13 @@ class LollmsClient():
145
250
  return f"{self.start_ai_header_id_template}{ai_name}{self.end_ai_header_id_template}"
146
251
 
147
252
  def sink(self, s=None,i=None,d=None):
253
+ """Placeholder sink method."""
148
254
  pass
255
+
256
+ # --- Core LLM Binding Methods ---
149
257
  def tokenize(self, text: str) -> list:
150
258
  """
151
- Tokenize text using the active binding.
259
+ Tokenize text using the active LLM binding.
152
260
 
153
261
  Args:
154
262
  text (str): The text to tokenize.
@@ -156,11 +264,13 @@ class LollmsClient():
156
264
  Returns:
157
265
  list: List of tokens.
158
266
  """
159
- return self.binding.tokenize(text)
160
-
267
+ if self.binding:
268
+ return self.binding.tokenize(text)
269
+ raise RuntimeError("LLM binding not initialized.")
270
+
161
271
  def detokenize(self, tokens: list) -> str:
162
272
  """
163
- Detokenize tokens using the active binding.
273
+ Detokenize tokens using the active LLM binding.
164
274
 
165
275
  Args:
166
276
  tokens (list): List of tokens to detokenize.
@@ -168,20 +278,24 @@ class LollmsClient():
168
278
  Returns:
169
279
  str: Detokenized text.
170
280
  """
171
- return self.binding.detokenize(tokens)
172
-
281
+ if self.binding:
282
+ return self.binding.detokenize(tokens)
283
+ raise RuntimeError("LLM binding not initialized.")
284
+
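tokenize and detokenize simply delegate to the active LLM binding and, in this version, raise RuntimeError when no binding is loaded. A tiny round-trip sketch, continuing with the client object from the earlier example (the actual token ids depend entirely on the binding's tokenizer):

```python
tokens = client.tokenize("Hello, lollms!")
print(tokens)                     # binding-specific token ids
print(client.detokenize(tokens))  # should round-trip back to the original text
```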
173
285
  def get_model_details(self) -> dict:
174
286
  """
175
- Get model information from the active binding.
287
+ Get model information from the active LLM binding.
176
288
 
177
289
  Returns:
178
290
  dict: Model information dictionary.
179
291
  """
180
- return self.binding.get_model_info()
181
-
292
+ if self.binding:
293
+ return self.binding.get_model_info()
294
+ raise RuntimeError("LLM binding not initialized.")
295
+
182
296
  def switch_model(self, model_name: str) -> bool:
183
297
  """
184
- Load a new model in the active binding.
298
+ Load a new model in the active LLM binding.
185
299
 
186
300
  Args:
187
301
  model_name (str): Name of the model to load.
@@ -189,18 +303,20 @@ class LollmsClient():
189
303
  Returns:
190
304
  bool: True if model loaded successfully, False otherwise.
191
305
  """
192
- return self.binding.load_model(model_name)
193
-
194
- def get_available_bindings(self) -> List[str]:
306
+ if self.binding:
307
+ return self.binding.load_model(model_name)
308
+ raise RuntimeError("LLM binding not initialized.")
309
+
310
+ def get_available_llm_bindings(self) -> List[str]: # Renamed for clarity
195
311
  """
196
- Get list of available bindings.
312
+ Get list of available LLM binding names.
197
313
 
198
314
  Returns:
199
- List[str]: List of binding names that can be used.
315
+ List[str]: List of binding names that can be used for LLMs.
200
316
  """
201
317
  return self.binding_manager.get_available_bindings()
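get_model_details, switch_model and get_available_llm_bindings all guard on the presence of the LLM binding in the same way. A short sketch; the binding and model names used here are hypothetical and depend on what ships in llm_bindings_dir and on the server:

```python
print(client.get_available_llm_bindings())   # names discovered in llm_bindings_dir

if client.switch_model("my-model"):          # hypothetical model name
    print(client.get_model_details())        # binding-specific info dict
```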
202
-
203
- def generate_text(self,
318
+
319
+ def generate_text(self,
204
320
  prompt: str,
205
321
  images: Optional[List[str]] = None,
206
322
  n_predict: Optional[int] = None,
@@ -212,9 +328,10 @@ class LollmsClient():
212
328
  repeat_last_n: Optional[int] = None,
213
329
  seed: Optional[int] = None,
214
330
  n_threads: Optional[int] = None,
215
- streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None) -> str:
331
+ ctx_size: int | None = None,
332
+ streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None) -> Union[str, dict]:
216
333
  """
217
- Generate text using the active binding, using instance defaults if parameters are not provided.
334
+ Generate text using the active LLM binding, using instance defaults if parameters are not provided.
218
335
 
219
336
  Args:
220
337
  prompt (str): The input prompt for text generation.
@@ -228,58 +345,91 @@ class LollmsClient():
228
345
  repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
229
346
  seed (Optional[int]): Random seed for generation. Uses instance default if None.
230
347
  n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
348
+ ctx_size (int | None): Context size override for this generation.
231
349
  streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
232
- Uses instance default if None.
233
- - First parameter (str): The chunk of text received from the stream.
234
- - Second parameter (MSG_TYPE): The message type enum (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
235
350
 
236
351
  Returns:
237
352
  Union[str, dict]: Generated text or error dictionary if failed.
238
353
  """
239
- return self.binding.generate_text(
240
- prompt=prompt,
241
- images=images,
242
- n_predict=n_predict if n_predict is not None else self.default_n_predict,
243
- stream=stream if stream is not None else self.default_stream,
244
- temperature=temperature if temperature is not None else self.default_temperature,
245
- top_k=top_k if top_k is not None else self.default_top_k,
246
- top_p=top_p if top_p is not None else self.default_top_p,
247
- repeat_penalty=repeat_penalty if repeat_penalty is not None else self.default_repeat_penalty,
248
- repeat_last_n=repeat_last_n if repeat_last_n is not None else self.default_repeat_last_n,
249
- seed=seed if seed is not None else self.default_seed,
250
- n_threads=n_threads if n_threads is not None else self.default_n_threads,
251
- streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback
252
- )
354
+ if self.binding:
355
+ return self.binding.generate_text(
356
+ prompt=prompt,
357
+ images=images,
358
+ n_predict=n_predict if n_predict is not None else self.default_n_predict,
359
+ stream=stream if stream is not None else self.default_stream,
360
+ temperature=temperature if temperature is not None else self.default_temperature,
361
+ top_k=top_k if top_k is not None else self.default_top_k,
362
+ top_p=top_p if top_p is not None else self.default_top_p,
363
+ repeat_penalty=repeat_penalty if repeat_penalty is not None else self.default_repeat_penalty,
364
+ repeat_last_n=repeat_last_n if repeat_last_n is not None else self.default_repeat_last_n,
365
+ seed=seed if seed is not None else self.default_seed,
366
+ n_threads=n_threads if n_threads is not None else self.default_n_threads,
367
+ ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size,
368
+ streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback
369
+ )
370
+ raise RuntimeError("LLM binding not initialized.")
371
+
372
+
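generate_text fills every omitted argument from the instance defaults captured in __init__ and, new in this version, also forwards ctx_size to the binding. A hedged streaming sketch; MSG_TYPE comes from lollms_client.lollms_types as imported at the top of the module, and the prompt is illustrative:

```python
from lollms_client.lollms_types import MSG_TYPE

def on_chunk(chunk: str, msg_type: MSG_TYPE):
    # Receives each streamed piece of text; printing keeps the example minimal.
    print(chunk, end="", flush=True)

result = client.generate_text(
    prompt="Write a haiku about bindings.",
    stream=True,                  # per-call override of default_stream
    n_predict=128,                # per-call override of default_n_predict
    streaming_callback=on_chunk,  # falls back to default_streaming_callback when None
)
# Per the docstring, a dict may come back instead of a string when generation fails.
if isinstance(result, dict):
    print(result.get("error"))
```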
373
+ def embed(self, text, **kwargs):
374
+ """
375
+ Generate embeddings for the input text using the active LLM binding.
253
376
 
254
-
255
- def embed(self, text):
256
- self.binding.embed(text)
377
+ Args:
378
+ text (str or List[str]): Input text to embed.
379
+ **kwargs: Additional arguments specific to the binding's embed method.
380
+
381
+ Returns:
382
+ list: List of embeddings.
383
+ """
384
+ if self.binding:
385
+ return self.binding.embed(text, **kwargs)
386
+ raise RuntimeError("LLM binding not initialized.")
257
387
 
258
388
 
259
389
  def listModels(self):
260
- self.binding.listModels()
390
+ """Lists models available to the current LLM binding."""
391
+ if self.binding:
392
+ return self.binding.listModels()
393
+ raise RuntimeError("LLM binding not initialized.")
261
394
 
395
+ # --- Convenience Methods for Lollms LLM Binding Features ---
396
+ def listMountedPersonalities(self) -> Union[List[Dict], Dict]:
397
+ """
398
+ Lists mounted personalities *if* the active LLM binding is 'lollms'.
262
399
 
400
+ Returns:
401
+ Union[List[Dict], Dict]: List of personality dicts or error dict.
402
+ """
403
+ if self.binding and hasattr(self.binding, 'lollms_listMountedPersonalities'):
404
+ return self.binding.lollms_listMountedPersonalities()
405
+ else:
406
+ ASCIIColors.warning("listMountedPersonalities is only available for the 'lollms' LLM binding.")
407
+ return {"status": False, "error": "Functionality not available for the current binding"}
263
408
 
409
+ # --- Code Generation / Extraction Helpers (These might be moved to TasksLibrary later) ---
264
410
  def generate_codes(
265
- self,
266
- prompt,
267
- images=[],
411
+ self,
412
+ prompt,
413
+ images=[],
268
414
  template=None,
269
415
  language="json",
270
416
  code_tag_format="markdown", # or "html"
271
- max_size = None,
272
- temperature = None,
273
- top_k = None,
274
- top_p=None,
275
- repeat_penalty=None,
276
- repeat_last_n=None,
277
- callback=None,
278
- debug=False
417
+ max_size = None,
418
+ temperature = None,
419
+ top_k = None,
420
+ top_p=None,
421
+ repeat_penalty=None,
422
+ repeat_last_n=None,
423
+ callback=None,
424
+ debug=False
279
425
  ):
426
+ """
427
+ Generates multiple code blocks based on a prompt.
428
+ Uses the underlying LLM binding via `generate_text`.
429
+ """
280
430
  response_full = ""
281
- full_prompt = f"""{self.system_full_header}Act as a code generation assistant that generates code from user prompt.
282
- {self.user_full_header}
431
+ full_prompt = f"""{self.system_full_header}Act as a code generation assistant that generates code from user prompt.
432
+ {self.user_full_header}
283
433
  {prompt}
284
434
  """
285
435
  if template:
@@ -305,24 +455,35 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
305
455
  full_prompt += f"""Do not split the code in multiple tags.
306
456
  {self.ai_full_header}"""
307
457
 
308
- if len(self.image_files)>0:
309
- response = self.generate_text_with_images(full_prompt, self.image_files, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
310
- elif len(images)>0:
311
- response = self.generate_text_with_images(full_prompt, images, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
312
- else:
313
- response = self.generate_text(full_prompt, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
458
+ # Use generate_text which handles images internally
459
+ response = self.generate_text(
460
+ full_prompt,
461
+ images=images,
462
+ n_predict=max_size,
463
+ temperature=temperature,
464
+ top_k=top_k,
465
+ top_p=top_p,
466
+ repeat_penalty=repeat_penalty,
467
+ repeat_last_n=repeat_last_n,
468
+ streaming_callback=callback # Assuming generate_text handles streaming callback
469
+ )
470
+
471
+ if isinstance(response, dict) and not response.get("status", True): # Check for error dict
472
+ ASCIIColors.error(f"Code generation failed: {response.get('error')}")
473
+ return []
474
+
314
475
  response_full += response
315
- codes = self.extract_code_blocks(response)
476
+ codes = self.extract_code_blocks(response, format=code_tag_format)
316
477
  return codes
317
-
478
+
318
479
  def generate_code(
319
- self,
320
- prompt,
480
+ self,
481
+ prompt,
321
482
  images=[],
322
483
  template=None,
323
484
  language="json",
324
- code_tag_format="markdown", # or "html"
325
- max_size = None,
485
+ code_tag_format="markdown", # or "html"
486
+ max_size = None,
326
487
  temperature = None,
327
488
  top_k = None,
328
489
  top_p=None,
@@ -330,9 +491,14 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
330
491
  repeat_last_n=None,
331
492
  callback=None,
332
493
  debug=False ):
333
-
334
- full_prompt = f"""{self.system_full_header}Act as a code generation assistant that generates code from user prompt.
335
- {self.user_full_header}
494
+ """
495
+ Generates a single code block based on a prompt.
496
+ Uses the underlying LLM binding via `generate_text`.
497
+ Handles potential continuation if the code block is incomplete.
498
+ """
499
+
500
+ full_prompt = f"""{self.system_full_header}Act as a code generation assistant that generates code from user prompt.
501
+ {self.user_full_header}
336
502
  {prompt}
337
503
  """
338
504
  if template:
@@ -358,170 +524,259 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
358
524
  full_prompt += f"""You must return a single code tag.
359
525
  Do not split the code in multiple tags.
360
526
  {self.ai_full_header}"""
361
- response = self.generate_text(full_prompt, images, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, streaming_callback=callback)
362
- codes = self.extract_code_blocks(response)
363
- if len(codes)>0:
364
- if not codes[-1]["is_complete"]:
365
- code = "\n".join(codes[-1]["content"].split("\n")[:-1])
366
- while not codes[-1]["is_complete"]:
367
- response = self.generate_text(prompt+code+self.user_full_header+"continue the code. Start from last line and continue the code. Put the code inside a markdown code tag."+self.separator_template+self.ai_full_header, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, streaming_callback=callback)
368
- codes = self.extract_code_blocks(response)
369
- if len(codes)==0:
370
- break
371
- else:
372
- if not codes[-1]["is_complete"]:
373
- code +="\n"+ "\n".join(codes[-1]["content"].split("\n")[:-1])
374
- else:
375
- code +="\n"+ "\n".join(codes[-1]["content"].split("\n"))
376
- else:
377
- code = codes[-1]["content"]
378
527
 
379
- return code
380
- else:
381
- return None
382
-
383
- def extract_code_blocks(self, text: str) -> List[dict]:
528
+ response = self.generate_text(
529
+ full_prompt,
530
+ images=images,
531
+ n_predict=max_size,
532
+ temperature=temperature,
533
+ top_k=top_k,
534
+ top_p=top_p,
535
+ repeat_penalty=repeat_penalty,
536
+ repeat_last_n=repeat_last_n,
537
+ streaming_callback=callback
538
+ )
539
+
540
+ if isinstance(response, dict) and not response.get("status", True):
541
+ ASCIIColors.error(f"Code generation failed: {response.get('error')}")
542
+ return None
543
+
544
+ codes = self.extract_code_blocks(response, format=code_tag_format)
545
+ code_content = None
546
+
547
+ if codes:
548
+ last_code = codes[-1]
549
+ code_content = last_code["content"]
550
+
551
+ # Handle incomplete code block continuation (simple approach)
552
+ max_retries = 3 # Limit continuation attempts
553
+ retries = 0
554
+ while not last_code["is_complete"] and retries < max_retries:
555
+ retries += 1
556
+ ASCIIColors.info(f"Code block seems incomplete. Attempting continuation ({retries}/{max_retries})...")
557
+ continuation_prompt = f"{full_prompt}{code_content}\n\n{self.user_full_header}The previous code block was incomplete. Continue the code exactly from where it left off. Do not repeat the previous part. Only provide the continuation inside a single {code_tag_format} code tag.\n{self.ai_full_header}"
558
+
559
+ continuation_response = self.generate_text(
560
+ continuation_prompt,
561
+ images=images, # Resend images if needed for context
562
+ n_predict=max_size, # Allow space for continuation
563
+ temperature=temperature, # Use same parameters
564
+ top_k=top_k,
565
+ top_p=top_p,
566
+ repeat_penalty=repeat_penalty,
567
+ repeat_last_n=repeat_last_n,
568
+ streaming_callback=callback
569
+ )
570
+
571
+ if isinstance(continuation_response, dict) and not continuation_response.get("status", True):
572
+ ASCIIColors.warning(f"Continuation attempt failed: {continuation_response.get('error')}")
573
+ break # Stop trying if generation fails
574
+
575
+ continuation_codes = self.extract_code_blocks(continuation_response, format=code_tag_format)
576
+
577
+ if continuation_codes:
578
+ new_code_part = continuation_codes[0]["content"]
579
+ code_content += "\n" + new_code_part # Append continuation
580
+ last_code["is_complete"] = continuation_codes[0]["is_complete"] # Update completeness
581
+ if last_code["is_complete"]:
582
+ ASCIIColors.info("Code block continuation successful.")
583
+ break # Exit loop if complete
584
+ else:
585
+ ASCIIColors.warning("Continuation response contained no code block.")
586
+ break # Stop if no code block found in continuation
587
+
588
+ if not last_code["is_complete"]:
589
+ ASCIIColors.warning("Code block remained incomplete after multiple attempts.")
590
+
591
+ return code_content # Return the (potentially completed) code content or None
592
+
593
+
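generate_code asks the model for a single code block and retries up to three times whenever extract_code_blocks flags the block as incomplete. A sketch of the way the rest of this module uses it, requesting JSON that follows a template (the template fields are invented for illustration):

```python
import json

template = """{
    "title": "string",
    "steps": ["string"]
}"""

json_str = client.generate_code(
    prompt="Produce a short plan for writing unit tests.",
    template=template,
    language="json",
    code_tag_format="markdown",   # extract Markdown-fenced blocks from the reply
)
if json_str is not None:
    plan = json.loads(json_str)   # may still raise if the model strays from the template
    print(plan.get("title"))
```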
594
+ def extract_code_blocks(self, text: str, format: str = "markdown") -> List[dict]:
384
595
  """
385
- This function extracts code blocks from a given text.
386
-
387
- Parameters:
388
- text (str): The text from which to extract code blocks. Code blocks are identified by triple backticks (```).
389
-
390
- Returns:
391
- List[dict]: A list of dictionaries where each dictionary represents a code block and contains the following keys:
392
- - 'index' (int): The index of the code block in the text.
393
- - 'file_name' (str): The name of the file extracted from the preceding line, if available.
394
- - 'content' (str): The content of the code block.
395
- - 'type' (str): The type of the code block. If the code block starts with a language specifier (like 'python' or 'java'), this field will contain that specifier. Otherwise, it will be set to 'language-specific'.
396
- - 'is_complete' (bool): True if the block has a closing tag, False otherwise.
397
-
398
- Note:
399
- The function assumes that the number of triple backticks in the text is even.
400
- If the number of triple backticks is odd, it will consider the rest of the text as the last code block.
401
- """
596
+ Extracts code blocks from text in Markdown or HTML format.
597
+ (Implementation remains the same as provided before)
598
+ """
599
+ # ... (Keep the existing implementation from the previous file) ...
600
+ code_blocks = []
402
601
  remaining = text
403
- bloc_index = 0
404
602
  first_index = 0
405
603
  indices = []
406
- while len(remaining) > 0:
407
- try:
408
- index = remaining.index("```")
409
- indices.append(index + first_index)
410
- remaining = remaining[index + 3:]
411
- first_index += index + 3
412
- bloc_index += 1
413
- except Exception as ex:
414
- if bloc_index % 2 == 1:
415
- index = len(remaining)
416
- indices.append(index)
417
- remaining = ""
418
604
 
419
- code_blocks = []
420
- is_start = True
421
- for index, code_delimiter_position in enumerate(indices):
605
+ if format.lower() == "markdown":
606
+ # Markdown: Find triple backtick positions
607
+ while remaining:
608
+ try:
609
+ index = remaining.index("```")
610
+ indices.append(index + first_index)
611
+ remaining = remaining[index + 3:]
612
+ first_index += index + 3
613
+ except ValueError:
614
+ if len(indices) % 2 == 1: # Odd number of delimiters means the last block is open
615
+ indices.append(first_index + len(remaining)) # Mark end of text as end of block
616
+ break
617
+
618
+ elif format.lower() == "html":
619
+ # HTML: Find <code> and </code> positions, handling nested tags
620
+ cursor = 0
621
+ while cursor < len(text):
622
+ try:
623
+ # Look for opening <code tag
624
+ start_index = text.index("<code", cursor)
625
+ try:
626
+ end_of_opening = text.index(">", start_index)
627
+ except ValueError:
628
+ break # Invalid opening tag
629
+
630
+ indices.append(start_index)
631
+ opening_tag_end = end_of_opening + 1
632
+ cursor = opening_tag_end
633
+
634
+ # Look for matching </code>, accounting for nested <code>
635
+ nest_level = 0
636
+ temp_cursor = cursor
637
+ found_closing = False
638
+ while temp_cursor < len(text):
639
+ if text[temp_cursor:].startswith("<code"):
640
+ nest_level += 1
641
+ try:
642
+ temp_cursor = text.index(">", temp_cursor) + 1
643
+ except ValueError:
644
+ break # Invalid nested opening tag
645
+ elif text[temp_cursor:].startswith("</code>"):
646
+ if nest_level == 0:
647
+ indices.append(temp_cursor)
648
+ cursor = temp_cursor + len("</code>")
649
+ found_closing = True
650
+ break
651
+ nest_level -= 1
652
+ temp_cursor += len("</code>")
653
+ else:
654
+ temp_cursor += 1
655
+
656
+ if not found_closing: # If no closing tag found until the end
657
+ indices.append(len(text))
658
+ break # Stop searching
659
+
660
+ except ValueError:
661
+ break # No more opening tags found
662
+
663
+ else:
664
+ raise ValueError("Format must be 'markdown' or 'html'")
665
+
666
+ # Process indices to extract blocks
667
+ for i in range(0, len(indices), 2):
422
668
  block_infos = {
423
- 'index': index,
669
+ 'index': i // 2,
424
670
  'file_name': "",
425
- 'section': "",
426
671
  'content': "",
427
- 'type': "",
672
+ 'type': 'language-specific', # Default type
428
673
  'is_complete': False
429
674
  }
430
- if is_start:
431
- # Check the preceding line for file name
432
- preceding_text = text[:code_delimiter_position].strip().splitlines()
433
- if preceding_text:
434
- last_line = preceding_text[-1].strip()
435
- if last_line.startswith("<file_name>") and last_line.endswith("</file_name>"):
436
- file_name = last_line[len("<file_name>"):-len("</file_name>")].strip()
437
- block_infos['file_name'] = file_name
438
- elif last_line.startswith("## filename:"):
439
- file_name = last_line[len("## filename:"):].strip()
440
- block_infos['file_name'] = file_name
441
- if last_line.startswith("<section>") and last_line.endswith("</section>"):
442
- section = last_line[len("<section>"):-len("</section>")].strip()
443
- block_infos['section'] = section
444
-
445
- sub_text = text[code_delimiter_position + 3:]
446
- if len(sub_text) > 0:
447
- try:
448
- find_space = sub_text.index(" ")
449
- except:
450
- find_space = int(1e10)
451
- try:
452
- find_return = sub_text.index("\n")
453
- except:
454
- find_return = int(1e10)
455
- next_index = min(find_return, find_space)
456
- if '{' in sub_text[:next_index]:
457
- next_index = 0
458
- start_pos = next_index
459
- if code_delimiter_position + 3 < len(text) and text[code_delimiter_position + 3] in ["\n", " ", "\t"]:
460
- block_infos["type"] = 'language-specific'
461
- else:
462
- block_infos["type"] = sub_text[:next_index]
463
675
 
464
- if index + 1 < len(indices):
465
- next_pos = indices[index + 1] - code_delimiter_position
466
- if next_pos - 3 < len(sub_text) and sub_text[next_pos - 3] == "`":
467
- block_infos["content"] = sub_text[start_pos:next_pos - 3].strip()
468
- block_infos["is_complete"] = True
469
- else:
470
- block_infos["content"] = sub_text[start_pos:next_pos].strip()
471
- block_infos["is_complete"] = False
676
+ start_pos = indices[i]
677
+ # --- Extract preceding text for potential file name hints ---
678
+ # Look backwards from start_pos for common patterns
679
+ search_area_start = max(0, start_pos - 200) # Limit search area
680
+ preceding_text_segment = text[search_area_start:start_pos]
681
+ lines = preceding_text_segment.strip().splitlines()
682
+ if lines:
683
+ last_line = lines[-1].strip()
684
+ # Example patterns (adjust as needed)
685
+ if last_line.startswith("<file_name>") and last_line.endswith("</file_name>"):
686
+ block_infos['file_name'] = last_line[len("<file_name>"):-len("</file_name>")].strip()
687
+ elif last_line.lower().startswith("file:") or last_line.lower().startswith("filename:"):
688
+ block_infos['file_name'] = last_line.split(":", 1)[1].strip()
689
+ # --- End file name extraction ---
690
+
691
+ # Extract content and type based on format
692
+ if format.lower() == "markdown":
693
+ content_start = start_pos + 3 # After ```
694
+ if i + 1 < len(indices):
695
+ end_pos = indices[i + 1]
696
+ content_raw = text[content_start:end_pos]
697
+ block_infos['is_complete'] = True
698
+ else: # Last block is open
699
+ content_raw = text[content_start:]
700
+ block_infos['is_complete'] = False
701
+
702
+ # Check for language specifier on the first line
703
+ first_line_end = content_raw.find('\n')
704
+ if first_line_end != -1:
705
+ first_line = content_raw[:first_line_end].strip()
706
+ if first_line and not first_line.isspace() and ' ' not in first_line: # Basic check for language specifier
707
+ block_infos['type'] = first_line
708
+ content = content_raw[first_line_end + 1:].strip()
472
709
  else:
473
- block_infos["content"] = sub_text[start_pos:].strip()
474
- block_infos["is_complete"] = False
475
- code_blocks.append(block_infos)
476
- is_start = False
477
- else:
478
- is_start = True
479
- continue
710
+ content = content_raw.strip()
711
+ else: # Single line code block or no language specifier
712
+ content = content_raw.strip()
713
+ # If content itself looks like a language specifier, clear it
714
+ if content and not content.isspace() and ' ' not in content and len(content)<20:
715
+ block_infos['type'] = content
716
+ content = ""
717
+
718
+
719
+ elif format.lower() == "html":
720
+ # Find end of opening tag to get content start
721
+ try:
722
+ opening_tag_end = text.index(">", start_pos) + 1
723
+ except ValueError:
724
+ continue # Should not happen if indices are correct
725
+
726
+ opening_tag = text[start_pos:opening_tag_end]
727
+
728
+ if i + 1 < len(indices):
729
+ end_pos = indices[i + 1]
730
+ content = text[opening_tag_end:end_pos].strip()
731
+ block_infos['is_complete'] = True
732
+ else: # Last block is open
733
+ content = text[opening_tag_end:].strip()
734
+ block_infos['is_complete'] = False
735
+
736
+
737
+ # Extract language from class attribute (more robust)
738
+ import re
739
+ match = re.search(r'class\s*=\s*["\']([^"\']*)["\']', opening_tag)
740
+ if match:
741
+ classes = match.group(1).split()
742
+ for cls in classes:
743
+ if cls.startswith("language-"):
744
+ block_infos['type'] = cls[len("language-"):]
745
+ break # Take the first language- class found
746
+
747
+ block_infos['content'] = content
748
+ if block_infos['content'] or block_infos['is_complete']: # Add block if it has content or is closed
749
+ code_blocks.append(block_infos)
480
750
 
481
751
  return code_blocks
482
752
 
753
+
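extract_code_blocks now takes a format argument ('markdown' or 'html') and returns one dict per block with index, file_name, content, type and is_complete. A small illustration using the HTML form, which keeps the sample string free of nested Markdown fences:

```python
sample = (
    "file: hello.py\n"
    '<code class="language-python">print("hello")</code>'
)

blocks = client.extract_code_blocks(sample, format="html")
for block in blocks:
    # type comes from the language-* class, file_name from the preceding "file:" line.
    print(block["index"], block["file_name"], block["type"], block["is_complete"])
    print(block["content"])
# Expected: 0 hello.py python True, followed by the single print statement.
```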
483
754
  def extract_thinking_blocks(self, text: str) -> List[str]:
484
755
  """
485
756
  Extracts content between <thinking> or <think> tags from a given text.
486
-
487
- Parameters:
488
- text (str): The text containing thinking blocks
489
-
490
- Returns:
491
- List[str]: List of extracted thinking contents
757
+ (Implementation remains the same as provided before)
492
758
  """
493
759
  import re
494
-
495
- # Pattern to match both <thinking> and <think> blocks with matching tags
496
760
  pattern = r'<(thinking|think)>(.*?)</\1>'
497
- matches = re.finditer(pattern, text, re.DOTALL)
498
-
499
- # Extract content from the second group (index 2) and clean
761
+ matches = re.finditer(pattern, text, re.DOTALL | re.IGNORECASE) # Added IGNORECASE
500
762
  thinking_blocks = [match.group(2).strip() for match in matches]
501
-
502
763
  return thinking_blocks
503
764
 
504
765
  def remove_thinking_blocks(self, text: str) -> str:
505
766
  """
506
767
  Removes thinking blocks (either <thinking> or <think>) from text including the tags.
507
-
508
- Parameters:
509
- text (str): The text containing thinking blocks
510
-
511
- Returns:
512
- str: Text with thinking blocks removed
768
+ (Implementation remains the same as provided before)
513
769
  """
514
770
  import re
515
-
516
- # Pattern to remove both <thinking> and <think> blocks with matching tags
517
- pattern = r'<(thinking|think)>.*?</\1>'
518
- cleaned_text = re.sub(pattern, '', text, flags=re.DOTALL)
519
-
520
- # Remove extra whitespace and normalize newlines
521
- cleaned_text = re.sub(r'\n\s*\n', '\n\n', cleaned_text.strip())
522
-
771
+ pattern = r'<(thinking|think)>.*?</\1>\s*' # Added \s* to remove potential trailing whitespace/newlines
772
+ cleaned_text = re.sub(pattern, '', text, flags=re.DOTALL | re.IGNORECASE) # Added IGNORECASE
773
+ # Further cleanup might be needed depending on desired newline handling
774
+ cleaned_text = re.sub(r'\n{3,}', '\n\n', cleaned_text).strip() # Collapse excess newlines
523
775
  return cleaned_text
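Both thinking-block helpers rely on the same backreferenced regex over <thinking>/<think> tags, now matched case-insensitively. A quick illustration with the expected results in comments:

```python
text = "<think>internal reasoning</think>The final answer is 42."
print(client.extract_thinking_blocks(text))  # ['internal reasoning']
print(client.remove_thinking_blocks(text))   # The final answer is 42.
```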
524
776
 
777
+ # --- Task-oriented methods (Candidates for moving to TasksLibrary) ---
778
+ # Keeping them here for now, but they primarily use generate_code/generate_text
779
+
525
780
  def yes_no(
526
781
  self,
527
782
  question: str,
@@ -532,174 +787,242 @@ Do not split the code in multiple tags.
532
787
  callback = None
533
788
  ) -> bool | dict:
534
789
  """
535
- Answers a yes/no question.
536
-
537
- Args:
538
- question (str): The yes/no question to answer.
539
- context (str, optional): Additional context to provide for the question.
540
- max_answer_length (int, optional): Maximum string length allowed for the response. Defaults to None.
541
- conditionning (str, optional): An optional system message to put at the beginning of the prompt.
542
- return_explanation (bool, optional): If True, returns a dictionary with the answer and explanation. Defaults to False.
543
-
544
- Returns:
545
- bool or dict:
546
- - If return_explanation is False, returns a boolean (True for 'yes', False for 'no').
547
- - If return_explanation is True, returns a dictionary with the answer and explanation.
790
+ Answers a yes/no question using LLM JSON generation.
791
+ (Implementation requires self.generate_code which uses self.generate_text)
548
792
  """
793
+ # ... (Implementation as provided before, relies on self.generate_code) ...
549
794
  if not callback:
550
795
  callback=self.sink
551
796
 
552
- prompt = f"{conditionning}\nQuestion: {question}\nContext: {context}\n"
553
-
554
- template = """
555
- {
556
- "answer": true | false,
557
- "explanation": "Optional explanation if return_explanation is True"
558
- }
559
- """
560
-
561
- response = self.generate_text_code(
797
+ prompt = f"{self.system_full_header}{conditionning}\n{self.user_full_header}Based on the context, answer the question with only 'true' or 'false' and provide a brief explanation.\nContext:\n{context}\nQuestion: {question}\n{self.ai_full_header}"
798
+
799
+ template = """{
800
+ "answer": true | false, // boolean required
801
+ "explanation": "A brief explanation for the answer"
802
+ }"""
803
+
804
+ # Assuming generate_code exists and works as intended
805
+ response_json_str = self.generate_code(
562
806
  prompt=prompt,
563
- template=template,
564
807
  language="json",
808
+ template=template,
565
809
  code_tag_format="markdown",
566
810
  max_size=max_answer_length,
567
811
  callback=callback
568
812
  )
569
-
813
+
814
+ if response_json_str is None:
815
+ ASCIIColors.error("LLM failed to generate JSON for yes/no question.")
816
+ return {"answer": False, "explanation": "Generation failed"} if return_explanation else False
817
+
570
818
  try:
571
- parsed_response = json.loads(response)
572
- answer = parsed_response.get("answer", False)
819
+ # Attempt to repair minor JSON issues before parsing
820
+ import json
821
+ import re
822
+ # Remove potential comments, trailing commas etc.
823
+ response_json_str = re.sub(r"//.*", "", response_json_str)
824
+ response_json_str = re.sub(r",\s*}", "}", response_json_str)
825
+ response_json_str = re.sub(r",\s*]", "]", response_json_str)
826
+
827
+ parsed_response = json.loads(response_json_str)
828
+ answer = parsed_response.get("answer")
573
829
  explanation = parsed_response.get("explanation", "")
574
-
830
+
831
+ # Validate boolean type
832
+ if not isinstance(answer, bool):
833
+ # Attempt to coerce common string representations
834
+ if isinstance(answer, str):
835
+ answer_lower = answer.lower()
836
+ if answer_lower == 'true':
837
+ answer = True
838
+ elif answer_lower == 'false':
839
+ answer = False
840
+ else:
841
+ raise ValueError("Answer is not a valid boolean representation.")
842
+ else:
843
+ raise ValueError("Answer is not a boolean.")
844
+
845
+
575
846
  if return_explanation:
576
847
  return {"answer": answer, "explanation": explanation}
577
848
  else:
578
849
  return answer
579
- except json.JSONDecodeError:
580
- return False
850
+ except (json.JSONDecodeError, ValueError) as e:
851
+ ASCIIColors.error(f"Failed to parse or validate JSON response for yes/no: {e}")
852
+ ASCIIColors.error(f"Received: {response_json_str}")
853
+ # Fallback: try simple string check in the raw LLM output (less reliable)
854
+ if "true" in response_json_str.lower():
855
+ answer_fallback = True
856
+ elif "false" in response_json_str.lower():
857
+ answer_fallback = False
858
+ else:
859
+ answer_fallback = False # Default to false on ambiguity
860
+
861
+ if return_explanation:
862
+ return {"answer": answer_fallback, "explanation": f"Parsing failed ({e}). Fallback used."}
863
+ else:
864
+ return answer_fallback
865
+
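yes_no routes the question through generate_code with a JSON template, coerces 'true'/'false' strings into real booleans, and falls back to a raw substring scan when parsing fails. A hedged call sketch (question and context are invented):

```python
verdict = client.yes_no(
    question="Does the text mention a deadline?",
    context="The report is due on Friday.",
    return_explanation=True,
)
# With return_explanation=True a dict comes back; otherwise a plain bool.
print(verdict["answer"], "-", verdict["explanation"])
```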
581
866
 
582
867
  def multichoice_question(
583
- self,
584
- question: str,
585
- possible_answers: list,
586
- context: str = "",
587
- max_answer_length: int = None,
588
- conditionning: str = "",
868
+ self,
869
+ question: str,
870
+ possible_answers: list,
871
+ context: str = "",
872
+ max_answer_length: int = None,
873
+ conditionning: str = "",
589
874
  return_explanation: bool = False,
590
875
  callback = None
591
- ) -> dict:
876
+ ) -> int | dict: # Corrected return type hint
592
877
  """
593
- Interprets a multi-choice question from a user's response. This function expects only one choice as true.
594
- All other choices are considered false. If none are correct, returns -1.
595
-
596
- Args:
597
- question (str): The multi-choice question posed by the user.
598
- possible_answers (List[Any]): A list containing all valid options for the chosen value.
599
- context (str, optional): Additional context to provide for the question.
600
- max_answer_length (int, optional): Maximum string length allowed while interpreting the user's responses. Defaults to None.
601
- conditionning (str, optional): An optional system message to put at the beginning of the prompt.
602
- return_explanation (bool, optional): If True, returns a dictionary with the choice and explanation. Defaults to False.
603
-
604
- Returns:
605
- dict:
606
- - If return_explanation is False, returns a JSON object with only the selected choice index.
607
- - If return_explanation is True, returns a JSON object with the selected choice index and an explanation.
608
- - Returns {"index": -1} if no match is found among the possible answers.
878
+ Interprets a multi-choice question using LLM JSON generation.
879
+ (Implementation requires self.generate_code which uses self.generate_text)
609
880
  """
881
+ # ... (Implementation as provided before, relies on self.generate_code) ...
610
882
  if not callback:
611
883
  callback=self.sink
612
-
613
- prompt = f"""
614
- {conditionning}\n
615
- QUESTION:\n{question}\n
616
- POSSIBLE ANSWERS:\n"""
617
- for i, answer in enumerate(possible_answers):
618
- prompt += f"{i}. {answer}\n"
619
-
884
+
885
+ choices_text = "\n".join([f"{i}. {ans}" for i, ans in enumerate(possible_answers)])
886
+
887
+ prompt = f"{self.system_full_header}{conditionning}\n"
888
+ prompt += f"{self.user_full_header}Answer the following multiple-choice question based on the context. Respond with a JSON object containing the index of the single best answer and an optional explanation.\n"
620
889
  if context:
621
- prompt += f"\nADDITIONAL CONTEXT:\n{context}\n"
622
-
623
- prompt += "\nRespond with a JSON object containing:\n"
624
- if return_explanation:
625
- prompt += "{\"index\": (the selected answer index), \"explanation\": (reasoning for selection)}"
626
- else:
627
- prompt += "{\"index\": (the selected answer index)}"
628
-
629
- response = self.generate_text_code(prompt, language="json", max_size=max_answer_length,
630
- accept_all_if_no_code_tags_is_present=True, return_full_generated_code=False, callback=callback)
631
-
890
+ prompt += f"Context:\n{context}\n"
891
+ prompt += f"Question:\n{question}\n"
892
+ prompt += f"Possible Answers:\n{choices_text}\n"
893
+ prompt += f"{self.ai_full_header}"
894
+
895
+ template = """{
896
+ "index": 0, // integer index required
897
+ "explanation": "Optional explanation for the choice"
898
+ }"""
899
+
900
+ response_json_str = self.generate_code(
901
+ prompt=prompt,
902
+ template=template,
903
+ language="json",
904
+ code_tag_format="markdown",
905
+ max_size=max_answer_length,
906
+ callback=callback
907
+ )
908
+
909
+ if response_json_str is None:
910
+ ASCIIColors.error("LLM failed to generate JSON for multichoice question.")
911
+ return {"index": -1, "explanation": "Generation failed"} if return_explanation else -1
912
+
632
913
  try:
633
- result = json.loads(response)
634
- if return_explanation:
635
- if "index" in result and isinstance(result["index"], int):
636
- return result["index"], result["index"]
637
- else:
638
- if "index" in result and isinstance(result["index"], int):
639
- return result["index"]
640
- except json.JSONDecodeError:
914
+ # Attempt to repair minor JSON issues before parsing
915
+ import json
916
+ import re
917
+ response_json_str = re.sub(r"//.*", "", response_json_str)
918
+ response_json_str = re.sub(r",\s*}", "}", response_json_str)
919
+ response_json_str = re.sub(r",\s*]", "]", response_json_str)
920
+
921
+ result = json.loads(response_json_str)
922
+ index = result.get("index")
923
+ explanation = result.get("explanation", "")
924
+
925
+ if not isinstance(index, int) or not (0 <= index < len(possible_answers)):
926
+ raise ValueError(f"Invalid index returned: {index}")
927
+
641
928
  if return_explanation:
642
- return -1, "failed to decide"
929
+ return {"index": index, "explanation": explanation}
643
930
  else:
644
- return -1
645
-
931
+ return index
932
+ except (json.JSONDecodeError, ValueError) as e:
933
+ ASCIIColors.error(f"Failed to parse or validate JSON response for multichoice: {e}")
934
+ ASCIIColors.error(f"Received: {response_json_str}")
935
+ # Fallback logic could be added here (e.g., regex for index) but is less reliable
936
+ return {"index": -1, "explanation": f"Parsing failed ({e})."} if return_explanation else -1
937
+
938
+
646
939
  def multichoice_ranking(
647
- self,
648
- question: str,
649
- possible_answers: list,
650
- context: str = "",
651
- max_answer_length: int = 512,
652
- conditionning: str = "",
940
+ self,
941
+ question: str,
942
+ possible_answers: list,
943
+ context: str = "",
944
+ max_answer_length: int = None,
945
+ conditionning: str = "",
653
946
  return_explanation: bool = False,
654
947
  callback = None
655
948
  ) -> dict:
656
949
  """
657
- Ranks answers for a question from best to worst. Returns a JSON object containing the ranked order.
658
-
659
- Args:
660
- question (str): The question for which the answers are being ranked.
661
- possible_answers (List[Any]): A list of possible answers to rank.
662
- context (str, optional): Additional context to provide for the question.
663
- max_answer_length (int, optional): Maximum string length allowed for the response. Defaults to 50.
664
- conditionning (str, optional): An optional system message to put at the beginning of the prompt.
665
- return_explanation (bool, optional): If True, returns a dictionary with the ranked order and explanations. Defaults to False.
666
-
667
- Returns:
668
- dict:
669
- - If return_explanation is False, returns a JSON object with only the ranked order.
670
- - If return_explanation is True, returns a JSON object with the ranked order and explanations.
950
+ Ranks answers for a question from best to worst using LLM JSON generation.
951
+ (Implementation requires self.generate_code which uses self.generate_text)
671
952
  """
953
+ # ... (Implementation as provided before, relies on self.generate_code) ...
672
954
  if not callback:
673
- callback=self.sink
674
-
675
- prompt = f"""
676
- {conditionning}\n
677
- QUESTION:\n{question}\n
678
- POSSIBLE ANSWERS:\n"""
679
- for i, answer in enumerate(possible_answers):
680
- prompt += f"{i}. {answer}\n"
681
-
955
+ callback = self.sink
956
+
957
+ choices_text = "\n".join([f"{i}. {ans}" for i, ans in enumerate(possible_answers)])
958
+
959
+ prompt = f"{self.system_full_header}{conditionning}\n"
960
+ prompt += f"{self.user_full_header}Rank the following answers to the question from best to worst based on the context. Respond with a JSON object containing a list of indices in ranked order and an optional list of explanations.\n"
682
961
  if context:
683
- prompt += f"\nADDITIONAL CONTEXT:\n{context}\n"
684
-
685
- prompt += "\nRespond with a JSON object containing:\n"
686
- if return_explanation:
687
- prompt += "{\"ranking\": (list of indices ordered from best to worst), \"explanations\": (list of reasons for each ranking)}"
688
- else:
689
- prompt += "{\"ranking\": (list of indices ordered from best to worst)}"
690
-
691
- response = self.generate_text_code(prompt, language="json", return_full_generated_code=False, callback=callback)
692
-
962
+ prompt += f"Context:\n{context}\n"
963
+ prompt += f"Question:\n{question}\n"
964
+ prompt += f"Possible Answers to Rank:\n{choices_text}\n"
965
+ prompt += f"{self.ai_full_header}"
966
+
967
+ template = """{
968
+ "ranking": [0, 1, 2], // list of integer indices required, length must match number of answers
969
+ "explanations": ["Optional explanation 1", "Optional explanation 2", "Optional explanation 3"] // Optional list of strings
970
+ }"""
971
+
972
+ response_json_str = self.generate_code(
973
+ prompt=prompt,
974
+ template=template,
975
+ language="json",
976
+ code_tag_format="markdown",
977
+ max_size=max_answer_length,
978
+ callback=callback
979
+ )
980
+
981
+ default_return = {"ranking": [], "explanations": []} if return_explanation else {"ranking": []}
982
+
983
+ if response_json_str is None:
984
+ ASCIIColors.error("LLM failed to generate JSON for ranking.")
985
+ return default_return
986
+
693
987
  try:
694
- result = json.loads(response)
695
- if "ranking" in result and isinstance(result["ranking"], list):
696
- return result
697
- except json.JSONDecodeError:
698
- return {"ranking": []}
699
-
700
-
988
+ # Attempt to repair minor JSON issues before parsing
989
+ import json
990
+ import re
991
+ response_json_str = re.sub(r"//.*", "", response_json_str)
992
+ response_json_str = re.sub(r",\s*}", "}", response_json_str)
993
+ response_json_str = re.sub(r",\s*]", "]", response_json_str)
994
+
995
+ result = json.loads(response_json_str)
996
+ ranking = result.get("ranking")
997
+ explanations = result.get("explanations", []) if return_explanation else None
998
+
999
+ # Validation
1000
+ if not isinstance(ranking, list) or len(ranking) != len(possible_answers):
1001
+ raise ValueError("Ranking is not a list or has incorrect length.")
1002
+ if not all(isinstance(idx, int) and 0 <= idx < len(possible_answers) for idx in ranking):
1003
+ raise ValueError("Ranking contains invalid indices.")
1004
+ if len(set(ranking)) != len(possible_answers):
1005
+ raise ValueError("Ranking contains duplicate indices.")
1006
+ if return_explanation and not isinstance(explanations, list):
1007
+ ASCIIColors.warning("Explanations format is invalid, returning ranking only.")
1008
+ explanations = None # Ignore invalid explanations
1009
+
1010
+
1011
+ if return_explanation:
1012
+ return {"ranking": ranking, "explanations": explanations or [""] * len(ranking)} # Provide empty strings if explanations were invalid/missing
1013
+ else:
1014
+ return {"ranking": ranking}
1015
+
1016
+ except (json.JSONDecodeError, ValueError) as e:
1017
+ ASCIIColors.error(f"Failed to parse or validate JSON response for ranking: {e}")
1018
+ ASCIIColors.error(f"Received: {response_json_str}")
1019
+ return default_return
1020
+
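For quick reference, here is a minimal usage sketch of the ranking helper above. The method name and import path are not visible in this hunk, so `rank_answers` and `from lollms_client import LollmsClient` are assumptions for illustration; `question`, `possible_answers` and `return_explanation` are taken from the code shown.

```python
# Hypothetical usage; the method name and import path are assumed, not confirmed by this diff.
from lollms_client import LollmsClient

lc = LollmsClient("ollama", model_name="mistral-nemo:latest")
result = lc.rank_answers(
    question="Which planet is known as the Red Planet?",
    possible_answers=["Venus", "Mars", "Jupiter"],
    return_explanation=True,
)
# Expected shape on success: {"ranking": [1, 0, 2], "explanations": ["...", "...", "..."]}
# On generation or validation failure the method falls back to {"ranking": [], "explanations": []}.
print(result["ranking"])
```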
1021
+ # --- Summarization / Analysis Methods (Candidates for TasksLibrary) ---
1022
+ # These use generate_text and tokenization/detokenization
1023
+
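The methods in this group report progress and stream partial output through a callback. Judging only from the calls visible in this file (for example `callback(f"Processing chunk {chunk_id}...", MSG_TYPE.MSG_TYPE_STEP)` and the `streaming_callback` argument of `generate_text`), a compatible callback could look like the sketch below; the exact contract, including the meaning of the return value, is an assumption here.

```python
from lollms_client.lollms_types import MSG_TYPE

def my_progress_callback(message, msg_type, params=None):
    # Step markers used in this file: MSG_TYPE_STEP, MSG_TYPE_STEP_START,
    # MSG_TYPE_STEP_PROGRESS, MSG_TYPE_STEP_END.
    if msg_type in (MSG_TYPE.MSG_TYPE_STEP, MSG_TYPE.MSG_TYPE_STEP_START,
                    MSG_TYPE.MSG_TYPE_STEP_PROGRESS, MSG_TYPE.MSG_TYPE_STEP_END):
        print(f"[{msg_type.name}] {message}")
    else:
        # Anything else is treated as streamed generation text.
        print(message, end="", flush=True)
    return True  # assumption: a truthy return lets generation continue
```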
701
1024
  def sequential_summarize(
702
- self,
1025
+ self,
703
1026
  text:str,
704
1027
  chunk_processing_prompt:str="Extract relevant information from the current text chunk and update the memory if needed.",
705
1028
  chunk_processing_output_format="markdown",
@@ -707,42 +1030,43 @@ Do not split the code in multiple tags.
707
1030
  final_output_format="markdown",
708
1031
  ctx_size:int=None,
709
1032
  chunk_size:int=None,
1033
+ overlap:int=None, # Added overlap
710
1034
  bootstrap_chunk_size:int=None,
711
1035
  bootstrap_steps:int=None,
712
1036
  callback = None,
713
1037
  debug:bool= False):
714
1038
  """
715
- This function processes a given text in chunks and generates a summary for each chunk.
716
- It then combines the summaries to create a final summary.
717
-
718
- Parameters:
719
- text (str): The input text to be summarized.
720
- chunk_processing_prompt (str, optional): The prompt used for processing each chunk. Defaults to "".
721
- chunk_processing_output_format (str, optional): The format of the output for each chunk. Defaults to "markdown".
722
- final_memory_processing_prompt (str, optional): The prompt used for processing the final memory. Defaults to "Create final summary using this memory.".
723
- final_output_format (str, optional): The format of the final output. Defaults to "markdown".
724
- ctx_size (int, optional): The size of the context. Defaults to None.
725
- chunk_size (int, optional): The size of each chunk. Defaults to None.
726
- callback (callable, optional): A function to be called after processing each chunk. Defaults to None.
727
- debug (bool, optional): A flag to enable debug mode. Defaults to False.
728
-
729
- Returns:
730
- The final summary in the specified format.
1039
+ Processes text in chunks sequentially, updating a memory at each step.
1040
+ (Implementation requires self.tokenize, self.detokenize, self.generate_text, self.extract_code_blocks)
731
1041
  """
1042
+ # ... (Implementation as provided before, relies on core methods) ...
1043
+ if not callback:
1044
+ callback = self.sink
1045
+
732
1046
  if ctx_size is None:
733
- ctx_size = self.ctx_size
734
-
1047
+ ctx_size = self.default_ctx_size or 8192 # Provide a fallback default
735
1048
  if chunk_size is None:
736
- chunk_size = ctx_size//4
737
-
1049
+ chunk_size = ctx_size // 4
1050
+ if overlap is None:
1051
+ overlap = chunk_size // 10 # Default overlap
1052
+ if bootstrap_chunk_size is None:
1053
+ bootstrap_chunk_size = chunk_size // 2 # Smaller initial chunks
1054
+ if bootstrap_steps is None:
1055
+ bootstrap_steps = 2 # Process first few chunks smaller
1056
+
738
1057
  # Tokenize entire text
739
- all_tokens = self.tokenize(text)
1058
+ try:
1059
+ all_tokens = self.tokenize(text)
1060
+ except RuntimeError as e:
1061
+ ASCIIColors.error(f"Tokenization failed: {e}")
1062
+ return "Error: Could not tokenize input text."
740
1063
  total_tokens = len(all_tokens)
741
-
1064
+
742
1065
  # Initialize memory and chunk index
743
1066
  memory = ""
744
1067
  start_token_idx = 0
745
-
1068
+ chunk_id = 0
1069
+
746
1070
  # Create static prompt template
747
1071
  static_prompt_template = f"""{self.system_full_header}
748
1072
  You are a structured sequential text summary assistant that processes documents chunk by chunk, updating a memory of previously generated information at each step.
@@ -756,9 +1080,7 @@ Update the memory by merging previous information with new details from this tex
756
1080
  Only add information explicitly present in the chunk. Retain all relevant prior memory unless clarified or updated by the current chunk.
757
1081
 
758
1082
  ----
759
- # Text chunk:
760
- # Chunk number: {{chunk_id}}
761
- ----
1083
+ Text chunk (Chunk number: {{chunk_id}}):
762
1084
  ```markdown
763
1085
  {{chunk}}
764
1086
  ```
@@ -771,263 +1093,525 @@ Before updating, verify each requested detail:
771
1093
 
772
1094
  Include only confirmed details in the output.
773
1095
  Rewrite the full memory including the updates and keeping relevant data.
774
- Do not discuss the information inside thememory, just put the relevant information without comments.
775
-
776
- ----
777
- # Current document analysis memory:
1096
+ Do not discuss the information inside the memory, just put the relevant information without comments.
1097
+ The output memory must be put inside a {chunk_processing_output_format} markdown code block.
778
1098
  ----
1099
+ Current document analysis memory:
779
1100
  ```{chunk_processing_output_format}
780
1101
  {{memory}}
781
1102
  ```
782
1103
  {self.ai_full_header}
783
- """
784
- # Calculate static prompt tokens (with empty memory and chunk)
785
- chunk_id=0
786
- example_prompt = static_prompt_template.format(custom_prompt=chunk_processing_prompt if chunk_processing_prompt else '', memory="", chunk="", chunk_id=chunk_id)
787
- static_tokens = len(self.tokenize(example_prompt))
788
-
1104
+ ```{chunk_processing_output_format}
1105
+ """ # Added start of code block for AI
1106
+
1107
+ # Calculate static prompt tokens (with estimated placeholders)
1108
+ example_prompt = static_prompt_template.format(
1109
+ custom_prompt=chunk_processing_prompt,
1110
+ memory="<est_memory>",
1111
+ chunk="<est_chunk>",
1112
+ chunk_id=0
1113
+ )
1114
+ try:
1115
+ static_tokens = len(self.tokenize(example_prompt)) - len(self.tokenize("<est_memory>")) - len(self.tokenize("<est_chunk>"))
1116
+ except RuntimeError as e:
1117
+ ASCIIColors.error(f"Tokenization failed during setup: {e}")
1118
+ return "Error: Could not calculate prompt size."
1119
+
789
1120
  # Process text in chunks
790
1121
  while start_token_idx < total_tokens:
791
- # Calculate available tokens for chunk
792
- current_memory_tokens = len(self.tokenize(memory))
793
- available_tokens = ctx_size - static_tokens - current_memory_tokens
794
-
795
- if available_tokens <= 0:
796
- raise ValueError("Memory too large - consider reducing chunk size or increasing context window")
797
-
798
- # Get chunk tokens
799
- if bootstrap_chunk_size is not None and chunk_id < bootstrap_steps:
800
- end_token_idx = min(start_token_idx + bootstrap_chunk_size, total_tokens)
801
- else:
802
- end_token_idx = min(start_token_idx + chunk_size, total_tokens)
1122
+ # Calculate available tokens for chunk + memory
1123
+ available_tokens_for_dynamic_content = ctx_size - static_tokens - (self.default_n_predict or 1024) # Reserve space for output
1124
+ if available_tokens_for_dynamic_content <= 100: # Need some minimum space
1125
+ ASCIIColors.error("Context size too small for summarization with current settings.")
1126
+ return "Error: Context size too small."
1127
+
1128
+ # Estimate token split between memory and chunk (e.g., 50/50)
1129
+ max_memory_tokens = available_tokens_for_dynamic_content // 2
1130
+ max_chunk_tokens = available_tokens_for_dynamic_content - max_memory_tokens
1131
+
1132
+ # Truncate memory if needed
1133
+ current_memory_tokens = self.tokenize(memory)
1134
+ if len(current_memory_tokens) > max_memory_tokens:
1135
+ memory = self.detokenize(current_memory_tokens[-max_memory_tokens:]) # Keep recent memory
1136
+ if debug: ASCIIColors.yellow(f"Memory truncated to {max_memory_tokens} tokens.")
1137
+
1138
+ # Determine actual chunk size based on remaining space and settings
1139
+ current_chunk_size = bootstrap_chunk_size if chunk_id < bootstrap_steps else chunk_size
1140
+ current_chunk_size = min(current_chunk_size, max_chunk_tokens) # Adjust chunk size based on available space
1141
+
1142
+ end_token_idx = min(start_token_idx + current_chunk_size, total_tokens)
803
1143
  chunk_tokens = all_tokens[start_token_idx:end_token_idx]
804
1144
  chunk = self.detokenize(chunk_tokens)
805
- chunk_id +=1
806
-
1145
+
1146
+ chunk_id += 1
1147
+ callback(f"Processing chunk {chunk_id}...", MSG_TYPE.MSG_TYPE_STEP)
1148
+
807
1149
  # Generate memory update
808
- prompt = static_prompt_template.format(custom_prompt=chunk_processing_prompt if chunk_processing_prompt else '', memory=memory, chunk=chunk, chunk_id=chunk_id)
1150
+ prompt = static_prompt_template.format(
1151
+ custom_prompt=chunk_processing_prompt,
1152
+ memory=memory,
1153
+ chunk=chunk,
1154
+ chunk_id=chunk_id
1155
+ )
809
1156
  if debug:
810
- ASCIIColors.yellow(f" ----- {chunk_id-1} ------")
811
- ASCIIColors.red(prompt)
812
-
813
- memory = self.generate_text(prompt, n_predict=ctx_size//4, streaming_callback=callback).strip()
814
- code = self.extract_code_blocks(memory)
815
- if code:
816
- memory=code[0]["content"]
817
-
1157
+ ASCIIColors.magenta(f"--- Chunk {chunk_id} Prompt ---")
1158
+ ASCIIColors.cyan(prompt)
1159
+
1160
+ response = self.generate_text(prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback)
1161
+
1162
+ if isinstance(response, dict): # Handle generation error
1163
+ ASCIIColors.error(f"Chunk {chunk_id} processing failed: {response.get('error')}")
1164
+ # Option: skip chunk or stop? Let's skip for now.
1165
+ start_token_idx = end_token_idx # Move to next chunk index
1166
+ continue
1167
+
1168
+ memory_code_blocks = self.extract_code_blocks(response, format=chunk_processing_output_format)
1169
+ if memory_code_blocks:
1170
+ memory = memory_code_blocks[0]["content"] # Assume first block is the memory
1171
+ else:
1172
+ # Fallback: Try to extract from the end if the AI added text after the block
1173
+ end_tag = f"```{chunk_processing_output_format}"
1174
+ last_occurrence = response.rfind(end_tag)
1175
+ if last_occurrence != -1:
1176
+ # Extract content between the start and end tags
1177
+ start_tag_len = len(f"```{chunk_processing_output_format}\n") # Approx
1178
+ potential_memory = response[last_occurrence + start_tag_len:].strip()
1179
+ if potential_memory.endswith("```"):
1180
+ potential_memory = potential_memory[:-3].strip()
1181
+ if potential_memory: # Use if non-empty
1182
+ memory = potential_memory
1183
+ else: # If extraction failed, keep old memory or use raw response? Use raw response for now.
1184
+ ASCIIColors.warning(f"Could not extract memory block for chunk {chunk_id}. Using raw response.")
1185
+ memory = response.strip().rstrip('```') # Basic cleanup
1186
+ else:
1187
+ ASCIIColors.warning(f"Could not extract memory block for chunk {chunk_id}. Using raw response.")
1188
+ memory = response.strip().rstrip('```')
1189
+
1190
+
818
1191
  if debug:
819
- ASCIIColors.yellow(f" ----- OUT ------")
820
- ASCIIColors.yellow(memory)
821
- ASCIIColors.yellow(" ----- ------")
822
- # Move to next chunk
823
- start_token_idx = end_token_idx
824
-
825
- # Prepare final summary prompt
826
- final_prompt_template = f"""!@>system:
827
- You are a memory summarizer assistant that helps users format their memory information into coherent text in a specific style or format.
1192
+ ASCIIColors.magenta(f"--- Chunk {chunk_id} Updated Memory ---")
1193
+ ASCIIColors.green(memory)
1194
+ ASCIIColors.magenta("----------------------------")
1195
+
1196
+ # Move to next chunk start, considering overlap
1197
+ start_token_idx = max(start_token_idx + 1, end_token_idx - overlap) if overlap>0 and end_token_idx < total_tokens else end_token_idx # +1 guarantees forward progress even when the chunk is smaller than the overlap
1198
+
1199
+
1200
+ # --- Final Aggregation Step ---
1201
+ callback("Aggregating final summary...", MSG_TYPE.MSG_TYPE_STEP)
1202
+ final_prompt_template = f"""{self.system_full_header}
1203
+ You are a memory summarizer assistant.
828
1204
  {final_memory_processing_prompt}.
829
- !@>user:
830
- Here is my document analysis memory:
1205
+ {self.user_full_header}
1206
+ Here is the document analysis memory:
831
1207
  ```{chunk_processing_output_format}
832
- {memory}
1208
+ {{memory}}
833
1209
  ```
834
- The output must be put inside a {final_output_format} markdown tag.
835
- The updated memory must be put in a {chunk_processing_output_format} markdown tag.
836
- !@>assistant:
1210
+ The final output must be put inside a {final_output_format} markdown tag.
1211
+ {self.ai_full_header}
1212
+ ```{final_output_format}
837
1213
  """
838
- # Truncate memory if needed for final prompt
839
- example_final_prompt = final_prompt_template
840
- final_static_tokens = len(self.tokenize(example_final_prompt))
841
- available_final_tokens = ctx_size - final_static_tokens
842
-
1214
+
1215
+ # Truncate memory if needed for the final prompt
1216
+ final_example_prompt = final_prompt_template.format(memory="<final_memory>")
1217
+ try:
1218
+ final_static_tokens = len(self.tokenize(final_example_prompt)) - len(self.tokenize("<final_memory>"))
1219
+ available_final_tokens = ctx_size - final_static_tokens - (self.default_n_predict or 1024) # Reserve space for output
1220
+ except RuntimeError as e:
1221
+ ASCIIColors.error(f"Tokenization failed during final setup: {e}")
1222
+ return "Error: Could not calculate final prompt size."
1223
+
1224
+
843
1225
  memory_tokens = self.tokenize(memory)
844
1226
  if len(memory_tokens) > available_final_tokens:
845
- memory = self.detokenize(memory_tokens[:available_final_tokens])
846
-
1227
+ memory = self.detokenize(memory_tokens[-available_final_tokens:]) # Keep most recent info
1228
+ if debug: ASCIIColors.yellow(f"Final memory truncated to {available_final_tokens} tokens.")
1229
+
847
1230
  # Generate final summary
848
- final_prompt = final_prompt_template
849
- memory = self.generate_text(final_prompt, streaming_callback=callback)
850
- code = self.extract_code_blocks(memory)
851
- if code:
852
- memory=code[0]["content"]
853
- return memory
1231
+ final_prompt = final_prompt_template.format(memory=memory)
1232
+ if debug:
1233
+ ASCIIColors.magenta("--- Final Aggregation Prompt ---")
1234
+ ASCIIColors.cyan(final_prompt)
854
1235
 
855
- def deep_analyze(
856
- self,
857
- query: str,
858
- text: str = None,
859
- files: list = None,
860
- search_prompt: str = "Extract information related to the query from the current text chunk and update the memory with new findings.",
861
- aggregation_prompt: str = None,
862
- output_format: str = "markdown",
863
- ctx_size: int = None,
864
- chunk_size: int = None,
865
- bootstrap_chunk_size: int = None,
866
- bootstrap_steps: int = None,
867
- callback=None,
868
- debug: bool = False
869
- ):
870
- """
871
- Searches for specific information related to a query in a long text or a list of files.
872
- Processes the input in chunks, updates a memory with relevant findings, and optionally aggregates them.
873
-
874
- Parameters:
875
- - query (str): The query to search for.
876
- - text (str, optional): The input text to search in. Defaults to None.
877
- - files (list, optional): List of file paths to search in. Defaults to None.
878
- - search_prompt (str, optional): Prompt for processing each chunk. Defaults to a standard extraction prompt.
879
- - aggregation_prompt (str, optional): Prompt for aggregating findings. Defaults to None.
880
- - output_format (str, optional): Output format. Defaults to "markdown".
881
- - ctx_size (int, optional): Context size for the model. Defaults to None (uses self.ctx_size).
882
- - chunk_size (int, optional): Size of each chunk. Defaults to None (ctx_size // 4). Smaller chunk sizes yield better results but is slower.
883
- - bootstrap_chunk_size (int, optional): Size for initial chunks. Defaults to None.
884
- - bootstrap_steps (int, optional): Number of initial chunks using bootstrap size. Defaults to None.
885
- - callback (callable, optional): Function called after each chunk. Defaults to None.
886
- - debug (bool, optional): Enable debug output. Defaults to False.
887
-
888
- Returns:
889
- - str: The search findings or aggregated output in the specified format.
890
- """
891
- # Set defaults
892
- if ctx_size is None:
893
- ctx_size = self.ctx_size
894
- if chunk_size is None:
895
- chunk_size = ctx_size // 4
896
-
897
- # Prepare input
898
- if files:
899
- all_texts = [(file, open(file, 'r', encoding='utf-8').read()) for file in files]
900
- elif text:
901
- all_texts = [("input_text", text)]
902
- else:
903
- raise ValueError("Either text or files must be provided.")
1236
+ final_summary_raw = self.generate_text(final_prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback)
904
1237
 
905
- # Initialize memory and chunk counter
906
- memory = ""
907
- chunk_id = 0
1238
+ if isinstance(final_summary_raw, dict):
1239
+ ASCIIColors.error(f"Final aggregation failed: {final_summary_raw.get('error')}")
1240
+ return "Error: Final aggregation failed."
908
1241
 
909
- # Define search prompt template using f-string and the provided search_prompt
910
- search_prompt_template = f"""{self.system_full_header}
911
- You are a search assistant that processes documents chunk by chunk to find information related to a query, updating a memory of findings at each step.
1242
+ final_code_blocks = self.extract_code_blocks(final_summary_raw, format=final_output_format)
1243
+ if final_code_blocks:
1244
+ final_summary = final_code_blocks[0]["content"]
1245
+ else:
1246
+ # Fallback similar to chunk processing
1247
+ end_tag = f"```{final_output_format}"
1248
+ last_occurrence = final_summary_raw.rfind(end_tag)
1249
+ if last_occurrence != -1:
1250
+ start_tag_len = len(f"```{final_output_format}\n") # Approx
1251
+ potential_summary = final_summary_raw[last_occurrence + start_tag_len:].strip()
1252
+ if potential_summary.endswith("```"):
1253
+ potential_summary = potential_summary[:-3].strip()
1254
+ final_summary = potential_summary if potential_summary else final_summary_raw.strip().rstrip('```')
1255
+ else:
1256
+ final_summary = final_summary_raw.strip().rstrip('```')
1257
+ ASCIIColors.warning("Could not extract final summary block. Using raw response.")
1258
+
1259
+ if debug:
1260
+ ASCIIColors.magenta("--- Final Summary ---")
1261
+ ASCIIColors.green(final_summary)
1262
+ ASCIIColors.magenta("-------------------")
1263
+
1264
+ return final_summary
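A minimal usage sketch for `sequential_summarize`, assuming a client configured as in the module's `__main__` example (an "ollama" binding with a locally available model); the input file name is a placeholder.

```python
from lollms_client import LollmsClient

lc = LollmsClient("ollama", model_name="mistral-nemo:latest")
long_text = open("report.txt", encoding="utf-8").read()  # placeholder input document

summary = lc.sequential_summarize(
    text=long_text,
    chunk_processing_prompt="Extract the key decisions and deadlines.",
    chunk_processing_output_format="markdown",
    final_output_format="markdown",
    chunk_size=2048,
    debug=False,
)
print(summary)
```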
912
1265
 
913
- Your goal is to extract and combine relevant information from each text chunk with the existing memory, ensuring no key details are omitted or invented.
914
1266
 
1267
+ def deep_analyze(
1268
+ self,
1269
+ query: str,
1270
+ text: str = None,
1271
+ files: Optional[List[Union[str, Path]]] = None,
1272
+ aggregation_prompt: str = "Aggregate the findings from the memory into a coherent answer to the original query.",
1273
+ output_format: str = "markdown",
1274
+ ctx_size: int = None,
1275
+ chunk_size: int = None,
1276
+ overlap: int = None, # Added overlap
1277
+ bootstrap_chunk_size: int = None,
1278
+ bootstrap_steps: int = None,
1279
+ callback=None,
1280
+ debug: bool = False
1281
+ ):
1282
+ """
1283
+ Searches for information related to a query in long text or files, processing chunk by chunk.
1284
+ (Implementation requires self.tokenize, self.detokenize, self.generate_text, self.extract_code_blocks)
1285
+ """
1286
+ # ... (Implementation mostly similar to previous version, but needs updates) ...
1287
+ if not callback:
1288
+ callback=self.sink
1289
+
1290
+ # Set defaults and validate input
1291
+ if ctx_size is None:
1292
+ ctx_size = self.default_ctx_size or 8192
1293
+ if chunk_size is None:
1294
+ chunk_size = ctx_size // 4
1295
+ if overlap is None:
1296
+ overlap = chunk_size // 10
1297
+ if bootstrap_chunk_size is None:
1298
+ bootstrap_chunk_size = chunk_size // 2
1299
+ if bootstrap_steps is None:
1300
+ bootstrap_steps = 2
1301
+
1302
+ if not text and not files:
1303
+ raise ValueError("Either 'text' or 'files' must be provided.")
1304
+ if text and files:
1305
+ ASCIIColors.warning("Both 'text' and 'files' provided. Processing 'files' only.")
1306
+ text = None # Prioritize files if both are given
1307
+
1308
+ # Prepare input texts from files or the single text string
1309
+ all_texts = []
1310
+ if files:
1311
+ from docling import DocumentConverter # Lazy import
1312
+ converter = DocumentConverter()
1313
+ callback("Loading and converting files...", MSG_TYPE.MSG_TYPE_STEP)
1314
+ for i, file_path in enumerate(files):
1315
+ file_p = Path(file_path)
1316
+ callback(f"Processing file {i+1}/{len(files)}: {file_p.name}", MSG_TYPE.MSG_TYPE_STEP_PROGRESS, {"progress":(i+1)/len(files)*100})
1317
+ try:
1318
+ if file_p.exists():
1319
+ file_content_result = converter.convert(file_p)
1320
+ if file_content_result and file_content_result.document:
1321
+ # Exporting to markdown for consistent processing
1322
+ all_texts.append((str(file_path), file_content_result.document.export_to_markdown()))
1323
+ else:
1324
+ ASCIIColors.error(f"Could not convert file: {file_path}")
1325
+ else:
1326
+ ASCIIColors.error(f"File not found: {file_path}")
1327
+ except Exception as e:
1328
+ ASCIIColors.error(f"Error processing file {file_path}: {e}")
1329
+ trace_exception(e)
1330
+ callback("File processing complete.", MSG_TYPE.MSG_TYPE_STEP_END)
1331
+
1332
+ elif text:
1333
+ all_texts = [("input_text", text)]
1334
+
1335
+ if not all_texts:
1336
+ return "Error: No valid text content found to analyze."
1337
+
1338
+ # Initialize memory and counters
1339
+ memory = ""
1340
+ global_chunk_id = 0
1341
+
1342
+ # Define prompts (can be customized)
1343
+ def update_memory_prompt_template(file_name, file_chunk_id, global_chunk_id, chunk, memory, query):
1344
+ system_header = self.system_full_header
1345
+ user_header = self.user_full_header
1346
+ ai_header = self.ai_full_header
1347
+ mem_header = "Initial memory template:" if not memory else "Current findings memory (cumulative):"
1348
+
1349
+ return f"""{system_header}
1350
+ You are a search assistant processing document chunks to find information relevant to a user query. Update the markdown memory with findings from the current chunk.
915
1351
 
916
1352
  ----
917
- # Chunk number: {{chunk_id}}
918
- # Text chunk:
1353
+ File: {file_name}
1354
+ Chunk in File: {file_chunk_id}
1355
+ Global Chunk: {global_chunk_id}
1356
+ Text Chunk:
919
1357
  ```markdown
920
- {{chunk}}
1358
+ {chunk}
921
1359
  ```
922
-
923
- Current findings memory:
1360
+ {mem_header}
924
1361
  ```markdown
925
- {{memory}}
1362
+ """+memory or '# Findings\\n## Key Information\\nDetails relevant to the query...\\n## Context\\nSupporting context...'+f"""
926
1363
  ```
927
- {self.user_full_header}
1364
+ {user_header}
928
1365
  Query: '{query}'
929
- Task: {search_prompt}
930
-
931
- Update the memory by adding new relevant information from this chunk. Retain all prior findings unless contradicted or updated. Only include explicitly relevant details.
932
- Make sure to extract only information relevant to be able to answer the query of the user, or at least give important contextual information that can be completed to answer the user query.
933
- {self.ai_full_header}
934
- """
1366
+ Task: Update the markdown memory by adding new information from this chunk relevant to the query. Retain prior findings unless contradicted. Only include explicitly relevant details. Return the *entire updated* markdown memory inside a markdown code block.
1367
+ {ai_header}
1368
+ ```markdown
1369
+ """ # Start AI response with code block
935
1370
 
936
- # Calculate static prompt tokens
937
- example_prompt = search_prompt_template.replace("{{chunk_id}}", "0")\
938
- .replace("{{memory}}", "")\
939
- .replace("{{chunk}}", "")
940
- static_tokens = len(self.tokenize(example_prompt))
941
-
942
- # Process each text (file or input)
943
- for file_name, file_text in all_texts:
944
- file_tokens = self.tokenize(file_text)
945
- start_token_idx = 0
946
-
947
- while start_token_idx < len(file_tokens):
948
- # Calculate available tokens
949
- current_memory_tokens = len(self.tokenize(memory))
950
- available_tokens = ctx_size - static_tokens - current_memory_tokens
951
- if available_tokens <= 0:
952
- raise ValueError("Memory too large - consider reducing chunk size or increasing context window")
953
-
954
- # Adjust chunk size
955
- actual_chunk_size = (
956
- min(bootstrap_chunk_size, available_tokens)
957
- if bootstrap_chunk_size is not None and bootstrap_steps is not None and chunk_id < bootstrap_steps
958
- else min(chunk_size, available_tokens)
959
- )
960
-
961
- end_token_idx = min(start_token_idx + actual_chunk_size, len(file_tokens))
962
- chunk_tokens = file_tokens[start_token_idx:end_token_idx]
963
- chunk = self.detokenize(chunk_tokens)
964
-
965
- # Generate updated memory
966
- prompt = search_prompt_template.replace("{chunk_id}", str(chunk_id))\
967
- .replace("{memory}", memory)\
968
- .replace("{chunk}", chunk)
969
- if debug:
970
- print(f"----- Chunk {chunk_id} from {file_name} ------")
971
- print(prompt)
972
-
973
- output = self.generate_text(prompt, n_predict=ctx_size // 4, streaming_callback=callback).strip()
974
- code = self.extract_code_blocks(output)
975
- memory = code[0]["content"] if code else output
976
-
977
- if debug:
978
- print("----- Updated Memory ------")
979
- print(memory)
980
- print("---------------------------")
981
-
982
- start_token_idx = end_token_idx
983
- chunk_id += 1
984
-
985
- # Aggregate findings if requested
986
- if aggregation_prompt:
987
- final_prompt = f"""{self.system_full_header}
1371
+ # Estimate static prompt size (approximate)
1372
+ example_prompt = update_memory_prompt_template("f.txt", 0, 0, "<chunk>", "<memory>", query)
1373
+ try:
1374
+ static_tokens = len(self.tokenize(example_prompt)) - len(self.tokenize("<chunk>")) - len(self.tokenize("<memory>"))
1375
+ except RuntimeError as e:
1376
+ ASCIIColors.error(f"Tokenization failed during setup: {e}")
1377
+ return "Error: Could not calculate prompt size."
1378
+
1379
+ # Process each text (from file or input)
1380
+ callback("Starting deep analysis...", MSG_TYPE.MSG_TYPE_STEP_START)
1381
+ for file_path_str, file_text_content in all_texts:
1382
+ file_name = Path(file_path_str).name
1383
+ callback(f"Analyzing: {file_name}", MSG_TYPE.MSG_TYPE_STEP)
1384
+ try:
1385
+ file_tokens = self.tokenize(file_text_content)
1386
+ except RuntimeError as e:
1387
+ ASCIIColors.error(f"Tokenization failed for {file_name}: {e}")
1388
+ continue # Skip this file
1389
+
1390
+ start_token_idx = 0
1391
+ file_chunk_id = 0
1392
+
1393
+ while start_token_idx < len(file_tokens):
1394
+ # Calculate available space dynamically
1395
+ available_tokens_for_dynamic_content = ctx_size - static_tokens - (self.default_n_predict or 1024)
1396
+ if available_tokens_for_dynamic_content <= 100:
1397
+ ASCIIColors.error(f"Context window too small during analysis of {file_name}.")
1398
+ # Option: try truncating memory drastically or break
1399
+ break # Stop processing this file if context is too full
1400
+
1401
+ max_memory_tokens = available_tokens_for_dynamic_content // 2
1402
+ max_chunk_tokens = available_tokens_for_dynamic_content - max_memory_tokens
1403
+
1404
+ # Truncate memory if needed
1405
+ current_memory_tokens = self.tokenize(memory)
1406
+ if len(current_memory_tokens) > max_memory_tokens:
1407
+ memory = self.detokenize(current_memory_tokens[-max_memory_tokens:])
1408
+ if debug: ASCIIColors.yellow(f"Memory truncated (File: {file_name}, Chunk: {file_chunk_id})")
1409
+
1410
+ # Determine chunk size
1411
+ current_chunk_size = bootstrap_chunk_size if global_chunk_id < bootstrap_steps else chunk_size
1412
+ current_chunk_size = min(current_chunk_size, max_chunk_tokens)
1413
+
1414
+ end_token_idx = min(start_token_idx + current_chunk_size, len(file_tokens))
1415
+ chunk_tokens = file_tokens[start_token_idx:end_token_idx]
1416
+ chunk = self.detokenize(chunk_tokens)
1417
+
1418
+ file_chunk_id += 1
1419
+ global_chunk_id += 1
1420
+ callback(f"Processing chunk {file_chunk_id} (Global {global_chunk_id}) of {file_name}", MSG_TYPE.MSG_TYPE_STEP_PROGRESS, {"progress": end_token_idx/len(file_tokens)*100})
1421
+
1422
+ # Generate updated memory
1423
+ prompt = update_memory_prompt_template(
1424
+ file_name=file_name,
1425
+ file_chunk_id=file_chunk_id,
1426
+ global_chunk_id=global_chunk_id,
1427
+ chunk=chunk,
1428
+ memory=memory,
1429
+ query=query
1430
+ )
1431
+ if debug:
1432
+ ASCIIColors.magenta(f"--- Deep Analysis Prompt (Global Chunk {global_chunk_id}) ---")
1433
+ ASCIIColors.cyan(prompt)
1434
+
1435
+ response = self.generate_text(prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback) # Use main callback for streaming output
1436
+
1437
+ if isinstance(response, dict): # Handle error
1438
+ ASCIIColors.error(f"Chunk processing failed (Global {global_chunk_id}): {response.get('error')}")
1439
+ start_token_idx = end_token_idx # Skip to next chunk index
1440
+ continue
1441
+
1442
+ memory_code_blocks = self.extract_code_blocks(response, format="markdown")
1443
+ if memory_code_blocks:
1444
+ memory = memory_code_blocks[0]["content"]
1445
+ else:
1446
+ # Fallback logic (same as sequential_summarize)
1447
+ end_tag = "```markdown"
1448
+ last_occurrence = response.rfind(end_tag)
1449
+ if last_occurrence != -1:
1450
+ start_tag_len = len("```markdown\n")
1451
+ potential_memory = response[last_occurrence + start_tag_len:].strip()
1452
+ if potential_memory.endswith("```"):
1453
+ potential_memory = potential_memory[:-3].strip()
1454
+ memory = potential_memory if potential_memory else response.strip().rstrip('```')
1455
+ else:
1456
+ memory = response.strip().rstrip('```')
1457
+ ASCIIColors.warning(f"Could not extract memory block for chunk {global_chunk_id}. Using raw response.")
1458
+
1459
+
1460
+ if debug:
1461
+ ASCIIColors.magenta(f"--- Updated Memory (After Global Chunk {global_chunk_id}) ---")
1462
+ ASCIIColors.green(memory)
1463
+ ASCIIColors.magenta("-----------------------------------")
1464
+
1465
+ # Move to next chunk start index with overlap
1466
+ start_token_idx = max(start_token_idx + 1, end_token_idx - overlap) if overlap > 0 and end_token_idx < len(file_tokens) else end_token_idx # +1 guarantees forward progress even when the chunk is smaller than the overlap
1467
+
1468
+ callback(f"Finished analyzing: {file_name}", MSG_TYPE.MSG_TYPE_STEP_END)
1469
+
1470
+
1471
+ # --- Final Aggregation ---
1472
+ callback("Aggregating final answer...", MSG_TYPE.MSG_TYPE_STEP_START)
1473
+ final_prompt = f"""{self.system_full_header}
988
1474
  You are a search results aggregator.
989
-
990
1475
  {self.user_full_header}
991
1476
  {aggregation_prompt}
992
-
993
- Collected findings:
1477
+ Collected findings (across all sources):
994
1478
  ```markdown
995
1479
  {memory}
996
1480
  ```
997
-
998
- Provide the final output in {output_format} format.
1481
+ Provide the final aggregated answer in {output_format} format, directly addressing the original query: '{query}'. The final answer must be put inside a {output_format} markdown tag.
999
1482
  {self.ai_full_header}
1000
- """
1001
- final_output = self.generate_text(final_prompt, streaming_callback=callback)
1002
- code = self.extract_code_blocks(final_output)
1003
- return code[0]["content"] if code else final_output
1004
- return memory
1005
- def error(self, content, duration:int=4, client_id=None, verbose:bool=True):
1006
- ASCIIColors.error(content)
1007
-
1008
-
1009
-
1010
- if __name__=="__main__":
1011
- lc = LollmsClient("ollama", model_name="mistral-nemo:latest")
1012
- #lc = LollmsClient("http://localhost:11434", model_name="mistral-nemo:latest", default_generation_mode=ELF_GENERATION_FORMAT.OLLAMA)
1013
- #lc = LollmsClient(model_name="gpt-3.5-turbo-0125", default_generation_mode=ELF_GENERATION_FORMAT.OPENAI)
1014
- print(lc.listModels())
1015
- code = lc.generate_code("Build a simple json that containes name and age. put the output inside a json markdown tag")
1016
- print(code)
1017
-
1018
- code ="""<thinking>
1019
- Hello world thinking!
1020
- How you doing?
1021
-
1022
- </thinking>
1023
- This is no thinking
1024
-
1025
- <think>
1026
- Hello world think!
1027
- How you doing?
1028
-
1029
- </think>
1483
+ ```{output_format}
1484
+ """ # Start AI response
1030
1485
 
1031
- """
1032
- print(lc.extract_thinking_blocks(code))
1033
- print(lc.remove_thinking_blocks(code))
1486
+ # Truncate memory if needed for final prompt (similar logic to sequential_summarize)
1487
+ full_memory = memory # the f-string above already embedded the memory, so keep a copy for the swap after truncation
+ final_example_prompt = final_prompt.replace(full_memory, "<final_memory>") if full_memory else final_prompt
1488
+ try:
1489
+ final_static_tokens = len(self.tokenize(final_example_prompt)) - len(self.tokenize("<final_memory>"))
1490
+ available_final_tokens = ctx_size - final_static_tokens - (self.default_n_predict or 1024)
1491
+ except RuntimeError as e:
1492
+ ASCIIColors.error(f"Tokenization failed during final setup: {e}")
1493
+ return "Error: Could not calculate final prompt size."
1494
+
1495
+ memory_tokens = self.tokenize(memory)
1496
+ if len(memory_tokens) > available_final_tokens:
1497
+ memory = self.detokenize(memory_tokens[-available_final_tokens:])
1498
+ if debug: ASCIIColors.yellow(f"Final memory truncated for aggregation.")
1499
+
1500
+ final_prompt = final_prompt.replace(full_memory, memory) if full_memory else final_prompt # Swap the embedded memory for the (possibly truncated) version
1501
+
1502
+ if debug:
1503
+ ASCIIColors.magenta("--- Final Aggregation Prompt ---")
1504
+ ASCIIColors.cyan(final_prompt)
1505
+
1506
+ final_output_raw = self.generate_text(final_prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback) # Use main callback
1507
+
1508
+ if isinstance(final_output_raw, dict):
1509
+ ASCIIColors.error(f"Final aggregation failed: {final_output_raw.get('error')}")
1510
+ callback("Aggregation failed.", MSG_TYPE.MSG_TYPE_STEP_END, {'status':False})
1511
+ return "Error: Final aggregation failed."
1512
+
1513
+ final_code_blocks = self.extract_code_blocks(final_output_raw, format=output_format)
1514
+ if final_code_blocks:
1515
+ final_output = final_code_blocks[0]["content"]
1516
+ else:
1517
+ # Fallback logic
1518
+ end_tag = f"```{output_format}"
1519
+ last_occurrence = final_output_raw.rfind(end_tag)
1520
+ if last_occurrence != -1:
1521
+ start_tag_len = len(f"```{output_format}\n")
1522
+ potential_output = final_output_raw[last_occurrence + start_tag_len:].strip()
1523
+ if potential_output.endswith("```"):
1524
+ potential_output = potential_output[:-3].strip()
1525
+ final_output = potential_output if potential_output else final_output_raw.strip().rstrip('```')
1526
+ else:
1527
+ final_output = final_output_raw.strip().rstrip('```')
1528
+ ASCIIColors.warning("Could not extract final output block. Using raw response.")
1529
+
1530
+
1531
+ if debug:
1532
+ ASCIIColors.magenta("--- Final Aggregated Output ---")
1533
+ ASCIIColors.green(final_output)
1534
+ ASCIIColors.magenta("-----------------------------")
1535
+
1536
+ callback("Deep analysis complete.", MSG_TYPE.MSG_TYPE_STEP_END)
1537
+ return final_output
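A minimal usage sketch for `deep_analyze`; note that passing `files` relies on the optional `docling` dependency imported above for document conversion. The file names and query are placeholders.

```python
from lollms_client import LollmsClient

lc = LollmsClient("ollama", model_name="mistral-nemo:latest")
answer = lc.deep_analyze(
    query="What penalties does the contract define for late delivery?",
    files=["contract_part1.pdf", "contract_part2.pdf"],  # placeholder paths, converted via docling
    output_format="markdown",
    chunk_size=2048,
    debug=False,
)
print(answer)
```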
1538
+
1539
+ @staticmethod
+ def chunk_text(text, tokenizer, detokenizer, chunk_size, overlap, use_separators=True):
1540
+ """
1541
+ Chunks text based on token count.
1542
+
1543
+ Args:
1544
+ text (str): The text to chunk.
1545
+ tokenizer (callable): Function to tokenize text.
1546
+ detokenizer (callable): Function to detokenize tokens.
1547
+ chunk_size (int): The desired number of tokens per chunk.
1548
+ overlap (int): The number of tokens to overlap between chunks.
1549
+ use_separators (bool): If True, tries to chunk at natural separators (paragraphs, sentences).
1550
+
1551
+ Returns:
1552
+ List[str]: A list of text chunks.
1553
+ """
1554
+ tokens = tokenizer(text)
1555
+ chunks = []
1556
+ start_idx = 0
1557
+
1558
+ if not use_separators:
1559
+ while start_idx < len(tokens):
1560
+ end_idx = min(start_idx + chunk_size, len(tokens))
1561
+ chunks.append(detokenizer(tokens[start_idx:end_idx]))
1562
+ start_idx += max(1, chunk_size - overlap) # always advance to avoid an infinite loop when overlap >= chunk_size
1563
+ if start_idx >= len(tokens): # Ensure last chunk is added correctly
1564
+ break
1565
+ start_idx = max(0, start_idx) # Prevent negative index
1566
+ else:
1567
+ # Find potential separator positions (more robust implementation needed)
1568
+ # This is a basic example using paragraphs first, then sentences.
1569
+ import re
1570
+ separators = ["\n\n", "\n", ". ", "? ", "! "] # Order matters
1571
+
1572
+ current_pos = 0
1573
+ while current_pos < len(text):
1574
+ # Determine target end position based on tokens
1575
+ target_end_token = min(start_idx + chunk_size, len(tokens))
1576
+ target_end_char_approx = len(detokenizer(tokens[:target_end_token])) # Approximate char position
1577
+
1578
+ best_sep_pos = -1
1579
+ # Try finding a good separator near the target end
1580
+ for sep in separators:
1581
+ # Search backwards from the approximate character position
1582
+ search_start = max(current_pos, target_end_char_approx - chunk_size // 2) # Search in a reasonable window
1583
+ sep_pos = text.rfind(sep, search_start, target_end_char_approx + len(sep))
1584
+ if sep_pos > current_pos: # Found a separator after the current start
1585
+ best_sep_pos = max(best_sep_pos, sep_pos + len(sep)) # Take the latest separator found
1586
+
1587
+ # If no good separator found, just cut at token limit
1588
+ if best_sep_pos == -1 or best_sep_pos <= current_pos:
1589
+ end_idx = target_end_token
1590
+ end_char = len(detokenizer(tokens[:end_idx])) if end_idx < len(tokens) else len(text)
1591
+ else:
1592
+ end_char = best_sep_pos
1593
+ end_idx = len(tokenizer(text[:end_char])) # Re-tokenize to find token index
1594
+
1595
+
1596
+ chunk_text_str = text[current_pos:end_char]
1597
+ chunks.append(chunk_text_str)
1598
+
1599
+ # Move to next chunk start, considering overlap in characters
1600
+ overlap_char_approx = len(detokenizer(tokens[:overlap])) # Approx overlap chars
1601
+ next_start_char = max(current_pos, end_char - overlap_char_approx)
1602
+
1603
+ # Try to align next start with a separator too for cleaner breaks
1604
+ best_next_start_sep = next_start_char
1605
+ for sep in separators:
1606
+ sep_pos = text.find(sep, next_start_char)
1607
+ if sep_pos != -1:
1608
+ best_next_start_sep = min(best_next_start_sep, sep_pos+len(sep)) if best_next_start_sep!=next_start_char else sep_pos+len(sep) # Find earliest separator after overlap point
1609
+
1610
+ current_pos = best_next_start_sep if best_next_start_sep > next_start_char else next_start_char
1611
+ start_idx = len(tokenizer(text[:current_pos])) # Update token index for next iteration
1612
+
1613
+
1614
+ if current_pos >= len(text):
1615
+ break
1616
+
1617
+ return chunks
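A toy sketch exercising `chunk_text` with a whitespace tokenizer/detokenizer. Whether the helper is meant to be called on the class or bound to an instance is not fully clear from this hunk; it is shown here as a static call, and the lambdas stand in for the client's real `tokenize`/`detokenize`.

```python
from lollms_client import LollmsClient

tokenizer = lambda s: s.split()            # stand-in for self.tokenize
detokenizer = lambda toks: " ".join(toks)  # stand-in for self.detokenize

sample = ("Paragraph one talks about apples.\n\n"
          "Paragraph two talks about oranges. It has two sentences.")

chunks = LollmsClient.chunk_text(sample, tokenizer, detokenizer,
                                 chunk_size=8, overlap=2, use_separators=False)
for i, c in enumerate(chunks):
    print(i, repr(c))
```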