lollms-client 0.11.0__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lollms-client might be problematic.

@@ -1,30 +1,56 @@
+ # lollms_client/lollms_core.py
  import requests
  from ascii_colors import ASCIIColors, trace_exception
  from lollms_client.lollms_types import MSG_TYPE, ELF_COMPLETION_FORMAT
- from lollms_client.lollms_utilities import encode_image
- from lollms_client.lollms_llm_binding import LollmsLLMBindingManager
+ from lollms_client.lollms_utilities import encode_image # Keep utilities needed by core
+ from lollms_client.lollms_llm_binding import LollmsLLMBinding, LollmsLLMBindingManager
+ # Import new Abstract Base Classes and Managers
+ from lollms_client.lollms_tts_binding import LollmsTTSBinding, LollmsTTSBindingManager
+ from lollms_client.lollms_tti_binding import LollmsTTIBinding, LollmsTTIBindingManager
+ from lollms_client.lollms_stt_binding import LollmsSTTBinding, LollmsSTTBindingManager
+ from lollms_client.lollms_ttv_binding import LollmsTTVBinding, LollmsTTVBindingManager
+ from lollms_client.lollms_ttm_binding import LollmsTTMBinding, LollmsTTMBindingManager
+
  import json
  from enum import Enum
  import base64
  import requests
- import pipmaster as pm
  from typing import List, Optional, Callable, Union, Dict
  import numpy as np
- import pipmaster as pm
  from pathlib import Path
  import os
-
+
  class LollmsClient():
- """Core class for interacting with LOLLMS bindings"""
- def __init__(self,
+ """
+ Core client class for interacting with LOLLMS services, including LLM, TTS, TTI, STT, TTV, and TTM.
+ Provides a unified interface to manage and use different bindings for various modalities.
+ """
+ def __init__(self,
+ # LLM Binding Parameters
  binding_name: str = "lollms",
- host_address: Optional[str] = None,
+ host_address: Optional[str] = None, # Shared host address default for all bindings if not specified
  model_name: str = "",
- service_key: Optional[str] = None,
- verify_ssl_certificate: bool = True,
- personality: Optional[int] = None,
  llm_bindings_dir: Path = Path(__file__).parent / "llm_bindings",
- binding_config: Optional[Dict[str, any]] = None,
+ llm_binding_config: Optional[Dict[str, any]] = None, # Renamed for clarity
+ personality: Optional[int] = None, # Specific to LLM lollms binding
+
+ # Optional Modality Binding Names
+ tts_binding_name: Optional[str] = None,
+ tti_binding_name: Optional[str] = None,
+ stt_binding_name: Optional[str] = None,
+ ttv_binding_name: Optional[str] = None,
+ ttm_binding_name: Optional[str] = None,
+
+ # Modality Binding Directories
+ tts_bindings_dir: Path = Path(__file__).parent / "tts_bindings",
+ tti_bindings_dir: Path = Path(__file__).parent / "tti_bindings",
+ stt_bindings_dir: Path = Path(__file__).parent / "stt_bindings",
+ ttv_bindings_dir: Path = Path(__file__).parent / "ttv_bindings",
+ ttm_bindings_dir: Path = Path(__file__).parent / "ttm_bindings",
+
+ # General Parameters (mostly defaults for LLM generation)
+ service_key: Optional[str] = None, # Shared service key/client_id
+ verify_ssl_certificate: bool = True,
  ctx_size: Optional[int] = 8192,
  n_predict: Optional[int] = 4096,
  stream: bool = False,
@@ -39,37 +65,135 @@ class LollmsClient():
  user_name ="user",
  ai_name = "assistant"):
  """
- Initialize the LollmsCore with a binding and generation parameters.
+ Initialize the LollmsClient with LLM and optional modality bindings.

  Args:
- binding_name (str): Name of the binding to use (e.g., "lollms", "ollama").
- host_address (Optional[str]): Host address for the service. Overrides binding default if provided.
- model_name (str): Name of the model to use. Defaults to empty string.
- service_key (Optional[str]): Authentication key for the service.
- verify_ssl_certificate (bool): Whether to verify SSL certificates. Defaults to True.
- personality (Optional[int]): Personality ID (used only by LOLLMS binding).
- llm_bindings_dir (Path): Directory containing binding implementations.
- Defaults to the "bindings" subdirectory relative to this file's location.
- binding_config (Optional[Dict[str, any]]): Additional configuration for the binding.
- n_predict (Optional[int]): Maximum number of tokens to generate. Default for generate_text.
- stream (bool): Whether to stream the output. Defaults to False for generate_text.
- temperature (float): Sampling temperature. Defaults to 0.1 for generate_text.
- top_k (int): Top-k sampling parameter. Defaults to 50 for generate_text.
- top_p (float): Top-p sampling parameter. Defaults to 0.95 for generate_text.
- repeat_penalty (float): Penalty for repeated tokens. Defaults to 0.8 for generate_text.
- repeat_last_n (int): Number of previous tokens to consider for repeat penalty. Defaults to 40.
- seed (Optional[int]): Random seed for generation. Default for generate_text.
- n_threads (int): Number of threads to use. Defaults to 8 for generate_text.
- streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
- Default for generate_text. Takes a string chunk and an MSG_TYPE enum value.
+ binding_name (str): Name of the primary LLM binding (e.g., "lollms", "ollama").
+ host_address (Optional[str]): Default host address for all services. Overridden by binding defaults if None.
+ model_name (str): Default model name for the LLM binding.
+ llm_bindings_dir (Path): Directory for LLM binding implementations.
+ llm_binding_config (Optional[Dict]): Additional config for the LLM binding.
+ personality (Optional[int]): Personality ID (used by LLM 'lollms' binding).
+ tts_binding_name (Optional[str]): Name of the TTS binding to use (e.g., "lollms").
+ tti_binding_name (Optional[str]): Name of the TTI binding to use (e.g., "lollms").
+ stt_binding_name (Optional[str]): Name of the STT binding to use (e.g., "lollms").
+ ttv_binding_name (Optional[str]): Name of the TTV binding to use (e.g., "lollms").
+ ttm_binding_name (Optional[str]): Name of the TTM binding to use (e.g., "lollms").
+ tts_bindings_dir (Path): Directory for TTS bindings.
+ tti_bindings_dir (Path): Directory for TTI bindings.
+ stt_bindings_dir (Path): Directory for STT bindings.
+ ttv_bindings_dir (Path): Directory for TTV bindings.
+ ttm_bindings_dir (Path): Directory for TTM bindings.
+ service_key (Optional[str]): Shared authentication key or client_id.
+ verify_ssl_certificate (bool): Whether to verify SSL certificates.
+ ctx_size (Optional[int]): Default context size for LLM.
+ n_predict (Optional[int]): Default max tokens for LLM.
+ stream (bool): Default streaming mode for LLM.
+ temperature (float): Default temperature for LLM.
+ top_k (int): Default top_k for LLM.
+ top_p (float): Default top_p for LLM.
+ repeat_penalty (float): Default repeat penalty for LLM.
+ repeat_last_n (int): Default repeat last n for LLM.
+ seed (Optional[int]): Default seed for LLM.
+ n_threads (int): Default threads for LLM.
+ streaming_callback (Optional[Callable]): Default streaming callback for LLM.
+ user_name (str): Default user name for prompts.
+ ai_name (str): Default AI name for prompts.

  Raises:
- ValueError: If the specified binding cannot be created.
+ ValueError: If the primary LLM binding cannot be created.
  """
+ self.host_address = host_address # Store initial preference
+ self.service_key = service_key
+ self.verify_ssl_certificate = verify_ssl_certificate
+
+ # --- LLM Binding Setup ---
  self.binding_manager = LollmsLLMBindingManager(llm_bindings_dir)
- self.binding_config = binding_config or {}
-
- # Store generation parameters as instance variables
+ self.binding = self.binding_manager.create_binding(
+ binding_name=binding_name,
+ host_address=host_address, # Pass initial host preference
+ model_name=model_name,
+ service_key=service_key,
+ verify_ssl_certificate=verify_ssl_certificate,
+ personality=personality,
+ # Pass LLM specific config if needed
+ **(llm_binding_config or {})
+ )
+
+ if self.binding is None:
+ available = self.binding_manager.get_available_bindings()
+ raise ValueError(f"Failed to create LLM binding: {binding_name}. Available: {available}")
+
+ # Determine the effective host address (use LLM binding's if initial was None)
+ effective_host_address = self.host_address
+ if effective_host_address is None and self.binding:
+ effective_host_address = self.binding.host_address
+
+ # --- Modality Binding Setup ---
+ self.tts_binding_manager = LollmsTTSBindingManager(tts_bindings_dir)
+ self.tti_binding_manager = LollmsTTIBindingManager(tti_bindings_dir)
+ self.stt_binding_manager = LollmsSTTBindingManager(stt_bindings_dir)
+ self.ttv_binding_manager = LollmsTTVBindingManager(ttv_bindings_dir)
+ self.ttm_binding_manager = LollmsTTMBindingManager(ttm_bindings_dir)
+
+ self.tts: Optional[LollmsTTSBinding] = None
+ self.tti: Optional[LollmsTTIBinding] = None
+ self.stt: Optional[LollmsSTTBinding] = None
+ self.ttv: Optional[LollmsTTVBinding] = None
+ self.ttm: Optional[LollmsTTMBinding] = None
+
+ if tts_binding_name:
+ self.tts = self.tts_binding_manager.create_binding(
+ binding_name=tts_binding_name,
+ host_address=effective_host_address,
+ service_key=self.service_key,
+ verify_ssl_certificate=self.verify_ssl_certificate
+ )
+ if self.tts is None:
+ ASCIIColors.warning(f"Failed to create TTS binding: {tts_binding_name}. Available: {self.tts_binding_manager.get_available_bindings()}")
+
+ if tti_binding_name:
+ self.tti = self.tti_binding_manager.create_binding(
+ binding_name=tti_binding_name,
+ host_address=effective_host_address,
+ service_key=self.service_key, # Passed as service_key, used as client_id by lollms TTI binding
+ verify_ssl_certificate=self.verify_ssl_certificate
+ )
+ if self.tti is None:
+ ASCIIColors.warning(f"Failed to create TTI binding: {tti_binding_name}. Available: {self.tti_binding_manager.get_available_bindings()}")
+
+ if stt_binding_name:
+ self.stt = self.stt_binding_manager.create_binding(
+ binding_name=stt_binding_name,
+ host_address=effective_host_address,
+ service_key=self.service_key,
+ verify_ssl_certificate=self.verify_ssl_certificate
+ )
+ if self.stt is None:
+ ASCIIColors.warning(f"Failed to create STT binding: {stt_binding_name}. Available: {self.stt_binding_manager.get_available_bindings()}")
+
+ if ttv_binding_name:
+ self.ttv = self.ttv_binding_manager.create_binding(
+ binding_name=ttv_binding_name,
+ host_address=effective_host_address,
+ service_key=self.service_key,
+ verify_ssl_certificate=self.verify_ssl_certificate
+ )
+ if self.ttv is None:
+ ASCIIColors.warning(f"Failed to create TTV binding: {ttv_binding_name}. Available: {self.ttv_binding_manager.get_available_bindings()}")
+
+ if ttm_binding_name:
+ self.ttm = self.ttm_binding_manager.create_binding(
+ binding_name=ttm_binding_name,
+ host_address=effective_host_address,
+ service_key=self.service_key,
+ verify_ssl_certificate=self.verify_ssl_certificate
+ )
+ if self.ttm is None:
+ ASCIIColors.warning(f"Failed to create TTM binding: {ttm_binding_name}. Available: {self.ttm_binding_manager.get_available_bindings()}")
+
+
+ # --- Store Default Generation Parameters ---
  self.default_ctx_size = ctx_size
  self.default_n_predict = n_predict
  self.default_stream = stream
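
As a quick illustration of the new constructor surface shown above, here is a minimal usage sketch. It assumes the class is importable from `lollms_client.lollms_core` as the file header suggests, uses a hypothetical local host address, and relies only on parameters visible in this diff.

```python
# Minimal sketch of the 0.12.x constructor: the LLM binding is required,
# modality bindings are optional, and each modality attribute stays None
# if its binding could not be created (a warning is printed instead).
from lollms_client.lollms_core import LollmsClient

client = LollmsClient(
    binding_name="lollms",                 # primary LLM binding
    host_address="http://localhost:9600",  # hypothetical server address
    tts_binding_name="lollms",             # optional text-to-speech binding
    tti_binding_name="lollms",             # optional text-to-image binding
)

if client.tts is None:
    print("TTS binding unavailable; speech features disabled")
```
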
@@ -81,29 +205,10 @@ class LollmsClient():
  self.default_seed = seed
  self.default_n_threads = n_threads
  self.default_streaming_callback = streaming_callback
-
- # Create the binding instance
- self.binding = self.binding_manager.create_binding(
- binding_name=binding_name,
- host_address=host_address,
- model_name=model_name,
- service_key=service_key,
- verify_ssl_certificate=verify_ssl_certificate,
- personality=personality
- )
-
- if self.binding is None:
- raise ValueError(f"Failed to create binding: {binding_name}. Available bindings: {self.binding_manager.get_available_bindings()}")
-
- # Apply additional configuration if provided
- if binding_config:
- for key, value in binding_config.items():
- setattr(self.binding, key, value)
+
+ # --- Prompt Formatting Attributes ---
  self.user_name = user_name
  self.ai_name = ai_name
- self.service_key = service_key
-
- self.verify_ssl_certificate = verify_ssl_certificate
  self.start_header_id_template ="!@>"
  self.end_header_id_template =": "
  self.system_message_template ="system"
@@ -116,24 +221,25 @@ class LollmsClient():
  self.end_ai_message_id_template =""


+ # --- Prompt Formatting Properties ---
  @property
  def system_full_header(self) -> str:
  """Get the start_header_id_template."""
  return f"{self.start_header_id_template}{self.system_message_template}{self.end_header_id_template}"
-
+
  def system_custom_header(self, ai_name) -> str:
  """Get the start_header_id_template."""
  return f"{self.start_header_id_template}{ai_name}{self.end_header_id_template}"
-
+
  @property
  def user_full_header(self) -> str:
  """Get the start_header_id_template."""
  return f"{self.start_user_header_id_template}{self.user_name}{self.end_user_header_id_template}"
-
+
  def user_custom_header(self, user_name="user") -> str:
  """Get the start_header_id_template."""
  return f"{self.start_user_header_id_template}{user_name}{self.end_user_header_id_template}"
-
+
  @property
  def ai_full_header(self) -> str:
  """Get the start_header_id_template."""
@@ -144,10 +250,13 @@ class LollmsClient():
  return f"{self.start_ai_header_id_template}{ai_name}{self.end_ai_header_id_template}"

  def sink(self, s=None,i=None,d=None):
+ """Placeholder sink method."""
  pass
+
+ # --- Core LLM Binding Methods ---
  def tokenize(self, text: str) -> list:
  """
- Tokenize text using the active binding.
+ Tokenize text using the active LLM binding.

  Args:
  text (str): The text to tokenize.
@@ -155,11 +264,13 @@ class LollmsClient():
  Returns:
  list: List of tokens.
  """
- return self.binding.tokenize(text)
-
+ if self.binding:
+ return self.binding.tokenize(text)
+ raise RuntimeError("LLM binding not initialized.")
+
  def detokenize(self, tokens: list) -> str:
  """
- Detokenize tokens using the active binding.
+ Detokenize tokens using the active LLM binding.

  Args:
  tokens (list): List of tokens to detokenize.
@@ -167,20 +278,24 @@ class LollmsClient():
  Returns:
  str: Detokenized text.
  """
- return self.binding.detokenize(tokens)
-
+ if self.binding:
+ return self.binding.detokenize(tokens)
+ raise RuntimeError("LLM binding not initialized.")
+
  def get_model_details(self) -> dict:
  """
- Get model information from the active binding.
+ Get model information from the active LLM binding.

  Returns:
  dict: Model information dictionary.
  """
- return self.binding.get_model_info()
-
+ if self.binding:
+ return self.binding.get_model_info()
+ raise RuntimeError("LLM binding not initialized.")
+
  def switch_model(self, model_name: str) -> bool:
  """
- Load a new model in the active binding.
+ Load a new model in the active LLM binding.

  Args:
  model_name (str): Name of the model to load.
@@ -188,18 +303,20 @@ class LollmsClient():
  Returns:
  bool: True if model loaded successfully, False otherwise.
  """
- return self.binding.load_model(model_name)
-
- def get_available_bindings(self) -> List[str]:
+ if self.binding:
+ return self.binding.load_model(model_name)
+ raise RuntimeError("LLM binding not initialized.")
+
+ def get_available_llm_bindings(self) -> List[str]: # Renamed for clarity
  """
- Get list of available bindings.
+ Get list of available LLM binding names.

  Returns:
- List[str]: List of binding names that can be used.
+ List[str]: List of binding names that can be used for LLMs.
  """
  return self.binding_manager.get_available_bindings()
-
- def generate_text(self,
+
+ def generate_text(self,
  prompt: str,
  images: Optional[List[str]] = None,
  n_predict: Optional[int] = None,
@@ -212,9 +329,9 @@ class LollmsClient():
  seed: Optional[int] = None,
  n_threads: Optional[int] = None,
  ctx_size: int | None = None,
- streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None) -> str:
+ streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None) -> Union[str, dict]:
  """
- Generate text using the active binding, using instance defaults if parameters are not provided.
+ Generate text using the active LLM binding, using instance defaults if parameters are not provided.

  Args:
  prompt (str): The input prompt for text generation.
@@ -228,59 +345,91 @@ class LollmsClient():
  repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
  seed (Optional[int]): Random seed for generation. Uses instance default if None.
  n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
+ ctx_size (int | None): Context size override for this generation.
  streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
- Uses instance default if None.
- - First parameter (str): The chunk of text received from the stream.
- - Second parameter (MSG_TYPE): The message type enum (e.g., MSG_TYPE.MSG_TYPE_CHUNK).

  Returns:
  Union[str, dict]: Generated text or error dictionary if failed.
  """
- return self.binding.generate_text(
- prompt=prompt,
- images=images,
- n_predict=n_predict if n_predict is not None else self.default_n_predict,
- stream=stream if stream is not None else self.default_stream,
- temperature=temperature if temperature is not None else self.default_temperature,
- top_k=top_k if top_k is not None else self.default_top_k,
- top_p=top_p if top_p is not None else self.default_top_p,
- repeat_penalty=repeat_penalty if repeat_penalty is not None else self.default_repeat_penalty,
- repeat_last_n=repeat_last_n if repeat_last_n is not None else self.default_repeat_last_n,
- seed=seed if seed is not None else self.default_seed,
- n_threads=n_threads if n_threads is not None else self.default_n_threads,
- ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size,
- streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback
- )
+ if self.binding:
+ return self.binding.generate_text(
+ prompt=prompt,
+ images=images,
+ n_predict=n_predict if n_predict is not None else self.default_n_predict,
+ stream=stream if stream is not None else self.default_stream,
+ temperature=temperature if temperature is not None else self.default_temperature,
+ top_k=top_k if top_k is not None else self.default_top_k,
+ top_p=top_p if top_p is not None else self.default_top_p,
+ repeat_penalty=repeat_penalty if repeat_penalty is not None else self.default_repeat_penalty,
+ repeat_last_n=repeat_last_n if repeat_last_n is not None else self.default_repeat_last_n,
+ seed=seed if seed is not None else self.default_seed,
+ n_threads=n_threads if n_threads is not None else self.default_n_threads,
+ ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size,
+ streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback
+ )
+ raise RuntimeError("LLM binding not initialized.")
+
+
+ def embed(self, text, **kwargs):
+ """
+ Generate embeddings for the input text using the active LLM binding.

-
- def embed(self, text):
- self.binding.embed(text)
+ Args:
+ text (str or List[str]): Input text to embed.
+ **kwargs: Additional arguments specific to the binding's embed method.
+
+ Returns:
+ list: List of embeddings.
+ """
+ if self.binding:
+ return self.binding.embed(text, **kwargs)
+ raise RuntimeError("LLM binding not initialized.")


  def listModels(self):
- return self.binding.listModels()
+ """Lists models available to the current LLM binding."""
+ if self.binding:
+ return self.binding.listModels()
+ raise RuntimeError("LLM binding not initialized.")

+ # --- Convenience Methods for Lollms LLM Binding Features ---
+ def listMountedPersonalities(self) -> Union[List[Dict], Dict]:
+ """
+ Lists mounted personalities *if* the active LLM binding is 'lollms'.

+ Returns:
+ Union[List[Dict], Dict]: List of personality dicts or error dict.
+ """
+ if self.binding and hasattr(self.binding, 'lollms_listMountedPersonalities'):
+ return self.binding.lollms_listMountedPersonalities()
+ else:
+ ASCIIColors.warning("listMountedPersonalities is only available for the 'lollms' LLM binding.")
+ return {"status": False, "error": "Functionality not available for the current binding"}

+ # --- Code Generation / Extraction Helpers (These might be moved to TasksLibrary later) ---
  def generate_codes(
- self,
- prompt,
- images=[],
+ self,
+ prompt,
+ images=[],
  template=None,
  language="json",
  code_tag_format="markdown", # or "html"
- max_size = None,
- temperature = None,
- top_k = None,
- top_p=None,
- repeat_penalty=None,
- repeat_last_n=None,
- callback=None,
- debug=False
+ max_size = None,
+ temperature = None,
+ top_k = None,
+ top_p=None,
+ repeat_penalty=None,
+ repeat_last_n=None,
+ callback=None,
+ debug=False
  ):
+ """
+ Generates multiple code blocks based on a prompt.
+ Uses the underlying LLM binding via `generate_text`.
+ """
  response_full = ""
- full_prompt = f"""{self.system_full_header}Act as a code generation assistant that generates code from user prompt.
- {self.user_full_header}
+ full_prompt = f"""{self.system_full_header}Act as a code generation assistant that generates code from user prompt.
+ {self.user_full_header}
  {prompt}
  """
  if template:
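
The hunk above also reworks `generate_text` to guard on an initialized binding and to return `Union[str, dict]`. A hedged sketch of streaming with it follows; the `MSG_TYPE` import path comes from this file's imports, while the exact enum member name is taken from the 0.11.0 docstring and should be verified against `lollms_client.lollms_types`.

```python
# Sketch: stream a completion through generate_text using a callback.
from lollms_client.lollms_types import MSG_TYPE

def on_chunk(chunk: str, msg_type: MSG_TYPE):
    # Receives each streamed text chunk together with its message type.
    print(chunk, end="", flush=True)

result = client.generate_text(
    "Summarize what a binding manager does.",
    stream=True,
    streaming_callback=on_chunk,
)
if isinstance(result, dict):
    # Per the new return type, a dict signals an error from the binding.
    print("generation failed:", result.get("error"))
```
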
@@ -306,24 +455,35 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
  full_prompt += f"""Do not split the code in multiple tags.
  {self.ai_full_header}"""

- if len(self.image_files)>0:
- response = self.generate_text_with_images(full_prompt, self.image_files, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
- elif len(images)>0:
- response = self.generate_text_with_images(full_prompt, images, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
- else:
- response = self.generate_text(full_prompt, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
+ # Use generate_text which handles images internally
+ response = self.generate_text(
+ full_prompt,
+ images=images,
+ n_predict=max_size,
+ temperature=temperature,
+ top_k=top_k,
+ top_p=top_p,
+ repeat_penalty=repeat_penalty,
+ repeat_last_n=repeat_last_n,
+ streaming_callback=callback # Assuming generate_text handles streaming callback
+ )
+
+ if isinstance(response, dict) and not response.get("status", True): # Check for error dict
+ ASCIIColors.error(f"Code generation failed: {response.get('error')}")
+ return []
+
  response_full += response
- codes = self.extract_code_blocks(response)
+ codes = self.extract_code_blocks(response, format=code_tag_format)
  return codes
-
+
  def generate_code(
- self,
- prompt,
+ self,
+ prompt,
  images=[],
  template=None,
  language="json",
- code_tag_format="markdown", # or "html"
- max_size = None,
+ code_tag_format="markdown", # or "html"
+ max_size = None,
  temperature = None,
  top_k = None,
  top_p=None,
@@ -331,9 +491,14 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
  repeat_last_n=None,
  callback=None,
  debug=False ):
-
- full_prompt = f"""{self.system_full_header}Act as a code generation assistant that generates code from user prompt.
- {self.user_full_header}
+ """
+ Generates a single code block based on a prompt.
+ Uses the underlying LLM binding via `generate_text`.
+ Handles potential continuation if the code block is incomplete.
+ """
+
+ full_prompt = f"""{self.system_full_header}Act as a code generation assistant that generates code from user prompt.
+ {self.user_full_header}
  {prompt}
  """
  if template:
@@ -359,44 +524,79 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
  full_prompt += f"""You must return a single code tag.
  Do not split the code in multiple tags.
  {self.ai_full_header}"""
- response = self.generate_text(full_prompt, images, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, streaming_callback=callback)
- codes = self.extract_code_blocks(response)
- if len(codes)>0:
- if not codes[-1]["is_complete"]:
- code = "\n".join(codes[-1]["content"].split("\n")[:-1])
- while not codes[-1]["is_complete"]:
- response = self.generate_text(prompt+code+self.user_full_header+"continue the code. Start from last line and continue the code. Put the code inside a markdown code tag."+self.separator_template+self.ai_full_header, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, streaming_callback=callback)
- codes = self.extract_code_blocks(response)
- if len(codes)==0:
- break
- else:
- if not codes[-1]["is_complete"]:
- code +="\n"+ "\n".join(codes[-1]["content"].split("\n")[:-1])
- else:
- code +="\n"+ "\n".join(codes[-1]["content"].split("\n"))
- else:
- code = codes[-1]["content"]

- return code
- else:
- return None
+ response = self.generate_text(
+ full_prompt,
+ images=images,
+ n_predict=max_size,
+ temperature=temperature,
+ top_k=top_k,
+ top_p=top_p,
+ repeat_penalty=repeat_penalty,
+ repeat_last_n=repeat_last_n,
+ streaming_callback=callback
+ )
+
+ if isinstance(response, dict) and not response.get("status", True):
+ ASCIIColors.error(f"Code generation failed: {response.get('error')}")
+ return None
+
+ codes = self.extract_code_blocks(response, format=code_tag_format)
+ code_content = None
+
+ if codes:
+ last_code = codes[-1]
+ code_content = last_code["content"]
+
+ # Handle incomplete code block continuation (simple approach)
+ max_retries = 3 # Limit continuation attempts
+ retries = 0
+ while not last_code["is_complete"] and retries < max_retries:
+ retries += 1
+ ASCIIColors.info(f"Code block seems incomplete. Attempting continuation ({retries}/{max_retries})...")
+ continuation_prompt = f"{full_prompt}{code_content}\n\n{self.user_full_header}The previous code block was incomplete. Continue the code exactly from where it left off. Do not repeat the previous part. Only provide the continuation inside a single {code_tag_format} code tag.\n{self.ai_full_header}"
+
+ continuation_response = self.generate_text(
+ continuation_prompt,
+ images=images, # Resend images if needed for context
+ n_predict=max_size, # Allow space for continuation
+ temperature=temperature, # Use same parameters
+ top_k=top_k,
+ top_p=top_p,
+ repeat_penalty=repeat_penalty,
+ repeat_last_n=repeat_last_n,
+ streaming_callback=callback
+ )
+
+ if isinstance(continuation_response, dict) and not continuation_response.get("status", True):
+ ASCIIColors.warning(f"Continuation attempt failed: {continuation_response.get('error')}")
+ break # Stop trying if generation fails
+
+ continuation_codes = self.extract_code_blocks(continuation_response, format=code_tag_format)
+
+ if continuation_codes:
+ new_code_part = continuation_codes[0]["content"]
+ code_content += "\n" + new_code_part # Append continuation
+ last_code["is_complete"] = continuation_codes[0]["is_complete"] # Update completeness
+ if last_code["is_complete"]:
+ ASCIIColors.info("Code block continuation successful.")
+ break # Exit loop if complete
+ else:
+ ASCIIColors.warning("Continuation response contained no code block.")
+ break # Stop if no code block found in continuation
+
+ if not last_code["is_complete"]:
+ ASCIIColors.warning("Code block remained incomplete after multiple attempts.")
+
+ return code_content # Return the (potentially completed) code content or None
+

  def extract_code_blocks(self, text: str, format: str = "markdown") -> List[dict]:
  """
  Extracts code blocks from text in Markdown or HTML format.
-
- Parameters:
- text (str): The text to extract code blocks from.
- format (str): The format of code blocks ("markdown" for ``` or "html" for <code class="">).
-
- Returns:
- List[dict]: A list of dictionaries with:
- - 'index' (int): Index of the code block.
- - 'file_name' (str): File name from preceding text, if available.
- - 'content' (str): Code block content.
- - 'type' (str): Language type (from Markdown first line or HTML class).
- - 'is_complete' (bool): True if block has a closing tag.
+ (Implementation remains the same as provided before)
  """
+ # ... (Keep the existing implementation from the previous file) ...
  code_blocks = []
  remaining = text
  first_index = 0
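
The rewritten `generate_code` above now funnels everything through `generate_text` and retries a bounded number of times when the extracted block is incomplete. A hedged usage sketch with an illustrative template follows; only parameters visible in this diff are used.

```python
# Sketch: ask for a single JSON code block; generate_code returns the block's
# content as a string, or None when no code block could be extracted.
template = '{"name": "string", "age": 0}'  # illustrative shape, not from the diff

code = client.generate_code(
    prompt="Produce a JSON object describing a fictional user.",
    template=template,
    language="json",
    code_tag_format="markdown",
)
print(code if code is not None else "no code block extracted")
```
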
@@ -411,150 +611,172 @@ Do not split the code in multiple tags.
  remaining = remaining[index + 3:]
  first_index += index + 3
  except ValueError:
- if len(indices) % 2 == 1: # Odd number of delimiters
- indices.append(first_index + len(remaining))
+ if len(indices) % 2 == 1: # Odd number of delimiters means the last block is open
+ indices.append(first_index + len(remaining)) # Mark end of text as end of block
  break

  elif format.lower() == "html":
  # HTML: Find <code> and </code> positions, handling nested tags
- while remaining:
+ cursor = 0
+ while cursor < len(text):
  try:
- # Look for opening <code> tag
- start_index = remaining.index("<code")
- end_of_opening = remaining.index(">", start_index)
- indices.append(start_index + first_index)
- opening_tag = remaining[start_index:end_of_opening + 1]
- remaining = remaining[end_of_opening + 1:]
- first_index += end_of_opening + 1
+ # Look for opening <code tag
+ start_index = text.index("<code", cursor)
+ try:
+ end_of_opening = text.index(">", start_index)
+ except ValueError:
+ break # Invalid opening tag
+
+ indices.append(start_index)
+ opening_tag_end = end_of_opening + 1
+ cursor = opening_tag_end

  # Look for matching </code>, accounting for nested <code>
  nest_level = 0
- temp_index = 0
- while temp_index < len(remaining):
- if remaining[temp_index:].startswith("<code"):
+ temp_cursor = cursor
+ found_closing = False
+ while temp_cursor < len(text):
+ if text[temp_cursor:].startswith("<code"):
  nest_level += 1
- temp_index += remaining[temp_index:].index(">") + 1
- elif remaining[temp_index:].startswith("</code>"):
+ try:
+ temp_cursor = text.index(">", temp_cursor) + 1
+ except ValueError:
+ break # Invalid nested opening tag
+ elif text[temp_cursor:].startswith("</code>"):
  if nest_level == 0:
- indices.append(first_index + temp_index)
- remaining = remaining[temp_index + len("</code>"):]
- first_index += temp_index + len("</code>")
+ indices.append(temp_cursor)
+ cursor = temp_cursor + len("</code>")
+ found_closing = True
  break
  nest_level -= 1
- temp_index += len("</code>")
+ temp_cursor += len("</code>")
  else:
- temp_index += 1
- else:
- indices.append(first_index + len(remaining))
- break
+ temp_cursor += 1
+
+ if not found_closing: # If no closing tag found until the end
+ indices.append(len(text))
+ break # Stop searching
+
  except ValueError:
- break
+ break # No more opening tags found

  else:
  raise ValueError("Format must be 'markdown' or 'html'")

+ # Process indices to extract blocks
  for i in range(0, len(indices), 2):
  block_infos = {
  'index': i // 2,
  'file_name': "",
  'content': "",
- 'type': 'language-specific',
+ 'type': 'language-specific', # Default type
  'is_complete': False
  }

- # Extract preceding text for file name
  start_pos = indices[i]
- preceding_text = text[:start_pos].strip().splitlines()
- if preceding_text:
- last_line = preceding_text[-1].strip()
+ # --- Extract preceding text for potential file name hints ---
+ # Look backwards from start_pos for common patterns
+ search_area_start = max(0, start_pos - 200) # Limit search area
+ preceding_text_segment = text[search_area_start:start_pos]
+ lines = preceding_text_segment.strip().splitlines()
+ if lines:
+ last_line = lines[-1].strip()
+ # Example patterns (adjust as needed)
  if last_line.startswith("<file_name>") and last_line.endswith("</file_name>"):
  block_infos['file_name'] = last_line[len("<file_name>"):-len("</file_name>")].strip()
- elif last_line.startswith("## filename:"):
- block_infos['file_name'] = last_line[len("## filename:"):].strip()
+ elif last_line.lower().startswith("file:") or last_line.lower().startswith("filename:"):
+ block_infos['file_name'] = last_line.split(":", 1)[1].strip()
+ # --- End file name extraction ---

- # Extract content and type
+ # Extract content and type based on format
  if format.lower() == "markdown":
- sub_text = text[start_pos + 3:]
+ content_start = start_pos + 3 # After ```
  if i + 1 < len(indices):
  end_pos = indices[i + 1]
- content = text[start_pos + 3:end_pos].strip()
+ content_raw = text[content_start:end_pos]
  block_infos['is_complete'] = True
- else:
- content = sub_text.strip()
+ else: # Last block is open
+ content_raw = text[content_start:]
  block_infos['is_complete'] = False

- if content:
- first_line = content.split('\n', 1)[0].strip()
- if first_line and not first_line.startswith(('{', ' ', '\t')):
+ # Check for language specifier on the first line
+ first_line_end = content_raw.find('\n')
+ if first_line_end != -1:
+ first_line = content_raw[:first_line_end].strip()
+ if first_line and not first_line.isspace() and ' ' not in first_line: # Basic check for language specifier
  block_infos['type'] = first_line
- content = content[len(first_line):].strip()
+ content = content_raw[first_line_end + 1:].strip()
+ else:
+ content = content_raw.strip()
+ else: # Single line code block or no language specifier
+ content = content_raw.strip()
+ # If content itself looks like a language specifier, clear it
+ if content and not content.isspace() and ' ' not in content and len(content)<20:
+ block_infos['type'] = content
+ content = ""
+

  elif format.lower() == "html":
- opening_tag = text[start_pos:text.index(">", start_pos) + 1]
- sub_text = text[start_pos + len(opening_tag):]
+ # Find end of opening tag to get content start
+ try:
+ opening_tag_end = text.index(">", start_pos) + 1
+ except ValueError:
+ continue # Should not happen if indices are correct
+
+ opening_tag = text[start_pos:opening_tag_end]
+
  if i + 1 < len(indices):
  end_pos = indices[i + 1]
- content = text[start_pos + len(opening_tag):end_pos].strip()
+ content = text[opening_tag_end:end_pos].strip()
  block_infos['is_complete'] = True
- else:
- content = sub_text.strip()
+ else: # Last block is open
+ content = text[opening_tag_end:].strip()
  block_infos['is_complete'] = False

- # Extract language from class attribute
- if 'class="' in opening_tag:
- class_start = opening_tag.index('class="') + len('class="')
- class_end = opening_tag.index('"', class_start)
- class_value = opening_tag[class_start:class_end]
- if class_value.startswith("language-"):
- block_infos['type'] = class_value[len("language-"):]
+
+ # Extract language from class attribute (more robust)
+ import re
+ match = re.search(r'class\s*=\s*["\']([^"\']*)["\']', opening_tag)
+ if match:
+ classes = match.group(1).split()
+ for cls in classes:
+ if cls.startswith("language-"):
+ block_infos['type'] = cls[len("language-"):]
+ break # Take the first language- class found

  block_infos['content'] = content
- code_blocks.append(block_infos)
+ if block_infos['content'] or block_infos['is_complete']: # Add block if it has content or is closed
+ code_blocks.append(block_infos)

  return code_blocks

+
  def extract_thinking_blocks(self, text: str) -> List[str]:
  """
  Extracts content between <thinking> or <think> tags from a given text.
-
- Parameters:
- text (str): The text containing thinking blocks
-
- Returns:
- List[str]: List of extracted thinking contents
+ (Implementation remains the same as provided before)
  """
  import re
-
- # Pattern to match both <thinking> and <think> blocks with matching tags
  pattern = r'<(thinking|think)>(.*?)</\1>'
- matches = re.finditer(pattern, text, re.DOTALL)
-
- # Extract content from the second group (index 2) and clean
+ matches = re.finditer(pattern, text, re.DOTALL | re.IGNORECASE) # Added IGNORECASE
  thinking_blocks = [match.group(2).strip() for match in matches]
-
  return thinking_blocks

  def remove_thinking_blocks(self, text: str) -> str:
  """
  Removes thinking blocks (either <thinking> or <think>) from text including the tags.
-
- Parameters:
- text (str): The text containing thinking blocks
-
- Returns:
- str: Text with thinking blocks removed
+ (Implementation remains the same as provided before)
  """
  import re
-
- # Pattern to remove both <thinking> and <think> blocks with matching tags
- pattern = r'<(thinking|think)>.*?</\1>'
- cleaned_text = re.sub(pattern, '', text, flags=re.DOTALL)
-
- # Remove extra whitespace and normalize newlines
- cleaned_text = re.sub(r'\n\s*\n', '\n\n', cleaned_text.strip())
-
+ pattern = r'<(thinking|think)>.*?</\1>\s*' # Added \s* to remove potential trailing whitespace/newlines
+ cleaned_text = re.sub(pattern, '', text, flags=re.DOTALL | re.IGNORECASE) # Added IGNORECASE
+ # Further cleanup might be needed depending on desired newline handling
+ cleaned_text = re.sub(r'\n{3,}', '\n\n', cleaned_text).strip() # Collapse excess newlines
  return cleaned_text

+ # --- Task-oriented methods (Candidates for moving to TasksLibrary) ---
+ # Keeping them here for now, but they primarily use generate_code/generate_text
+
  def yes_no(
  self,
  question: str,
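
For reference, a hedged sketch of `extract_code_blocks` on the HTML path, whose nested-tag handling is rewritten above. The returned dictionaries carry the 'index', 'file_name', 'content', 'type' and 'is_complete' keys documented in the 0.11.0 docstring.

```python
# Sketch: extract an HTML-style code block; the class attribute supplies the type.
sample = 'Intro text\n<code class="language-python">print("hello")</code>'

blocks = client.extract_code_blocks(sample, format="html")
for block in blocks:
    print(block["type"], block["is_complete"])  # e.g. python True
    print(block["content"])                     # print("hello")
```
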
@@ -565,174 +787,242 @@ Do not split the code in multiple tags.
565
787
  callback = None
566
788
  ) -> bool | dict:
567
789
  """
568
- Answers a yes/no question.
569
-
570
- Args:
571
- question (str): The yes/no question to answer.
572
- context (str, optional): Additional context to provide for the question.
573
- max_answer_length (int, optional): Maximum string length allowed for the response. Defaults to None.
574
- conditionning (str, optional): An optional system message to put at the beginning of the prompt.
575
- return_explanation (bool, optional): If True, returns a dictionary with the answer and explanation. Defaults to False.
576
-
577
- Returns:
578
- bool or dict:
579
- - If return_explanation is False, returns a boolean (True for 'yes', False for 'no').
580
- - If return_explanation is True, returns a dictionary with the answer and explanation.
790
+ Answers a yes/no question using LLM JSON generation.
791
+ (Implementation requires self.generate_code which uses self.generate_text)
581
792
  """
793
+ # ... (Implementation as provided before, relies on self.generate_code) ...
582
794
  if not callback:
583
795
  callback=self.sink
584
796
 
585
- prompt = f"{conditionning}\nQuestion: {question}\nContext: {context}\n"
586
-
587
- template = """
588
- {
589
- "answer": true | false,
590
- "explanation": "Optional explanation if return_explanation is True"
591
- }
592
- """
593
-
594
- response = self.generate_text_code(
797
+ prompt = f"{self.system_full_header}{conditionning}\n{self.user_full_header}Based on the context, answer the question with only 'true' or 'false' and provide a brief explanation.\nContext:\n{context}\nQuestion: {question}\n{self.ai_full_header}"
798
+
799
+ template = """{
800
+ "answer": true | false, // boolean required
801
+ "explanation": "A brief explanation for the answer"
802
+ }"""
803
+
804
+ # Assuming generate_code exists and works as intended
805
+ response_json_str = self.generate_code(
595
806
  prompt=prompt,
596
- template=template,
597
807
  language="json",
808
+ template=template,
598
809
  code_tag_format="markdown",
599
810
  max_size=max_answer_length,
600
811
  callback=callback
601
812
  )
602
-
813
+
814
+ if response_json_str is None:
815
+ ASCIIColors.error("LLM failed to generate JSON for yes/no question.")
816
+ return {"answer": False, "explanation": "Generation failed"} if return_explanation else False
817
+
603
818
  try:
604
- parsed_response = json.loads(response)
605
- answer = parsed_response.get("answer", False)
819
+ # Attempt to repair minor JSON issues before parsing
820
+ import json
821
+ import re
822
+ # Remove potential comments, trailing commas etc.
823
+ response_json_str = re.sub(r"//.*", "", response_json_str)
824
+ response_json_str = re.sub(r",\s*}", "}", response_json_str)
825
+ response_json_str = re.sub(r",\s*]", "]", response_json_str)
826
+
827
+ parsed_response = json.loads(response_json_str)
828
+ answer = parsed_response.get("answer")
606
829
  explanation = parsed_response.get("explanation", "")
607
-
830
+
831
+ # Validate boolean type
832
+ if not isinstance(answer, bool):
833
+ # Attempt to coerce common string representations
834
+ if isinstance(answer, str):
835
+ answer_lower = answer.lower()
836
+ if answer_lower == 'true':
837
+ answer = True
838
+ elif answer_lower == 'false':
839
+ answer = False
840
+ else:
841
+ raise ValueError("Answer is not a valid boolean representation.")
842
+ else:
843
+ raise ValueError("Answer is not a boolean.")
844
+
845
+
608
846
  if return_explanation:
609
847
  return {"answer": answer, "explanation": explanation}
610
848
  else:
611
849
  return answer
612
- except json.JSONDecodeError:
613
- return False
850
+ except (json.JSONDecodeError, ValueError) as e:
851
+ ASCIIColors.error(f"Failed to parse or validate JSON response for yes/no: {e}")
852
+ ASCIIColors.error(f"Received: {response_json_str}")
853
+ # Fallback: try simple string check in the raw LLM output (less reliable)
854
+ if "true" in response_json_str.lower():
855
+ answer_fallback = True
856
+ elif "false" in response_json_str.lower():
857
+ answer_fallback = False
858
+ else:
859
+ answer_fallback = False # Default to false on ambiguity
860
+
861
+ if return_explanation:
862
+ return {"answer": answer_fallback, "explanation": f"Parsing failed ({e}). Fallback used."}
863
+ else:
864
+ return answer_fallback
865
+
614
866
 
615
867
  def multichoice_question(
616
- self,
617
- question: str,
618
- possible_answers: list,
619
- context: str = "",
620
- max_answer_length: int = None,
621
- conditionning: str = "",
868
+ self,
869
+ question: str,
870
+ possible_answers: list,
871
+ context: str = "",
872
+ max_answer_length: int = None,
873
+ conditionning: str = "",
622
874
  return_explanation: bool = False,
623
875
  callback = None
624
- ) -> dict:
876
+ ) -> int | dict: # Corrected return type hint
625
877
  """
626
- Interprets a multi-choice question from a user's response. This function expects only one choice as true.
627
- All other choices are considered false. If none are correct, returns -1.
628
-
629
- Args:
630
- question (str): The multi-choice question posed by the user.
631
- possible_answers (List[Any]): A list containing all valid options for the chosen value.
632
- context (str, optional): Additional context to provide for the question.
633
- max_answer_length (int, optional): Maximum string length allowed while interpreting the user's responses. Defaults to None.
634
- conditionning (str, optional): An optional system message to put at the beginning of the prompt.
635
- return_explanation (bool, optional): If True, returns a dictionary with the choice and explanation. Defaults to False.
636
-
637
- Returns:
638
- dict:
639
- - If return_explanation is False, returns a JSON object with only the selected choice index.
640
- - If return_explanation is True, returns a JSON object with the selected choice index and an explanation.
641
- - Returns {"index": -1} if no match is found among the possible answers.
878
+ Interprets a multi-choice question using LLM JSON generation.
879
+ (Implementation requires self.generate_code which uses self.generate_text)
642
880
  """
881
+ # ... (Implementation as provided before, relies on self.generate_code) ...
643
882
  if not callback:
644
883
  callback=self.sink
645
-
646
- prompt = f"""
647
- {conditionning}\n
648
- QUESTION:\n{question}\n
649
- POSSIBLE ANSWERS:\n"""
650
- for i, answer in enumerate(possible_answers):
651
- prompt += f"{i}. {answer}\n"
652
-
884
+
885
+ choices_text = "\n".join([f"{i}. {ans}" for i, ans in enumerate(possible_answers)])
886
+
887
+ prompt = f"{self.system_full_header}{conditionning}\n"
888
+ prompt += f"{self.user_full_header}Answer the following multiple-choice question based on the context. Respond with a JSON object containing the index of the single best answer and an optional explanation.\n"
653
889
  if context:
654
- prompt += f"\nADDITIONAL CONTEXT:\n{context}\n"
655
-
656
- prompt += "\nRespond with a JSON object containing:\n"
657
- if return_explanation:
658
- prompt += "{\"index\": (the selected answer index), \"explanation\": (reasoning for selection)}"
659
- else:
660
- prompt += "{\"index\": (the selected answer index)}"
661
-
662
- response = self.generate_text_code(prompt, language="json", max_size=max_answer_length,
663
- accept_all_if_no_code_tags_is_present=True, return_full_generated_code=False, callback=callback)
664
-
890
+ prompt += f"Context:\n{context}\n"
891
+ prompt += f"Question:\n{question}\n"
892
+ prompt += f"Possible Answers:\n{choices_text}\n"
893
+ prompt += f"{self.ai_full_header}"
894
+
895
+ template = """{
896
+ "index": 0, // integer index required
897
+ "explanation": "Optional explanation for the choice"
898
+ }"""
899
+
900
+ response_json_str = self.generate_code(
901
+ prompt=prompt,
902
+ template=template,
903
+ language="json",
904
+ code_tag_format="markdown",
905
+ max_size=max_answer_length,
906
+ callback=callback
907
+ )
908
+
909
+ if response_json_str is None:
910
+ ASCIIColors.error("LLM failed to generate JSON for multichoice question.")
911
+ return {"index": -1, "explanation": "Generation failed"} if return_explanation else -1
912
+
665
913
  try:
666
- result = json.loads(response)
667
- if return_explanation:
668
- if "index" in result and isinstance(result["index"], int):
669
- return result["index"], result["index"]
670
- else:
671
- if "index" in result and isinstance(result["index"], int):
672
- return result["index"]
673
- except json.JSONDecodeError:
914
+ # Attempt to repair minor JSON issues before parsing
915
+ import json
916
+ import re
917
+ response_json_str = re.sub(r"//.*", "", response_json_str)
918
+ response_json_str = re.sub(r",\s*}", "}", response_json_str)
919
+ response_json_str = re.sub(r",\s*]", "]", response_json_str)
920
+
921
+ result = json.loads(response_json_str)
922
+ index = result.get("index")
923
+ explanation = result.get("explanation", "")
924
+
925
+ if not isinstance(index, int) or not (0 <= index < len(possible_answers)):
926
+ raise ValueError(f"Invalid index returned: {index}")
927
+
674
928
  if return_explanation:
675
- return -1, "failed to decide"
929
+ return {"index": index, "explanation": explanation}
676
930
  else:
677
- return -1
678
-
931
+ return index
932
+ except (json.JSONDecodeError, ValueError) as e:
933
+ ASCIIColors.error(f"Failed to parse or validate JSON response for multichoice: {e}")
934
+ ASCIIColors.error(f"Received: {response_json_str}")
935
+ # Fallback logic could be added here (e.g., regex for index) but is less reliable
936
+ return {"index": -1, "explanation": f"Parsing failed ({e})."} if return_explanation else -1
937
+
938
+
679
939
  def multichoice_ranking(
680
- self,
681
- question: str,
682
- possible_answers: list,
683
- context: str = "",
684
- max_answer_length: int = 512,
685
- conditionning: str = "",
940
+ self,
941
+ question: str,
942
+ possible_answers: list,
943
+ context: str = "",
944
+ max_answer_length: int = None,
945
+ conditionning: str = "",
686
946
  return_explanation: bool = False,
687
947
  callback = None
688
948
  ) -> dict:
689
949
  """
690
- Ranks answers for a question from best to worst. Returns a JSON object containing the ranked order.
691
-
692
- Args:
693
- question (str): The question for which the answers are being ranked.
694
- possible_answers (List[Any]): A list of possible answers to rank.
695
- context (str, optional): Additional context to provide for the question.
696
- max_answer_length (int, optional): Maximum string length allowed for the response. Defaults to 50.
697
- conditionning (str, optional): An optional system message to put at the beginning of the prompt.
698
- return_explanation (bool, optional): If True, returns a dictionary with the ranked order and explanations. Defaults to False.
699
-
700
- Returns:
701
- dict:
702
- - If return_explanation is False, returns a JSON object with only the ranked order.
703
- - If return_explanation is True, returns a JSON object with the ranked order and explanations.
950
+ Ranks answers for a question from best to worst using LLM JSON generation.
951
+ (Implementation requires self.generate_code which uses self.generate_text)
704
952
  """
953
+ # ... (Implementation as provided before, relies on self.generate_code) ...
705
954
  if not callback:
706
- callback=self.sink
707
-
708
- prompt = f"""
709
- {conditionning}\n
710
- QUESTION:\n{question}\n
711
- POSSIBLE ANSWERS:\n"""
712
- for i, answer in enumerate(possible_answers):
713
- prompt += f"{i}. {answer}\n"
714
-
955
+ callback = self.sink
956
+
957
+ choices_text = "\n".join([f"{i}. {ans}" for i, ans in enumerate(possible_answers)])
958
+
959
+ prompt = f"{self.system_full_header}{conditionning}\n"
960
+ prompt += f"{self.user_full_header}Rank the following answers to the question from best to worst based on the context. Respond with a JSON object containing a list of indices in ranked order and an optional list of explanations.\n"
715
961
  if context:
716
- prompt += f"\nADDITIONAL CONTEXT:\n{context}\n"
717
-
718
- prompt += "\nRespond with a JSON object containing:\n"
719
- if return_explanation:
720
- prompt += "{\"ranking\": (list of indices ordered from best to worst), \"explanations\": (list of reasons for each ranking)}"
721
- else:
722
- prompt += "{\"ranking\": (list of indices ordered from best to worst)}"
723
-
724
- response = self.generate_text_code(prompt, language="json", return_full_generated_code=False, callback=callback)
725
-
962
+ prompt += f"Context:\n{context}\n"
963
+ prompt += f"Question:\n{question}\n"
964
+ prompt += f"Possible Answers to Rank:\n{choices_text}\n"
965
+ prompt += f"{self.ai_full_header}"
966
+
967
+ template = """{
968
+ "ranking": [0, 1, 2], // list of integer indices required, length must match number of answers
969
+ "explanations": ["Optional explanation 1", "Optional explanation 2", "Optional explanation 3"] // Optional list of strings
970
+ }"""
971
+
972
+ response_json_str = self.generate_code(
973
+ prompt=prompt,
974
+ template=template,
975
+ language="json",
976
+ code_tag_format="markdown",
977
+ max_size=max_answer_length,
978
+ callback=callback
979
+ )
980
+
981
+ default_return = {"ranking": [], "explanations": []} if return_explanation else {"ranking": []}
982
+
983
+ if response_json_str is None:
984
+ ASCIIColors.error("LLM failed to generate JSON for ranking.")
985
+ return default_return
986
+
726
987
  try:
727
- result = json.loads(response)
728
- if "ranking" in result and isinstance(result["ranking"], list):
729
- return result
730
- except json.JSONDecodeError:
731
- return {"ranking": []}
732
-
733
-
988
+ # Attempt to repair minor JSON issues before parsing
989
+ import json
990
+ import re
991
+ response_json_str = re.sub(r"//.*", "", response_json_str)
992
+ response_json_str = re.sub(r",\s*}", "}", response_json_str)
993
+ response_json_str = re.sub(r",\s*]", "]", response_json_str)
994
+
995
+ result = json.loads(response_json_str)
996
+ ranking = result.get("ranking")
997
+ explanations = result.get("explanations", []) if return_explanation else None
998
+
999
+ # Validation
1000
+ if not isinstance(ranking, list) or len(ranking) != len(possible_answers):
1001
+ raise ValueError("Ranking is not a list or has incorrect length.")
1002
+ if not all(isinstance(idx, int) and 0 <= idx < len(possible_answers) for idx in ranking):
1003
+ raise ValueError("Ranking contains invalid indices.")
1004
+ if len(set(ranking)) != len(possible_answers):
1005
+ raise ValueError("Ranking contains duplicate indices.")
1006
+ if return_explanation and not isinstance(explanations, list):
1007
+ ASCIIColors.warning("Explanations format is invalid, returning ranking only.")
1008
+ explanations = None # Ignore invalid explanations
1009
+
1010
+
1011
+ if return_explanation:
1012
+ return {"ranking": ranking, "explanations": explanations or [""] * len(ranking)} # Provide empty strings if explanations were invalid/missing
1013
+ else:
1014
+ return {"ranking": ranking}
1015
+
1016
+ except (json.JSONDecodeError, ValueError) as e:
1017
+ ASCIIColors.error(f"Failed to parse or validate JSON response for ranking: {e}")
1018
+ ASCIIColors.error(f"Received: {response_json_str}")
1019
+ return default_return
1020
+
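For orientation, here is a minimal usage sketch of the ranking helper whose body ends above. The method name `rank_answers` is an assumption inferred from the parameters used in the body (`question`, `possible_answers`, `context`, `return_explanation`); the binding and model names are placeholders for a locally available setup.

```python
# Hedged usage sketch of the JSON-based ranking helper shown above.
# `rank_answers` is an assumed method name, not a confirmed public API.
from lollms_client.lollms_core import LollmsClient

lc = LollmsClient("ollama", model_name="mistral-nemo:latest")

result = lc.rank_answers(
    question="Which planet is known as the Red Planet?",
    possible_answers=["Venus", "Mars", "Jupiter"],
    context="Basic astronomy quiz.",
    return_explanation=True,
)

# Expected shape: {"ranking": [1, 0, 2], "explanations": [...]}, where the
# indices refer to positions in `possible_answers`. An empty ranking means
# the model's JSON could not be parsed or validated.
print(result["ranking"])
```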
1021
+ # --- Summarization / Analysis Methods (Candidates for TasksLibrary) ---
1022
+ # These use generate_text and tokenization/detokenization
1023
+
734
1024
  def sequential_summarize(
735
- self,
1025
+ self,
736
1026
  text:str,
737
1027
  chunk_processing_prompt:str="Extract relevant information from the current text chunk and update the memory if needed.",
738
1028
  chunk_processing_output_format="markdown",
@@ -740,42 +1030,43 @@ Do not split the code in multiple tags.
740
1030
  final_output_format="markdown",
741
1031
  ctx_size:int=None,
742
1032
  chunk_size:int=None,
1033
+ overlap:int=None, # Added overlap
743
1034
  bootstrap_chunk_size:int=None,
744
1035
  bootstrap_steps:int=None,
745
1036
  callback = None,
746
1037
  debug:bool= False):
747
1038
  """
748
- This function processes a given text in chunks and generates a summary for each chunk.
749
- It then combines the summaries to create a final summary.
750
-
751
- Parameters:
752
- text (str): The input text to be summarized.
753
- chunk_processing_prompt (str, optional): The prompt used for processing each chunk. Defaults to "".
754
- chunk_processing_output_format (str, optional): The format of the output for each chunk. Defaults to "markdown".
755
- final_memory_processing_prompt (str, optional): The prompt used for processing the final memory. Defaults to "Create final summary using this memory.".
756
- final_output_format (str, optional): The format of the final output. Defaults to "markdown".
757
- ctx_size (int, optional): The size of the context. Defaults to None.
758
- chunk_size (int, optional): The size of each chunk. Defaults to None.
759
- callback (callable, optional): A function to be called after processing each chunk. Defaults to None.
760
- debug (bool, optional): A flag to enable debug mode. Defaults to False.
761
-
762
- Returns:
763
- The final summary in the specified format.
1039
+ Processes text in chunks sequentially, updating a memory at each step.
1040
+ (Implementation requires self.tokenize, self.detokenize, self.generate_text, self.extract_code_blocks)
764
1041
  """
1042
+ # ... (Implementation as provided before, relies on core methods) ...
1043
+ if not callback:
1044
+ callback = self.sink
1045
+
765
1046
  if ctx_size is None:
766
- ctx_size = self.ctx_size
767
-
1047
+ ctx_size = self.default_ctx_size or 8192 # Provide a fallback default
768
1048
  if chunk_size is None:
769
- chunk_size = ctx_size//4
770
-
1049
+ chunk_size = ctx_size // 4
1050
+ if overlap is None:
1051
+ overlap = chunk_size // 10 # Default overlap
1052
+ if bootstrap_chunk_size is None:
1053
+ bootstrap_chunk_size = chunk_size // 2 # Smaller initial chunks
1054
+ if bootstrap_steps is None:
1055
+ bootstrap_steps = 2 # Process first few chunks smaller
1056
+
771
1057
  # Tokenize entire text
772
- all_tokens = self.tokenize(text)
1058
+ try:
1059
+ all_tokens = self.tokenize(text)
1060
+ except RuntimeError as e:
1061
+ ASCIIColors.error(f"Tokenization failed: {e}")
1062
+ return "Error: Could not tokenize input text."
773
1063
  total_tokens = len(all_tokens)
774
-
1064
+
775
1065
  # Initialize memory and chunk index
776
1066
  memory = ""
777
1067
  start_token_idx = 0
778
-
1068
+ chunk_id = 0
1069
+
779
1070
  # Create static prompt template
780
1071
  static_prompt_template = f"""{self.system_full_header}
781
1072
  You are a structured sequential text summary assistant that processes documents chunk by chunk, updating a memory of previously generated information at each step.
@@ -789,9 +1080,7 @@ Update the memory by merging previous information with new details from this tex
789
1080
  Only add information explicitly present in the chunk. Retain all relevant prior memory unless clarified or updated by the current chunk.
790
1081
 
791
1082
  ----
792
- # Text chunk:
793
- # Chunk number: {{chunk_id}}
794
- ----
1083
+ Text chunk (Chunk number: {{chunk_id}}):
795
1084
  ```markdown
796
1085
  {{chunk}}
797
1086
  ```
@@ -804,298 +1093,525 @@ Before updating, verify each requested detail:
804
1093
 
805
1094
  Include only confirmed details in the output.
806
1095
  Rewrite the full memory including the updates and keeping relevant data.
807
- Do not discuss the information inside thememory, just put the relevant information without comments.
808
-
809
- ----
810
- # Current document analysis memory:
1096
+ Do not discuss the information inside the memory, just put the relevant information without comments.
1097
+ The output memory must be put inside a {chunk_processing_output_format} markdown code block.
811
1098
  ----
1099
+ Current document analysis memory:
812
1100
  ```{chunk_processing_output_format}
813
1101
  {{memory}}
814
1102
  ```
815
1103
  {self.ai_full_header}
816
- """
817
- # Calculate static prompt tokens (with empty memory and chunk)
818
- chunk_id=0
819
- example_prompt = static_prompt_template.format(custom_prompt=chunk_processing_prompt if chunk_processing_prompt else '', memory="", chunk="", chunk_id=chunk_id)
820
- static_tokens = len(self.tokenize(example_prompt))
821
-
1104
+ ```{chunk_processing_output_format}
1105
+ """ # Added start of code block for AI
1106
+
1107
+ # Calculate static prompt tokens (with estimated placeholders)
1108
+ example_prompt = static_prompt_template.format(
1109
+ custom_prompt=chunk_processing_prompt,
1110
+ memory="<est_memory>",
1111
+ chunk="<est_chunk>",
1112
+ chunk_id=0
1113
+ )
1114
+ try:
1115
+ static_tokens = len(self.tokenize(example_prompt)) - len(self.tokenize("<est_memory>")) - len(self.tokenize("<est_chunk>"))
1116
+ except RuntimeError as e:
1117
+ ASCIIColors.error(f"Tokenization failed during setup: {e}")
1118
+ return "Error: Could not calculate prompt size."
1119
+
822
1120
  # Process text in chunks
823
1121
  while start_token_idx < total_tokens:
824
- # Calculate available tokens for chunk
825
- current_memory_tokens = len(self.tokenize(memory))
826
- available_tokens = ctx_size - static_tokens - current_memory_tokens
827
-
828
- if available_tokens <= 0:
829
- raise ValueError("Memory too large - consider reducing chunk size or increasing context window")
830
-
831
- # Get chunk tokens
832
- if bootstrap_chunk_size is not None and chunk_id < bootstrap_steps:
833
- end_token_idx = min(start_token_idx + bootstrap_chunk_size, total_tokens)
834
- else:
835
- end_token_idx = min(start_token_idx + chunk_size, total_tokens)
1122
+ # Calculate available tokens for chunk + memory
1123
+ available_tokens_for_dynamic_content = ctx_size - static_tokens - (self.default_n_predict or 1024) # Reserve space for output
1124
+ if available_tokens_for_dynamic_content <= 100: # Need some minimum space
1125
+ ASCIIColors.error("Context size too small for summarization with current settings.")
1126
+ return "Error: Context size too small."
1127
+
1128
+ # Estimate token split between memory and chunk (e.g., 50/50)
1129
+ max_memory_tokens = available_tokens_for_dynamic_content // 2
1130
+ max_chunk_tokens = available_tokens_for_dynamic_content - max_memory_tokens
1131
+
1132
+ # Truncate memory if needed
1133
+ current_memory_tokens = self.tokenize(memory)
1134
+ if len(current_memory_tokens) > max_memory_tokens:
1135
+ memory = self.detokenize(current_memory_tokens[-max_memory_tokens:]) # Keep recent memory
1136
+ if debug: ASCIIColors.yellow(f"Memory truncated to {max_memory_tokens} tokens.")
1137
+
1138
+ # Determine actual chunk size based on remaining space and settings
1139
+ current_chunk_size = bootstrap_chunk_size if chunk_id < bootstrap_steps else chunk_size
1140
+ current_chunk_size = min(current_chunk_size, max_chunk_tokens) # Adjust chunk size based on available space
1141
+
1142
+ end_token_idx = min(start_token_idx + current_chunk_size, total_tokens)
836
1143
  chunk_tokens = all_tokens[start_token_idx:end_token_idx]
837
1144
  chunk = self.detokenize(chunk_tokens)
838
- chunk_id +=1
839
-
1145
+
1146
+ chunk_id += 1
1147
+ callback(f"Processing chunk {chunk_id}...", MSG_TYPE.MSG_TYPE_STEP)
1148
+
840
1149
  # Generate memory update
841
- prompt = static_prompt_template.format(custom_prompt=chunk_processing_prompt if chunk_processing_prompt else '', memory=memory, chunk=chunk, chunk_id=chunk_id)
1150
+ prompt = static_prompt_template.format(
1151
+ custom_prompt=chunk_processing_prompt,
1152
+ memory=memory,
1153
+ chunk=chunk,
1154
+ chunk_id=chunk_id
1155
+ )
842
1156
  if debug:
843
- ASCIIColors.yellow(f" ----- {chunk_id-1} ------")
844
- ASCIIColors.red(prompt)
845
-
846
- memory = self.generate_text(prompt, n_predict=ctx_size//4, streaming_callback=callback).strip()
847
- code = self.extract_code_blocks(memory)
848
- if code:
849
- memory=code[0]["content"]
850
-
1157
+ ASCIIColors.magenta(f"--- Chunk {chunk_id} Prompt ---")
1158
+ ASCIIColors.cyan(prompt)
1159
+
1160
+ response = self.generate_text(prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback)
1161
+
1162
+ if isinstance(response, dict): # Handle generation error
1163
+ ASCIIColors.error(f"Chunk {chunk_id} processing failed: {response.get('error')}")
1164
+ # Option: skip chunk or stop? Let's skip for now.
1165
+ start_token_idx = end_token_idx # Move to next chunk index
1166
+ continue
1167
+
1168
+ memory_code_blocks = self.extract_code_blocks(response, format=chunk_processing_output_format)
1169
+ if memory_code_blocks:
1170
+ memory = memory_code_blocks[0]["content"] # Assume first block is the memory
1171
+ else:
1172
+ # Fallback: Try to extract from the end if the AI added text after the block
1173
+ end_tag = f"```{chunk_processing_output_format}"
1174
+ last_occurrence = response.rfind(end_tag)
1175
+ if last_occurrence != -1:
1176
+ # Extract content between the start and end tags
1177
+ start_tag_len = len(f"```{chunk_processing_output_format}\n") # Approx
1178
+ potential_memory = response[last_occurrence + start_tag_len:].strip()
1179
+ if potential_memory.endswith("```"):
1180
+ potential_memory = potential_memory[:-3].strip()
1181
+ if potential_memory: # Use if non-empty
1182
+ memory = potential_memory
1183
+ else: # If extraction failed, keep old memory or use raw response? Use raw response for now.
1184
+ ASCIIColors.warning(f"Could not extract memory block for chunk {chunk_id}. Using raw response.")
1185
+ memory = response.strip().rstrip('```') # Basic cleanup
1186
+ else:
1187
+ ASCIIColors.warning(f"Could not extract memory block for chunk {chunk_id}. Using raw response.")
1188
+ memory = response.strip().rstrip('```')
1189
+
1190
+
851
1191
  if debug:
852
- ASCIIColors.yellow(f" ----- OUT ------")
853
- ASCIIColors.yellow(memory)
854
- ASCIIColors.yellow(" ----- ------")
855
- # Move to next chunk
856
- start_token_idx = end_token_idx
857
-
858
- # Prepare final summary prompt
859
- final_prompt_template = f"""!@>system:
860
- You are a memory summarizer assistant that helps users format their memory information into coherant text in a specific style or format.
1192
+ ASCIIColors.magenta(f"--- Chunk {chunk_id} Updated Memory ---")
1193
+ ASCIIColors.green(memory)
1194
+ ASCIIColors.magenta("----------------------------")
1195
+
1196
+ # Move to next chunk start, considering overlap
1197
+ start_token_idx = max(start_token_idx, end_token_idx - overlap) if overlap > 0 and end_token_idx < total_tokens else end_token_idx
1198
+
1199
+
1200
+ # --- Final Aggregation Step ---
1201
+ callback("Aggregating final summary...", MSG_TYPE.MSG_TYPE_STEP)
1202
+ final_prompt_template = f"""{self.system_full_header}
1203
+ You are a memory summarizer assistant.
861
1204
  {final_memory_processing_prompt}.
862
- !@>user:
863
- Here is my document analysis memory:
1205
+ {self.user_full_header}
1206
+ Here is the document analysis memory:
864
1207
  ```{chunk_processing_output_format}
865
- {memory}
1208
+ {{memory}}
866
1209
  ```
867
- The output must be put inside a {final_output_format} markdown tag.
868
- The updated memory must be put in a {chunk_processing_output_format} markdown tag.
869
- !@>assistant:
1210
+ The final output must be put inside a {final_output_format} markdown tag.
1211
+ {self.ai_full_header}
1212
+ ```{final_output_format}
870
1213
  """
871
- # Truncate memory if needed for final prompt
872
- example_final_prompt = final_prompt_template
873
- final_static_tokens = len(self.tokenize(example_final_prompt))
874
- available_final_tokens = ctx_size - final_static_tokens
875
-
876
- memory_tokens = self.tokenize(memory)
877
- if len(memory_tokens) > available_final_tokens:
878
- memory = self.detokenize(memory_tokens[:available_final_tokens])
879
-
880
- # Generate final summary
881
- final_prompt = final_prompt_template
882
- memory = self.generate_text(final_prompt, streaming_callback=callback)
883
- code = self.extract_code_blocks(memory)
884
- if code:
885
- memory=code[0]["content"]
886
- return memory
887
1214
 
1215
+ # Truncate memory if needed for the final prompt
1216
+ final_example_prompt = final_prompt_template.format(memory="<final_memory>")
1217
+ try:
1218
+ final_static_tokens = len(self.tokenize(final_example_prompt)) - len(self.tokenize("<final_memory>"))
1219
+ available_final_tokens = ctx_size - final_static_tokens - (self.default_n_predict or 1024) # Reserve space for output
1220
+ except RuntimeError as e:
1221
+ ASCIIColors.error(f"Tokenization failed during final setup: {e}")
1222
+ return "Error: Could not calculate final prompt size."
888
1223
 
889
- def update_memory_from_file_chunk_prompt(self, file_name, file_chunk_id, global_chunk_id, chunk, memory, memory_template, query, task_prompt):
890
- return f"""{self.system_full_header}
891
- You are a search assistant that processes documents chunk by chunk to find information related to a query, updating a markdown memory of findings at each step.
892
1224
 
893
- Your goal is to extract relevant information from each text chunk and update the provided markdown memory structure, ensuring no key details are omitted or invented. Maintain the structure of the JSON template.
1225
+ memory_tokens = self.tokenize(memory)
1226
+ if len(memory_tokens) > available_final_tokens:
1227
+ memory = self.detokenize(memory_tokens[-available_final_tokens:]) # Keep most recent info
1228
+ if debug: ASCIIColors.yellow(f"Final memory truncated to {available_final_tokens} tokens.")
894
1229
 
895
- ----
896
- # Current file: {file_name}
897
- # Chunk number in this file: {file_chunk_id}
898
- # Global chunk number: {global_chunk_id}
899
- # Text chunk:
900
- ```markdown
901
- {chunk}
902
- ```
903
- {'Current findings memory (cumulative across all files):' if memory!="" else 'Memory template:'}
904
- ```markdown
905
- {memory if memory!="" else memory_template}
906
- ```
907
- {self.user_full_header}
908
- Query: '{query}'
909
- Task: {task_prompt}
910
- Update the markdown memory by adding new information from this chunk relevant to the query. Retain all prior findings unless contradicted or updated. Only include explicitly relevant details.
911
- Ensure the output is valid markdown matching the structure of the provided template.
912
- Make sure to extract only information relevant to answering the user's query or providing important contextual information.
913
- Return the updated markdown memory inside a markdown code block.
914
- {self.ai_full_header}
915
- """
1230
+ # Generate final summary
1231
+ final_prompt = final_prompt_template.format(memory=memory)
1232
+ if debug:
1233
+ ASCIIColors.magenta("--- Final Aggregation Prompt ---")
1234
+ ASCIIColors.cyan(final_prompt)
916
1235
 
917
- def update_memory_from_file_chunk_prompt_markdown(self, file_name, file_chunk_id, global_chunk_id, chunk, memory, query):
918
- return f"""{self.system_full_header}
919
- You are a search assistant that processes documents chunk by chunk to find information related to a query, updating a markdown memory of findings at each step.
1236
+ final_summary_raw = self.generate_text(final_prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback)
920
1237
 
921
- Your goal is to extract relevant information from each text chunk and update the provided markdown memory structure, ensuring no key details are omitted or invented. Maintain the structure of the markdown template.
1238
+ if isinstance(final_summary_raw, dict):
1239
+ ASCIIColors.error(f"Final aggregation failed: {final_summary_raw.get('error')}")
1240
+ return "Error: Final aggregation failed."
1241
+
1242
+ final_code_blocks = self.extract_code_blocks(final_summary_raw, format=final_output_format)
1243
+ if final_code_blocks:
1244
+ final_summary = final_code_blocks[0]["content"]
1245
+ else:
1246
+ # Fallback similar to chunk processing
1247
+ end_tag = f"```{final_output_format}"
1248
+ last_occurrence = final_summary_raw.rfind(end_tag)
1249
+ if last_occurrence != -1:
1250
+ start_tag_len = len(f"```{final_output_format}\n") # Approx
1251
+ potential_summary = final_summary_raw[last_occurrence + start_tag_len:].strip()
1252
+ if potential_summary.endswith("```"):
1253
+ potential_summary = potential_summary[:-3].strip()
1254
+ final_summary = potential_summary if potential_summary else final_summary_raw.strip().rstrip('```')
1255
+ else:
1256
+ final_summary = final_summary_raw.strip().rstrip('```')
1257
+ ASCIIColors.warning("Could not extract final summary block. Using raw response.")
1258
+
1259
+ if debug:
1260
+ ASCIIColors.magenta("--- Final Summary ---")
1261
+ ASCIIColors.green(final_summary)
1262
+ ASCIIColors.magenta("-------------------")
1263
+
1264
+ return final_summary
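A minimal usage sketch for `sequential_summarize`, assuming a locally reachable binding; the binding name, model name, and file path below are placeholders, and the chunking values are illustrative token counts.

```python
# Usage sketch for sequential_summarize. Binding, model and path are
# placeholders; chunk_size and overlap are illustrative values in tokens.
from pathlib import Path
from lollms_client.lollms_core import LollmsClient

lc = LollmsClient("ollama", model_name="mistral-nemo:latest")

long_text = Path("report.txt").read_text(encoding="utf-8")

summary = lc.sequential_summarize(
    text=long_text,
    chunk_processing_prompt="Extract key findings, dates and figures.",
    chunk_processing_output_format="markdown",
    final_output_format="markdown",
    chunk_size=2048,   # smaller chunks are slower but usually more thorough
    overlap=200,       # tokens shared between consecutive chunks
    debug=False,
)
print(summary)
```

Because the prompt budget reserves room for the running memory and the model's output, a context window that is too small makes the call return an error string rather than silently truncating.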
922
1265
 
923
- ----
924
- # Current file: {file_name}
925
- # Chunk number in this file: {file_chunk_id}
926
- # Global chunk number: {global_chunk_id}
927
- # Text chunk:
928
- ```markdown
929
- {chunk}
930
- ```
931
- Current findings memory (cumulative across all files):
932
- ```markdown
933
- {memory}
934
- ```
935
- {self.user_full_header}
936
- Query: '{query}'
937
- {'Start Creating a memory from the text chunk in a format adapted to answer the user Query' if memory=="" else 'Update the markdown memory by adding new information from this chunk relevant to the query.'} Retain all prior findings unless contradicted or updated. Only include explicitly relevant details.
938
- {'Ensure the output is valid markdown matching the structure of the current memory' if memory!='' else 'Ensure the output is valid markdown matching the structure of the provided template.'}
939
- Make sure to extract only information relevant to answering the user's query or providing important contextual information.
940
- Return the updated markdown memory inside a markdown code block.
941
- {self.ai_full_header}
942
- """
943
1266
 
944
1267
  def deep_analyze(
945
1268
  self,
946
1269
  query: str,
947
1270
  text: str = None,
948
- files: list = None,
949
- aggregation_prompt: str = None,
1271
+ files: Optional[List[Union[str, Path]]] = None,
1272
+ aggregation_prompt: str = "Aggregate the findings from the memory into a coherent answer to the original query.",
950
1273
  output_format: str = "markdown",
951
1274
  ctx_size: int = None,
952
1275
  chunk_size: int = None,
1276
+ overlap: int = None, # Added overlap
953
1277
  bootstrap_chunk_size: int = None,
954
1278
  bootstrap_steps: int = None,
955
1279
  callback=None,
956
1280
  debug: bool = False
957
1281
  ):
958
1282
  """
959
- Searches for specific information related to a query in a long text or a list of files.
960
- Processes each file separately in chunks, updates a shared markdown memory with relevant findings, and optionally aggregates them.
961
-
962
- Parameters:
963
- - query (str): The query to search for.
964
- - text (str, optional): The input text to search in. Defaults to None.
965
- - files (list, optional): List of file paths to search in. Defaults to None.
966
- - task_prompt (str, optional): Prompt for processing each chunk. Defaults to a standard markdown extraction prompt.
967
- - aggregation_prompt (str, optional): Prompt for aggregating findings. Defaults to None.
968
- - output_format (str, optional): Output format. Defaults to "markdown".
969
- - ctx_size (int, optional): Context size for the model. Defaults to None (uses self.ctx_size).
970
- - chunk_size (int, optional): Size of each chunk. Defaults to None (ctx_size // 4). Smaller chunk sizes yield better results but are slower.
971
- - bootstrap_chunk_size (int, optional): Size for initial chunks. Defaults to None.
972
- - bootstrap_steps (int, optional): Number of initial chunks using bootstrap size. Defaults to None.
973
- - callback (callable, optional): Function called after each chunk. Defaults to None.
974
- - debug (bool, optional): Enable debug output. Defaults to False.
975
-
976
- Returns:
977
- - str: The search findings or aggregated output in the specified format.
1283
+ Searches for information related to a query in long text or files, processing chunk by chunk.
1284
+ (Implementation requires self.tokenize, self.detokenize, self.generate_text, self.extract_code_blocks)
978
1285
  """
979
- # Set defaults
1286
+ # ... (Implementation mostly similar to previous version, but needs updates) ...
1287
+ if not callback:
1288
+ callback=self.sink
1289
+
1290
+ # Set defaults and validate input
980
1291
  if ctx_size is None:
981
- ctx_size = self.default_ctx_size
1292
+ ctx_size = self.default_ctx_size or 8192
982
1293
  if chunk_size is None:
983
1294
  chunk_size = ctx_size // 4
984
-
985
- # Prepare input
1295
+ if overlap is None:
1296
+ overlap = chunk_size // 10
1297
+ if bootstrap_chunk_size is None:
1298
+ bootstrap_chunk_size = chunk_size // 2
1299
+ if bootstrap_steps is None:
1300
+ bootstrap_steps = 2
1301
+
1302
+ if not text and not files:
1303
+ raise ValueError("Either 'text' or 'files' must be provided.")
1304
+ if text and files:
1305
+ ASCIIColors.warning("Both 'text' and 'files' provided. Processing 'files' only.")
1306
+ text = None # Prioritize files if both are given
1307
+
1308
+ # Prepare input texts from files or the single text string
1309
+ all_texts = []
986
1310
  if files:
987
- all_texts = [(file, open(file, 'r', encoding='utf-8').read()) for file in files]
1311
+ from docling import DocumentConverter # Lazy import
1312
+ converter = DocumentConverter()
1313
+ callback("Loading and converting files...", MSG_TYPE.MSG_TYPE_STEP)
1314
+ for i, file_path in enumerate(files):
1315
+ file_p = Path(file_path)
1316
+ callback(f"Processing file {i+1}/{len(files)}: {file_p.name}", MSG_TYPE.MSG_TYPE_STEP_PROGRESS, {"progress":(i+1)/len(files)*100})
1317
+ try:
1318
+ if file_p.exists():
1319
+ file_content_result = converter.convert(file_p)
1320
+ if file_content_result and file_content_result.document:
1321
+ # Exporting to markdown for consistent processing
1322
+ all_texts.append((str(file_path), file_content_result.document.export_to_markdown()))
1323
+ else:
1324
+ ASCIIColors.error(f"Could not convert file: {file_path}")
1325
+ else:
1326
+ ASCIIColors.error(f"File not found: {file_path}")
1327
+ except Exception as e:
1328
+ ASCIIColors.error(f"Error processing file {file_path}: {e}")
1329
+ trace_exception(e)
1330
+ callback("File processing complete.", MSG_TYPE.MSG_TYPE_STEP_END)
1331
+
988
1332
  elif text:
989
1333
  all_texts = [("input_text", text)]
990
- else:
991
- raise ValueError("Either text or files must be provided.")
992
1334
 
993
- # Set default memory template for article analysis if none provided
994
- memory = ""
1335
+ if not all_texts:
1336
+ return "Error: No valid text content found to analyze."
995
1337
 
996
- # Initialize global chunk counter
1338
+ # Initialize memory and counters
1339
+ memory = ""
997
1340
  global_chunk_id = 0
998
-
999
- # Calculate static prompt tokens
1000
- example_prompt = self.update_memory_from_file_chunk_prompt_markdown("example.txt","0", "0", "", "", query)
1001
- static_tokens = len(self.tokenize(example_prompt))
1002
1341
 
1003
- # Process each file separately
1004
- for file_name, file_text in all_texts:
1005
- file_tokens = self.tokenize(file_text)
1342
+ # Define prompts (can be customized)
1343
+ def update_memory_prompt_template(file_name, file_chunk_id, global_chunk_id, chunk, memory, query):
1344
+ system_header = self.system_full_header
1345
+ user_header = self.user_full_header
1346
+ ai_header = self.ai_full_header
1347
+ mem_header = "Initial memory template:" if not memory else "Current findings memory (cumulative):"
1348
+
1349
+ return f"""{system_header}
1350
+ You are a search assistant processing document chunks to find information relevant to a user query. Update the markdown memory with findings from the current chunk.
1351
+
1352
+ ----
1353
+ File: {file_name}
1354
+ Chunk in File: {file_chunk_id}
1355
+ Global Chunk: {global_chunk_id}
1356
+ Text Chunk:
1357
+ ```markdown
1358
+ {chunk}
1359
+ ```
1360
+ {mem_header}
1361
+ ```markdown
1362
+ """+memory or '# Findings\\n## Key Information\\nDetails relevant to the query...\\n## Context\\nSupporting context...'+f"""
1363
+ ```
1364
+ {user_header}
1365
+ Query: '{query}'
1366
+ Task: Update the markdown memory by adding new information from this chunk relevant to the query. Retain prior findings unless contradicted. Only include explicitly relevant details. Return the *entire updated* markdown memory inside a markdown code block.
1367
+ {ai_header}
1368
+ ```markdown
1369
+ """ # Start AI response with code block
1370
+
1371
+ # Estimate static prompt size (approximate)
1372
+ example_prompt = update_memory_prompt_template("f.txt", 0, 0, "<chunk>", "<memory>", query)
1373
+ try:
1374
+ static_tokens = len(self.tokenize(example_prompt)) - len(self.tokenize("<chunk>")) - len(self.tokenize("<memory>"))
1375
+ except RuntimeError as e:
1376
+ ASCIIColors.error(f"Tokenization failed during setup: {e}")
1377
+ return "Error: Could not calculate prompt size."
1378
+
1379
+ # Process each text (from file or input)
1380
+ callback("Starting deep analysis...", MSG_TYPE.MSG_TYPE_STEP_START)
1381
+ for file_path_str, file_text_content in all_texts:
1382
+ file_name = Path(file_path_str).name
1383
+ callback(f"Analyzing: {file_name}", MSG_TYPE.MSG_TYPE_STEP)
1384
+ try:
1385
+ file_tokens = self.tokenize(file_text_content)
1386
+ except RuntimeError as e:
1387
+ ASCIIColors.error(f"Tokenization failed for {file_name}: {e}")
1388
+ continue # Skip this file
1389
+
1006
1390
  start_token_idx = 0
1007
- file_chunk_id = 0 # Reset chunk counter for each file
1391
+ file_chunk_id = 0
1008
1392
 
1009
1393
  while start_token_idx < len(file_tokens):
1010
- # Calculate available tokens
1011
- current_memory_tokens = len(self.tokenize(memory))
1012
- available_tokens = ctx_size - static_tokens - current_memory_tokens
1013
- if available_tokens <= 0:
1014
- raise ValueError("Memory too large - consider reducing chunk size or increasing context window")
1015
-
1016
- # Adjust chunk size
1017
- actual_chunk_size = (
1018
- min(bootstrap_chunk_size, available_tokens)
1019
- if bootstrap_chunk_size is not None and bootstrap_steps is not None and global_chunk_id < bootstrap_steps
1020
- else min(chunk_size, available_tokens)
1021
- )
1022
-
1023
- end_token_idx = min(start_token_idx + actual_chunk_size, len(file_tokens))
1394
+ # Calculate available space dynamically
1395
+ available_tokens_for_dynamic_content = ctx_size - static_tokens - (self.default_n_predict or 1024)
1396
+ if available_tokens_for_dynamic_content <= 100:
1397
+ ASCIIColors.error(f"Context window too small during analysis of {file_name}.")
1398
+ # Option: try truncating memory drastically or break
1399
+ break # Stop processing this file if context is too full
1400
+
1401
+ max_memory_tokens = available_tokens_for_dynamic_content // 2
1402
+ max_chunk_tokens = available_tokens_for_dynamic_content - max_memory_tokens
1403
+
1404
+ # Truncate memory if needed
1405
+ current_memory_tokens = self.tokenize(memory)
1406
+ if len(current_memory_tokens) > max_memory_tokens:
1407
+ memory = self.detokenize(current_memory_tokens[-max_memory_tokens:])
1408
+ if debug: ASCIIColors.yellow(f"Memory truncated (File: {file_name}, Chunk: {file_chunk_id})")
1409
+
1410
+ # Determine chunk size
1411
+ current_chunk_size = bootstrap_chunk_size if global_chunk_id < bootstrap_steps else chunk_size
1412
+ current_chunk_size = min(current_chunk_size, max_chunk_tokens)
1413
+
1414
+ end_token_idx = min(start_token_idx + current_chunk_size, len(file_tokens))
1024
1415
  chunk_tokens = file_tokens[start_token_idx:end_token_idx]
1025
1416
  chunk = self.detokenize(chunk_tokens)
1026
1417
 
1418
+ file_chunk_id += 1
1419
+ global_chunk_id += 1
1420
+ callback(f"Processing chunk {file_chunk_id} (Global {global_chunk_id}) of {file_name}", MSG_TYPE.MSG_TYPE_STEP_PROGRESS, {"progress": end_token_idx/len(file_tokens)*100})
1421
+
1027
1422
  # Generate updated memory
1028
- prompt = self.update_memory_from_file_chunk_prompt_markdown(
1029
- file_name=file_name,
1030
- file_chunk_id=file_chunk_id,
1031
- global_chunk_id=global_chunk_id,
1032
- chunk=chunk,
1033
- memory=memory,
1034
- query=query)
1423
+ prompt = update_memory_prompt_template(
1424
+ file_name=file_name,
1425
+ file_chunk_id=file_chunk_id,
1426
+ global_chunk_id=global_chunk_id,
1427
+ chunk=chunk,
1428
+ memory=memory,
1429
+ query=query
1430
+ )
1035
1431
  if debug:
1036
- print(f"----- Chunk {file_chunk_id} (Global {global_chunk_id}) from {file_name} ------")
1037
- print(prompt)
1432
+ ASCIIColors.magenta(f"--- Deep Analysis Prompt (Global Chunk {global_chunk_id}) ---")
1433
+ ASCIIColors.cyan(prompt)
1434
+
1435
+ response = self.generate_text(prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback) # Use main callback for streaming output
1038
1436
 
1039
- output = self.generate_text(prompt, n_predict=ctx_size // 4, streaming_callback=callback).strip()
1040
- code = self.extract_code_blocks(output)
1041
- if code:
1042
- memory = code[0]["content"]
1437
+ if isinstance(response, dict): # Handle error
1438
+ ASCIIColors.error(f"Chunk processing failed (Global {global_chunk_id}): {response.get('error')}")
1439
+ start_token_idx = end_token_idx # Skip to next chunk index
1440
+ continue
1441
+
1442
+ memory_code_blocks = self.extract_code_blocks(response, format="markdown")
1443
+ if memory_code_blocks:
1444
+ memory = memory_code_blocks[0]["content"]
1043
1445
  else:
1044
- memory = output
1446
+ # Fallback logic (same as sequential_summarize)
1447
+ end_tag = "```markdown"
1448
+ last_occurrence = response.rfind(end_tag)
1449
+ if last_occurrence != -1:
1450
+ start_tag_len = len("```markdown\n")
1451
+ potential_memory = response[last_occurrence + start_tag_len:].strip()
1452
+ if potential_memory.endswith("```"):
1453
+ potential_memory = potential_memory[:-3].strip()
1454
+ memory = potential_memory if potential_memory else response.strip().rstrip('```')
1455
+ else:
1456
+ memory = response.strip().rstrip('```')
1457
+ ASCIIColors.warning(f"Could not extract memory block for chunk {global_chunk_id}. Using raw response.")
1458
+
1045
1459
 
1046
1460
  if debug:
1047
- ASCIIColors.red("----- Updated Memory ------")
1048
- ASCIIColors.white(memory)
1049
- ASCIIColors.red("---------------------------")
1461
+ ASCIIColors.magenta(f"--- Updated Memory (After Global Chunk {global_chunk_id}) ---")
1462
+ ASCIIColors.green(memory)
1463
+ ASCIIColors.magenta("-----------------------------------")
1050
1464
 
1051
- start_token_idx = end_token_idx
1052
- file_chunk_id += 1
1053
- global_chunk_id += 1
1465
+ # Move to next chunk start index with overlap
1466
+ start_token_idx = max(start_token_idx, end_token_idx - overlap) if overlap > 0 and end_token_idx < len(file_tokens) else end_token_idx
1467
+
1468
+ callback(f"Finished analyzing: {file_name}", MSG_TYPE.MSG_TYPE_STEP_END)
1054
1469
 
1055
- # Aggregate findings if requested
1056
- if aggregation_prompt:
1057
- final_prompt = f"""{self.system_full_header}
1470
+
1471
+ # --- Final Aggregation ---
1472
+ callback("Aggregating final answer...", MSG_TYPE.MSG_TYPE_STEP_START)
1473
+ final_prompt = f"""{self.system_full_header}
1058
1474
  You are a search results aggregator.
1059
1475
  {self.user_full_header}
1060
1476
  {aggregation_prompt}
1061
- Collected findings (across all files):
1477
+ Collected findings (across all sources):
1062
1478
  ```markdown
1063
1479
- {memory}
+ {{memory}}
1064
1480
  ```
1065
- Provide the final output in {output_format} format.
1481
+ Provide the final aggregated answer in {output_format} format, directly addressing the original query: '{query}'. The final answer must be put inside a {output_format} markdown tag.
1066
1482
  {self.ai_full_header}
1067
- """
1068
- final_output = self.generate_text(final_prompt, streaming_callback=callback)
1069
- code = self.extract_code_blocks(final_output)
1070
- return code[0]["content"] if code else final_output
1071
- return memory
1483
+ ```{output_format}
1484
+ """ # Start AI response
1072
1485
 
1073
- def error(self, content, duration:int=4, client_id=None, verbose:bool=True):
1074
- ASCIIColors.error(content)
1486
+ # Truncate memory if needed for final prompt (similar logic to sequential_summarize)
1487
+ final_example_prompt = final_prompt.replace("{memory}", "<final_memory>")
1488
+ try:
1489
+ final_static_tokens = len(self.tokenize(final_example_prompt)) - len(self.tokenize("<final_memory>"))
1490
+ available_final_tokens = ctx_size - final_static_tokens - (self.default_n_predict or 1024)
1491
+ except RuntimeError as e:
1492
+ ASCIIColors.error(f"Tokenization failed during final setup: {e}")
1493
+ return "Error: Could not calculate final prompt size."
1075
1494
 
1495
+ memory_tokens = self.tokenize(memory)
1496
+ if len(memory_tokens) > available_final_tokens:
1497
+ memory = self.detokenize(memory_tokens[-available_final_tokens:])
1498
+ if debug: ASCIIColors.yellow(f"Final memory truncated for aggregation.")
1076
1499
 
1500
+ final_prompt = final_prompt.format(memory=memory) # Format with potentially truncated memory
1077
1501
 
1078
- if __name__=="__main__":
1079
- lc = LollmsClient("ollama", model_name="mistral-nemo:latest")
1080
- #lc = LollmsClient("http://localhost:11434", model_name="mistral-nemo:latest", default_generation_mode=ELF_GENERATION_FORMAT.OLLAMA)
1081
- #lc = LollmsClient(model_name="gpt-3.5-turbo-0125", default_generation_mode=ELF_GENERATION_FORMAT.OPENAI)
1082
- print(lc.listModels())
1083
- code = lc.generate_code("Build a simple json that containes name and age. put the output inside a json markdown tag")
1084
- print(code)
1502
+ if debug:
1503
+ ASCIIColors.magenta("--- Final Aggregation Prompt ---")
1504
+ ASCIIColors.cyan(final_prompt)
1085
1505
 
1086
- code ="""<thinking>
1087
- Hello world thinking!
1088
- How you doing?
1089
-
1090
- </thinking>
1091
- This is no thinking
1506
+ final_output_raw = self.generate_text(final_prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback) # Use main callback
1092
1507
 
1093
- <think>
1094
- Hello world think!
1095
- How you doing?
1096
-
1097
- </think>
1508
+ if isinstance(final_output_raw, dict):
1509
+ ASCIIColors.error(f"Final aggregation failed: {final_output_raw.get('error')}")
1510
+ callback("Aggregation failed.", MSG_TYPE.MSG_TYPE_STEP_END, {'status':False})
1511
+ return "Error: Final aggregation failed."
1098
1512
 
1099
- """
1100
- print(lc.extract_thinking_blocks(code))
1101
- print(lc.remove_thinking_blocks(code))
1513
+ final_code_blocks = self.extract_code_blocks(final_output_raw, format=output_format)
1514
+ if final_code_blocks:
1515
+ final_output = final_code_blocks[0]["content"]
1516
+ else:
1517
+ # Fallback logic
1518
+ end_tag = f"```{output_format}"
1519
+ last_occurrence = final_output_raw.rfind(end_tag)
1520
+ if last_occurrence != -1:
1521
+ start_tag_len = len(f"```{output_format}\n")
1522
+ potential_output = final_output_raw[last_occurrence + start_tag_len:].strip()
1523
+ if potential_output.endswith("```"):
1524
+ potential_output = potential_output[:-3].strip()
1525
+ final_output = potential_output if potential_output else final_output_raw.strip().rstrip('```')
1526
+ else:
1527
+ final_output = final_output_raw.strip().rstrip('```')
1528
+ ASCIIColors.warning("Could not extract final output block. Using raw response.")
1529
+
1530
+
1531
+ if debug:
1532
+ ASCIIColors.magenta("--- Final Aggregated Output ---")
1533
+ ASCIIColors.green(final_output)
1534
+ ASCIIColors.magenta("-----------------------------")
1535
+
1536
+ callback("Deep analysis complete.", MSG_TYPE.MSG_TYPE_STEP_END)
1537
+ return final_output
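A corresponding sketch for `deep_analyze` over a set of files; the file paths and model name are placeholders, and `docling` must be installed for the file-conversion step.

```python
# Usage sketch for deep_analyze. When `files` is given, each document is
# converted to markdown via docling before chunked analysis, and the findings
# memory is aggregated into a final answer to the query.
from lollms_client.lollms_core import LollmsClient

lc = LollmsClient("ollama", model_name="mistral-nemo:latest")

answer = lc.deep_analyze(
    query="What evaluation metrics are reported, and what are their values?",
    files=["paper1.pdf", "paper2.pdf"],  # placeholder paths
    output_format="markdown",
    chunk_size=2048,
    overlap=200,
)
print(answer)
```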
1538
+
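The fenced-block fallback (take the first extracted code block, otherwise search back from the last opening fence, otherwise fall back to the raw response) is duplicated across both methods above. Below is a sketch of how it could be factored into a single helper; the name `_extract_block_or_raw` is hypothetical and not part of the current class.

```python
# Hypothetical helper consolidating the duplicated fenced-block fallback used
# by sequential_summarize and deep_analyze. Not part of the current class.
def _extract_block_or_raw(self, response: str, fmt: str) -> str:
    blocks = self.extract_code_blocks(response, format=fmt)
    if blocks:
        return blocks[0]["content"]
    start_tag = f"```{fmt}"
    last = response.rfind(start_tag)
    if last != -1:
        candidate = response[last + len(start_tag):].strip()
        if candidate.endswith("```"):
            candidate = candidate[:-3].strip()
        if candidate:
            return candidate
    # Last resort: raw response with any trailing fence characters stripped.
    return response.strip().rstrip("`").strip()
```

Both call sites would then reduce to something like `memory = self._extract_block_or_raw(response, chunk_processing_output_format)` and the final-output equivalents.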
1539
+ def chunk_text(text, tokenizer, detokenizer, chunk_size, overlap, use_separators=True):
1540
+ """
1541
+ Chunks text based on token count.
1542
+
1543
+ Args:
1544
+ text (str): The text to chunk.
1545
+ tokenizer (callable): Function to tokenize text.
1546
+ detokenizer (callable): Function to detokenize tokens.
1547
+ chunk_size (int): The desired number of tokens per chunk.
1548
+ overlap (int): The number of tokens to overlap between chunks.
1549
+ use_separators (bool): If True, tries to chunk at natural separators (paragraphs, sentences).
1550
+
1551
+ Returns:
1552
+ List[str]: A list of text chunks.
1553
+ """
1554
+ tokens = tokenizer(text)
1555
+ chunks = []
1556
+ start_idx = 0
1557
+
1558
+ if not use_separators:
1559
+ while start_idx < len(tokens):
1560
+ end_idx = min(start_idx + chunk_size, len(tokens))
1561
+ chunks.append(detokenizer(tokens[start_idx:end_idx]))
1562
+ start_idx += chunk_size - overlap
1563
+ if start_idx >= len(tokens): # Ensure last chunk is added correctly
1564
+ break
1565
+ start_idx = max(0, start_idx) # Prevent negative index
1566
+ else:
1567
+ # Find potential separator positions (more robust implementation needed)
1568
+ # This is a basic example using paragraphs first, then sentences.
1569
+ import re
1570
+ separators = ["\n\n", "\n", ". ", "? ", "! "] # Order matters
1571
+
1572
+ current_pos = 0
1573
+ while current_pos < len(text):
1574
+ # Determine target end position based on tokens
1575
+ target_end_token = min(start_idx + chunk_size, len(tokens))
1576
+ target_end_char_approx = len(detokenizer(tokens[:target_end_token])) # Approximate char position
1577
+
1578
+ best_sep_pos = -1
1579
+ # Try finding a good separator near the target end
1580
+ for sep in separators:
1581
+ # Search backwards from the approximate character position
1582
+ search_start = max(current_pos, target_end_char_approx - chunk_size // 2) # Search in a reasonable window
1583
+ sep_pos = text.rfind(sep, search_start, target_end_char_approx + len(sep))
1584
+ if sep_pos > current_pos: # Found a separator after the current start
1585
+ best_sep_pos = max(best_sep_pos, sep_pos + len(sep)) # Take the latest separator found
1586
+
1587
+ # If no good separator found, just cut at token limit
1588
+ if best_sep_pos == -1 or best_sep_pos <= current_pos:
1589
+ end_idx = target_end_token
1590
+ end_char = len(detokenizer(tokens[:end_idx])) if end_idx < len(tokens) else len(text)
1591
+ else:
1592
+ end_char = best_sep_pos
1593
+ end_idx = len(tokenizer(text[:end_char])) # Re-tokenize to find token index
1594
+
1595
+
1596
+ chunk_text_str = text[current_pos:end_char]
1597
+ chunks.append(chunk_text_str)
1598
+
1599
+ # Move to next chunk start, considering overlap in characters
1600
+ overlap_char_approx = len(detokenizer(tokens[:overlap])) # Approx overlap chars
1601
+ next_start_char = max(current_pos, end_char - overlap_char_approx)
1602
+
1603
+ # Try to align next start with a separator too for cleaner breaks
1604
+ best_next_start_sep = next_start_char
1605
+ for sep in separators:
1606
+ sep_pos = text.find(sep, next_start_char)
1607
+ if sep_pos != -1:
1608
+ best_next_start_sep = min(best_next_start_sep, sep_pos+len(sep)) if best_next_start_sep!=next_start_char else sep_pos+len(sep) # Find earliest separator after overlap point
1609
+
1610
+ current_pos = best_next_start_sep if best_next_start_sep > next_start_char else next_start_char
1611
+ start_idx = len(tokenizer(text[:current_pos])) # Update token index for next iteration
1612
+
1613
+
1614
+ if current_pos >= len(text):
1615
+ break
1616
+
1617
+ return chunks
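Finally, a usage sketch for `chunk_text`. Because the signature takes no `self`, the call below goes through the class rather than an instance and passes the client's tokenizer and detokenizer explicitly; whether the function ultimately stays on the class or moves to a utilities module is an assumption here.

```python
# Usage sketch for chunk_text, assuming it remains reachable on the class.
# Calling through the class avoids binding the instance to the `text` argument.
from lollms_client.lollms_core import LollmsClient

lc = LollmsClient("ollama", model_name="mistral-nemo:latest")

text = "First paragraph.\n\nSecond paragraph with more detail. Another sentence follows."

chunks = LollmsClient.chunk_text(
    text,
    tokenizer=lc.tokenize,
    detokenizer=lc.detokenize,
    chunk_size=64,      # tokens per chunk (illustrative)
    overlap=8,          # tokens shared between consecutive chunks
    use_separators=True,
)

for i, chunk in enumerate(chunks):
    print(f"--- chunk {i} ---")
    print(chunk)
```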