lollms-client 0.11.0__py3-none-any.whl → 0.12.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lollms-client might be problematic. Click here for more details.
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/lollms/__init__.py +1 -0
- lollms_client/llm_bindings/ollama/__init__.py +2 -1
- lollms_client/llm_bindings/openai/__init__.py +2 -1
- lollms_client/llm_bindings/transformers/__init__.py +1 -0
- lollms_client/lollms_core.py +1133 -617
- lollms_client/lollms_llm_binding.py +2 -0
- lollms_client/lollms_stt_binding.py +137 -0
- lollms_client/lollms_tasks.py +1 -2
- lollms_client/lollms_tti_binding.py +175 -0
- lollms_client/lollms_ttm_binding.py +135 -0
- lollms_client/lollms_tts_binding.py +138 -0
- lollms_client/lollms_ttv_binding.py +135 -0
- lollms_client/stt_bindings/lollms/__init__.py +138 -0
- lollms_client/tti_bindings/lollms/__init__.py +210 -0
- lollms_client/ttm_bindings/__init__.py +0 -0
- lollms_client/ttm_bindings/lollms/__init__.py +73 -0
- lollms_client/tts_bindings/lollms/__init__.py +145 -0
- lollms_client/ttv_bindings/__init__.py +73 -0
- {lollms_client-0.11.0.dist-info → lollms_client-0.12.1.dist-info}/METADATA +1 -1
- lollms_client-0.12.1.dist-info/RECORD +41 -0
- lollms_client-0.11.0.dist-info/RECORD +0 -34
- {lollms_client-0.11.0.dist-info → lollms_client-0.12.1.dist-info}/WHEEL +0 -0
- {lollms_client-0.11.0.dist-info → lollms_client-0.12.1.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-0.11.0.dist-info → lollms_client-0.12.1.dist-info}/top_level.txt +0 -0
lollms_client/lollms_core.py
CHANGED
|
@@ -1,30 +1,56 @@
|
|
|
1
|
+
# lollms_client/lollms_core.py
|
|
1
2
|
import requests
|
|
2
3
|
from ascii_colors import ASCIIColors, trace_exception
|
|
3
4
|
from lollms_client.lollms_types import MSG_TYPE, ELF_COMPLETION_FORMAT
|
|
4
|
-
from lollms_client.lollms_utilities import encode_image
|
|
5
|
-
from lollms_client.lollms_llm_binding import LollmsLLMBindingManager
|
|
5
|
+
from lollms_client.lollms_utilities import encode_image # Keep utilities needed by core
|
|
6
|
+
from lollms_client.lollms_llm_binding import LollmsLLMBinding, LollmsLLMBindingManager
|
|
7
|
+
# Import new Abstract Base Classes and Managers
|
|
8
|
+
from lollms_client.lollms_tts_binding import LollmsTTSBinding, LollmsTTSBindingManager
|
|
9
|
+
from lollms_client.lollms_tti_binding import LollmsTTIBinding, LollmsTTIBindingManager
|
|
10
|
+
from lollms_client.lollms_stt_binding import LollmsSTTBinding, LollmsSTTBindingManager
|
|
11
|
+
from lollms_client.lollms_ttv_binding import LollmsTTVBinding, LollmsTTVBindingManager
|
|
12
|
+
from lollms_client.lollms_ttm_binding import LollmsTTMBinding, LollmsTTMBindingManager
|
|
13
|
+
|
|
6
14
|
import json
|
|
7
15
|
from enum import Enum
|
|
8
16
|
import base64
|
|
9
17
|
import requests
|
|
10
|
-
import pipmaster as pm
|
|
11
18
|
from typing import List, Optional, Callable, Union, Dict
|
|
12
19
|
import numpy as np
|
|
13
|
-
import pipmaster as pm
|
|
14
20
|
from pathlib import Path
|
|
15
21
|
import os
|
|
16
|
-
|
|
22
|
+
|
|
17
23
|
class LollmsClient():
|
|
18
|
-
"""
|
|
19
|
-
|
|
24
|
+
"""
|
|
25
|
+
Core client class for interacting with LOLLMS services, including LLM, TTS, TTI, STT, TTV, and TTM.
|
|
26
|
+
Provides a unified interface to manage and use different bindings for various modalities.
|
|
27
|
+
"""
|
|
28
|
+
def __init__(self,
|
|
29
|
+
# LLM Binding Parameters
|
|
20
30
|
binding_name: str = "lollms",
|
|
21
|
-
host_address: Optional[str] = None,
|
|
31
|
+
host_address: Optional[str] = None, # Shared host address default for all bindings if not specified
|
|
22
32
|
model_name: str = "",
|
|
23
|
-
service_key: Optional[str] = None,
|
|
24
|
-
verify_ssl_certificate: bool = True,
|
|
25
|
-
personality: Optional[int] = None,
|
|
26
33
|
llm_bindings_dir: Path = Path(__file__).parent / "llm_bindings",
|
|
27
|
-
|
|
34
|
+
llm_binding_config: Optional[Dict[str, any]] = None, # Renamed for clarity
|
|
35
|
+
personality: Optional[int] = None, # Specific to LLM lollms binding
|
|
36
|
+
|
|
37
|
+
# Optional Modality Binding Names
|
|
38
|
+
tts_binding_name: Optional[str] = None,
|
|
39
|
+
tti_binding_name: Optional[str] = None,
|
|
40
|
+
stt_binding_name: Optional[str] = None,
|
|
41
|
+
ttv_binding_name: Optional[str] = None,
|
|
42
|
+
ttm_binding_name: Optional[str] = None,
|
|
43
|
+
|
|
44
|
+
# Modality Binding Directories
|
|
45
|
+
tts_bindings_dir: Path = Path(__file__).parent / "tts_bindings",
|
|
46
|
+
tti_bindings_dir: Path = Path(__file__).parent / "tti_bindings",
|
|
47
|
+
stt_bindings_dir: Path = Path(__file__).parent / "stt_bindings",
|
|
48
|
+
ttv_bindings_dir: Path = Path(__file__).parent / "ttv_bindings",
|
|
49
|
+
ttm_bindings_dir: Path = Path(__file__).parent / "ttm_bindings",
|
|
50
|
+
|
|
51
|
+
# General Parameters (mostly defaults for LLM generation)
|
|
52
|
+
service_key: Optional[str] = None, # Shared service key/client_id
|
|
53
|
+
verify_ssl_certificate: bool = True,
|
|
28
54
|
ctx_size: Optional[int] = 8192,
|
|
29
55
|
n_predict: Optional[int] = 4096,
|
|
30
56
|
stream: bool = False,
|
|
@@ -39,37 +65,135 @@ class LollmsClient():
|
|
|
39
65
|
user_name ="user",
|
|
40
66
|
ai_name = "assistant"):
|
|
41
67
|
"""
|
|
42
|
-
Initialize the
|
|
68
|
+
Initialize the LollmsClient with LLM and optional modality bindings.
|
|
43
69
|
|
|
44
70
|
Args:
|
|
45
|
-
binding_name (str): Name of the
|
|
46
|
-
host_address (Optional[str]):
|
|
47
|
-
model_name (str):
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
personality (Optional[int]): Personality ID (used
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
71
|
+
binding_name (str): Name of the primary LLM binding (e.g., "lollms", "ollama").
|
|
72
|
+
host_address (Optional[str]): Default host address for all services. Overridden by binding defaults if None.
|
|
73
|
+
model_name (str): Default model name for the LLM binding.
|
|
74
|
+
llm_bindings_dir (Path): Directory for LLM binding implementations.
|
|
75
|
+
llm_binding_config (Optional[Dict]): Additional config for the LLM binding.
|
|
76
|
+
personality (Optional[int]): Personality ID (used by LLM 'lollms' binding).
|
|
77
|
+
tts_binding_name (Optional[str]): Name of the TTS binding to use (e.g., "lollms").
|
|
78
|
+
tti_binding_name (Optional[str]): Name of the TTI binding to use (e.g., "lollms").
|
|
79
|
+
stt_binding_name (Optional[str]): Name of the STT binding to use (e.g., "lollms").
|
|
80
|
+
ttv_binding_name (Optional[str]): Name of the TTV binding to use (e.g., "lollms").
|
|
81
|
+
ttm_binding_name (Optional[str]): Name of the TTM binding to use (e.g., "lollms").
|
|
82
|
+
tts_bindings_dir (Path): Directory for TTS bindings.
|
|
83
|
+
tti_bindings_dir (Path): Directory for TTI bindings.
|
|
84
|
+
stt_bindings_dir (Path): Directory for STT bindings.
|
|
85
|
+
ttv_bindings_dir (Path): Directory for TTV bindings.
|
|
86
|
+
ttm_bindings_dir (Path): Directory for TTM bindings.
|
|
87
|
+
service_key (Optional[str]): Shared authentication key or client_id.
|
|
88
|
+
verify_ssl_certificate (bool): Whether to verify SSL certificates.
|
|
89
|
+
ctx_size (Optional[int]): Default context size for LLM.
|
|
90
|
+
n_predict (Optional[int]): Default max tokens for LLM.
|
|
91
|
+
stream (bool): Default streaming mode for LLM.
|
|
92
|
+
temperature (float): Default temperature for LLM.
|
|
93
|
+
top_k (int): Default top_k for LLM.
|
|
94
|
+
top_p (float): Default top_p for LLM.
|
|
95
|
+
repeat_penalty (float): Default repeat penalty for LLM.
|
|
96
|
+
repeat_last_n (int): Default repeat last n for LLM.
|
|
97
|
+
seed (Optional[int]): Default seed for LLM.
|
|
98
|
+
n_threads (int): Default threads for LLM.
|
|
99
|
+
streaming_callback (Optional[Callable]): Default streaming callback for LLM.
|
|
100
|
+
user_name (str): Default user name for prompts.
|
|
101
|
+
ai_name (str): Default AI name for prompts.
|
|
65
102
|
|
|
66
103
|
Raises:
|
|
67
|
-
ValueError: If the
|
|
104
|
+
ValueError: If the primary LLM binding cannot be created.
|
|
68
105
|
"""
|
|
106
|
+
self.host_address = host_address # Store initial preference
|
|
107
|
+
self.service_key = service_key
|
|
108
|
+
self.verify_ssl_certificate = verify_ssl_certificate
|
|
109
|
+
|
|
110
|
+
# --- LLM Binding Setup ---
|
|
69
111
|
self.binding_manager = LollmsLLMBindingManager(llm_bindings_dir)
|
|
70
|
-
self.
|
|
71
|
-
|
|
72
|
-
|
|
112
|
+
self.binding = self.binding_manager.create_binding(
|
|
113
|
+
binding_name=binding_name,
|
|
114
|
+
host_address=host_address, # Pass initial host preference
|
|
115
|
+
model_name=model_name,
|
|
116
|
+
service_key=service_key,
|
|
117
|
+
verify_ssl_certificate=verify_ssl_certificate,
|
|
118
|
+
personality=personality,
|
|
119
|
+
# Pass LLM specific config if needed
|
|
120
|
+
**(llm_binding_config or {})
|
|
121
|
+
)
|
|
122
|
+
|
|
123
|
+
if self.binding is None:
|
|
124
|
+
available = self.binding_manager.get_available_bindings()
|
|
125
|
+
raise ValueError(f"Failed to create LLM binding: {binding_name}. Available: {available}")
|
|
126
|
+
|
|
127
|
+
# Determine the effective host address (use LLM binding's if initial was None)
|
|
128
|
+
effective_host_address = self.host_address
|
|
129
|
+
if effective_host_address is None and self.binding:
|
|
130
|
+
effective_host_address = self.binding.host_address
|
|
131
|
+
|
|
132
|
+
# --- Modality Binding Setup ---
|
|
133
|
+
self.tts_binding_manager = LollmsTTSBindingManager(tts_bindings_dir)
|
|
134
|
+
self.tti_binding_manager = LollmsTTIBindingManager(tti_bindings_dir)
|
|
135
|
+
self.stt_binding_manager = LollmsSTTBindingManager(stt_bindings_dir)
|
|
136
|
+
self.ttv_binding_manager = LollmsTTVBindingManager(ttv_bindings_dir)
|
|
137
|
+
self.ttm_binding_manager = LollmsTTMBindingManager(ttm_bindings_dir)
|
|
138
|
+
|
|
139
|
+
self.tts: Optional[LollmsTTSBinding] = None
|
|
140
|
+
self.tti: Optional[LollmsTTIBinding] = None
|
|
141
|
+
self.stt: Optional[LollmsSTTBinding] = None
|
|
142
|
+
self.ttv: Optional[LollmsTTVBinding] = None
|
|
143
|
+
self.ttm: Optional[LollmsTTMBinding] = None
|
|
144
|
+
|
|
145
|
+
if tts_binding_name:
|
|
146
|
+
self.tts = self.tts_binding_manager.create_binding(
|
|
147
|
+
binding_name=tts_binding_name,
|
|
148
|
+
host_address=effective_host_address,
|
|
149
|
+
service_key=self.service_key,
|
|
150
|
+
verify_ssl_certificate=self.verify_ssl_certificate
|
|
151
|
+
)
|
|
152
|
+
if self.tts is None:
|
|
153
|
+
ASCIIColors.warning(f"Failed to create TTS binding: {tts_binding_name}. Available: {self.tts_binding_manager.get_available_bindings()}")
|
|
154
|
+
|
|
155
|
+
if tti_binding_name:
|
|
156
|
+
self.tti = self.tti_binding_manager.create_binding(
|
|
157
|
+
binding_name=tti_binding_name,
|
|
158
|
+
host_address=effective_host_address,
|
|
159
|
+
service_key=self.service_key, # Passed as service_key, used as client_id by lollms TTI binding
|
|
160
|
+
verify_ssl_certificate=self.verify_ssl_certificate
|
|
161
|
+
)
|
|
162
|
+
if self.tti is None:
|
|
163
|
+
ASCIIColors.warning(f"Failed to create TTI binding: {tti_binding_name}. Available: {self.tti_binding_manager.get_available_bindings()}")
|
|
164
|
+
|
|
165
|
+
if stt_binding_name:
|
|
166
|
+
self.stt = self.stt_binding_manager.create_binding(
|
|
167
|
+
binding_name=stt_binding_name,
|
|
168
|
+
host_address=effective_host_address,
|
|
169
|
+
service_key=self.service_key,
|
|
170
|
+
verify_ssl_certificate=self.verify_ssl_certificate
|
|
171
|
+
)
|
|
172
|
+
if self.stt is None:
|
|
173
|
+
ASCIIColors.warning(f"Failed to create STT binding: {stt_binding_name}. Available: {self.stt_binding_manager.get_available_bindings()}")
|
|
174
|
+
|
|
175
|
+
if ttv_binding_name:
|
|
176
|
+
self.ttv = self.ttv_binding_manager.create_binding(
|
|
177
|
+
binding_name=ttv_binding_name,
|
|
178
|
+
host_address=effective_host_address,
|
|
179
|
+
service_key=self.service_key,
|
|
180
|
+
verify_ssl_certificate=self.verify_ssl_certificate
|
|
181
|
+
)
|
|
182
|
+
if self.ttv is None:
|
|
183
|
+
ASCIIColors.warning(f"Failed to create TTV binding: {ttv_binding_name}. Available: {self.ttv_binding_manager.get_available_bindings()}")
|
|
184
|
+
|
|
185
|
+
if ttm_binding_name:
|
|
186
|
+
self.ttm = self.ttm_binding_manager.create_binding(
|
|
187
|
+
binding_name=ttm_binding_name,
|
|
188
|
+
host_address=effective_host_address,
|
|
189
|
+
service_key=self.service_key,
|
|
190
|
+
verify_ssl_certificate=self.verify_ssl_certificate
|
|
191
|
+
)
|
|
192
|
+
if self.ttm is None:
|
|
193
|
+
ASCIIColors.warning(f"Failed to create TTM binding: {ttm_binding_name}. Available: {self.ttm_binding_manager.get_available_bindings()}")
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
# --- Store Default Generation Parameters ---
|
|
73
197
|
self.default_ctx_size = ctx_size
|
|
74
198
|
self.default_n_predict = n_predict
|
|
75
199
|
self.default_stream = stream
|
|
@@ -81,29 +205,10 @@ class LollmsClient():
|
|
|
81
205
|
self.default_seed = seed
|
|
82
206
|
self.default_n_threads = n_threads
|
|
83
207
|
self.default_streaming_callback = streaming_callback
|
|
84
|
-
|
|
85
|
-
#
|
|
86
|
-
self.binding = self.binding_manager.create_binding(
|
|
87
|
-
binding_name=binding_name,
|
|
88
|
-
host_address=host_address,
|
|
89
|
-
model_name=model_name,
|
|
90
|
-
service_key=service_key,
|
|
91
|
-
verify_ssl_certificate=verify_ssl_certificate,
|
|
92
|
-
personality=personality
|
|
93
|
-
)
|
|
94
|
-
|
|
95
|
-
if self.binding is None:
|
|
96
|
-
raise ValueError(f"Failed to create binding: {binding_name}. Available bindings: {self.binding_manager.get_available_bindings()}")
|
|
97
|
-
|
|
98
|
-
# Apply additional configuration if provided
|
|
99
|
-
if binding_config:
|
|
100
|
-
for key, value in binding_config.items():
|
|
101
|
-
setattr(self.binding, key, value)
|
|
208
|
+
|
|
209
|
+
# --- Prompt Formatting Attributes ---
|
|
102
210
|
self.user_name = user_name
|
|
103
211
|
self.ai_name = ai_name
|
|
104
|
-
self.service_key = service_key
|
|
105
|
-
|
|
106
|
-
self.verify_ssl_certificate = verify_ssl_certificate
|
|
107
212
|
self.start_header_id_template ="!@>"
|
|
108
213
|
self.end_header_id_template =": "
|
|
109
214
|
self.system_message_template ="system"
|
|
@@ -116,24 +221,25 @@ class LollmsClient():
|
|
|
116
221
|
self.end_ai_message_id_template =""
|
|
117
222
|
|
|
118
223
|
|
|
224
|
+
# --- Prompt Formatting Properties ---
|
|
119
225
|
@property
|
|
120
226
|
def system_full_header(self) -> str:
|
|
121
227
|
"""Get the start_header_id_template."""
|
|
122
228
|
return f"{self.start_header_id_template}{self.system_message_template}{self.end_header_id_template}"
|
|
123
|
-
|
|
229
|
+
|
|
124
230
|
def system_custom_header(self, ai_name) -> str:
|
|
125
231
|
"""Get the start_header_id_template."""
|
|
126
232
|
return f"{self.start_header_id_template}{ai_name}{self.end_header_id_template}"
|
|
127
|
-
|
|
233
|
+
|
|
128
234
|
@property
|
|
129
235
|
def user_full_header(self) -> str:
|
|
130
236
|
"""Get the start_header_id_template."""
|
|
131
237
|
return f"{self.start_user_header_id_template}{self.user_name}{self.end_user_header_id_template}"
|
|
132
|
-
|
|
238
|
+
|
|
133
239
|
def user_custom_header(self, user_name="user") -> str:
|
|
134
240
|
"""Get the start_header_id_template."""
|
|
135
241
|
return f"{self.start_user_header_id_template}{user_name}{self.end_user_header_id_template}"
|
|
136
|
-
|
|
242
|
+
|
|
137
243
|
@property
|
|
138
244
|
def ai_full_header(self) -> str:
|
|
139
245
|
"""Get the start_header_id_template."""
|
|
@@ -144,10 +250,13 @@ class LollmsClient():
|
|
|
144
250
|
return f"{self.start_ai_header_id_template}{ai_name}{self.end_ai_header_id_template}"
|
|
145
251
|
|
|
146
252
|
def sink(self, s=None,i=None,d=None):
|
|
253
|
+
"""Placeholder sink method."""
|
|
147
254
|
pass
|
|
255
|
+
|
|
256
|
+
# --- Core LLM Binding Methods ---
|
|
148
257
|
def tokenize(self, text: str) -> list:
|
|
149
258
|
"""
|
|
150
|
-
Tokenize text using the active binding.
|
|
259
|
+
Tokenize text using the active LLM binding.
|
|
151
260
|
|
|
152
261
|
Args:
|
|
153
262
|
text (str): The text to tokenize.
|
|
@@ -155,11 +264,13 @@ class LollmsClient():
|
|
|
155
264
|
Returns:
|
|
156
265
|
list: List of tokens.
|
|
157
266
|
"""
|
|
158
|
-
|
|
159
|
-
|
|
267
|
+
if self.binding:
|
|
268
|
+
return self.binding.tokenize(text)
|
|
269
|
+
raise RuntimeError("LLM binding not initialized.")
|
|
270
|
+
|
|
160
271
|
def detokenize(self, tokens: list) -> str:
|
|
161
272
|
"""
|
|
162
|
-
Detokenize tokens using the active binding.
|
|
273
|
+
Detokenize tokens using the active LLM binding.
|
|
163
274
|
|
|
164
275
|
Args:
|
|
165
276
|
tokens (list): List of tokens to detokenize.
|
|
@@ -167,20 +278,24 @@ class LollmsClient():
|
|
|
167
278
|
Returns:
|
|
168
279
|
str: Detokenized text.
|
|
169
280
|
"""
|
|
170
|
-
|
|
171
|
-
|
|
281
|
+
if self.binding:
|
|
282
|
+
return self.binding.detokenize(tokens)
|
|
283
|
+
raise RuntimeError("LLM binding not initialized.")
|
|
284
|
+
|
|
172
285
|
def get_model_details(self) -> dict:
|
|
173
286
|
"""
|
|
174
|
-
Get model information from the active binding.
|
|
287
|
+
Get model information from the active LLM binding.
|
|
175
288
|
|
|
176
289
|
Returns:
|
|
177
290
|
dict: Model information dictionary.
|
|
178
291
|
"""
|
|
179
|
-
|
|
180
|
-
|
|
292
|
+
if self.binding:
|
|
293
|
+
return self.binding.get_model_info()
|
|
294
|
+
raise RuntimeError("LLM binding not initialized.")
|
|
295
|
+
|
|
181
296
|
def switch_model(self, model_name: str) -> bool:
|
|
182
297
|
"""
|
|
183
|
-
Load a new model in the active binding.
|
|
298
|
+
Load a new model in the active LLM binding.
|
|
184
299
|
|
|
185
300
|
Args:
|
|
186
301
|
model_name (str): Name of the model to load.
|
|
@@ -188,18 +303,20 @@ class LollmsClient():
|
|
|
188
303
|
Returns:
|
|
189
304
|
bool: True if model loaded successfully, False otherwise.
|
|
190
305
|
"""
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
306
|
+
if self.binding:
|
|
307
|
+
return self.binding.load_model(model_name)
|
|
308
|
+
raise RuntimeError("LLM binding not initialized.")
|
|
309
|
+
|
|
310
|
+
def get_available_llm_bindings(self) -> List[str]: # Renamed for clarity
|
|
194
311
|
"""
|
|
195
|
-
Get list of available
|
|
312
|
+
Get list of available LLM binding names.
|
|
196
313
|
|
|
197
314
|
Returns:
|
|
198
|
-
List[str]: List of binding names that can be used.
|
|
315
|
+
List[str]: List of binding names that can be used for LLMs.
|
|
199
316
|
"""
|
|
200
317
|
return self.binding_manager.get_available_bindings()
|
|
201
|
-
|
|
202
|
-
def generate_text(self,
|
|
318
|
+
|
|
319
|
+
def generate_text(self,
|
|
203
320
|
prompt: str,
|
|
204
321
|
images: Optional[List[str]] = None,
|
|
205
322
|
n_predict: Optional[int] = None,
|
|
@@ -212,9 +329,9 @@ class LollmsClient():
|
|
|
212
329
|
seed: Optional[int] = None,
|
|
213
330
|
n_threads: Optional[int] = None,
|
|
214
331
|
ctx_size: int | None = None,
|
|
215
|
-
streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None) -> str:
|
|
332
|
+
streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None) -> Union[str, dict]:
|
|
216
333
|
"""
|
|
217
|
-
Generate text using the active binding, using instance defaults if parameters are not provided.
|
|
334
|
+
Generate text using the active LLM binding, using instance defaults if parameters are not provided.
|
|
218
335
|
|
|
219
336
|
Args:
|
|
220
337
|
prompt (str): The input prompt for text generation.
|
|
@@ -228,59 +345,91 @@ class LollmsClient():
|
|
|
228
345
|
repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
|
|
229
346
|
seed (Optional[int]): Random seed for generation. Uses instance default if None.
|
|
230
347
|
n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
|
|
348
|
+
ctx_size (int | None): Context size override for this generation.
|
|
231
349
|
streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
|
|
232
|
-
Uses instance default if None.
|
|
233
|
-
- First parameter (str): The chunk of text received from the stream.
|
|
234
|
-
- Second parameter (MSG_TYPE): The message type enum (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
|
|
235
350
|
|
|
236
351
|
Returns:
|
|
237
352
|
Union[str, dict]: Generated text or error dictionary if failed.
|
|
238
353
|
"""
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
354
|
+
if self.binding:
|
|
355
|
+
return self.binding.generate_text(
|
|
356
|
+
prompt=prompt,
|
|
357
|
+
images=images,
|
|
358
|
+
n_predict=n_predict if n_predict is not None else self.default_n_predict,
|
|
359
|
+
stream=stream if stream is not None else self.default_stream,
|
|
360
|
+
temperature=temperature if temperature is not None else self.default_temperature,
|
|
361
|
+
top_k=top_k if top_k is not None else self.default_top_k,
|
|
362
|
+
top_p=top_p if top_p is not None else self.default_top_p,
|
|
363
|
+
repeat_penalty=repeat_penalty if repeat_penalty is not None else self.default_repeat_penalty,
|
|
364
|
+
repeat_last_n=repeat_last_n if repeat_last_n is not None else self.default_repeat_last_n,
|
|
365
|
+
seed=seed if seed is not None else self.default_seed,
|
|
366
|
+
n_threads=n_threads if n_threads is not None else self.default_n_threads,
|
|
367
|
+
ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size,
|
|
368
|
+
streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback
|
|
369
|
+
)
|
|
370
|
+
raise RuntimeError("LLM binding not initialized.")
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def embed(self, text, **kwargs):
|
|
374
|
+
"""
|
|
375
|
+
Generate embeddings for the input text using the active LLM binding.
|
|
254
376
|
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
377
|
+
Args:
|
|
378
|
+
text (str or List[str]): Input text to embed.
|
|
379
|
+
**kwargs: Additional arguments specific to the binding's embed method.
|
|
380
|
+
|
|
381
|
+
Returns:
|
|
382
|
+
list: List of embeddings.
|
|
383
|
+
"""
|
|
384
|
+
if self.binding:
|
|
385
|
+
return self.binding.embed(text, **kwargs)
|
|
386
|
+
raise RuntimeError("LLM binding not initialized.")
|
|
258
387
|
|
|
259
388
|
|
|
260
389
|
def listModels(self):
|
|
261
|
-
|
|
390
|
+
"""Lists models available to the current LLM binding."""
|
|
391
|
+
if self.binding:
|
|
392
|
+
return self.binding.listModels()
|
|
393
|
+
raise RuntimeError("LLM binding not initialized.")
|
|
262
394
|
|
|
395
|
+
# --- Convenience Methods for Lollms LLM Binding Features ---
|
|
396
|
+
def listMountedPersonalities(self) -> Union[List[Dict], Dict]:
|
|
397
|
+
"""
|
|
398
|
+
Lists mounted personalities *if* the active LLM binding is 'lollms'.
|
|
263
399
|
|
|
400
|
+
Returns:
|
|
401
|
+
Union[List[Dict], Dict]: List of personality dicts or error dict.
|
|
402
|
+
"""
|
|
403
|
+
if self.binding and hasattr(self.binding, 'lollms_listMountedPersonalities'):
|
|
404
|
+
return self.binding.lollms_listMountedPersonalities()
|
|
405
|
+
else:
|
|
406
|
+
ASCIIColors.warning("listMountedPersonalities is only available for the 'lollms' LLM binding.")
|
|
407
|
+
return {"status": False, "error": "Functionality not available for the current binding"}
|
|
264
408
|
|
|
409
|
+
# --- Code Generation / Extraction Helpers (These might be moved to TasksLibrary later) ---
|
|
265
410
|
def generate_codes(
|
|
266
|
-
self,
|
|
267
|
-
prompt,
|
|
268
|
-
images=[],
|
|
411
|
+
self,
|
|
412
|
+
prompt,
|
|
413
|
+
images=[],
|
|
269
414
|
template=None,
|
|
270
415
|
language="json",
|
|
271
416
|
code_tag_format="markdown", # or "html"
|
|
272
|
-
max_size = None,
|
|
273
|
-
temperature = None,
|
|
274
|
-
top_k = None,
|
|
275
|
-
top_p=None,
|
|
276
|
-
repeat_penalty=None,
|
|
277
|
-
repeat_last_n=None,
|
|
278
|
-
callback=None,
|
|
279
|
-
debug=False
|
|
417
|
+
max_size = None,
|
|
418
|
+
temperature = None,
|
|
419
|
+
top_k = None,
|
|
420
|
+
top_p=None,
|
|
421
|
+
repeat_penalty=None,
|
|
422
|
+
repeat_last_n=None,
|
|
423
|
+
callback=None,
|
|
424
|
+
debug=False
|
|
280
425
|
):
|
|
426
|
+
"""
|
|
427
|
+
Generates multiple code blocks based on a prompt.
|
|
428
|
+
Uses the underlying LLM binding via `generate_text`.
|
|
429
|
+
"""
|
|
281
430
|
response_full = ""
|
|
282
|
-
full_prompt = f"""{self.system_full_header}Act as a code generation assistant that generates code from user prompt.
|
|
283
|
-
{self.user_full_header}
|
|
431
|
+
full_prompt = f"""{self.system_full_header}Act as a code generation assistant that generates code from user prompt.
|
|
432
|
+
{self.user_full_header}
|
|
284
433
|
{prompt}
|
|
285
434
|
"""
|
|
286
435
|
if template:
|
|
@@ -306,24 +455,35 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
|
|
|
306
455
|
full_prompt += f"""Do not split the code in multiple tags.
|
|
307
456
|
{self.ai_full_header}"""
|
|
308
457
|
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
458
|
+
# Use generate_text which handles images internally
|
|
459
|
+
response = self.generate_text(
|
|
460
|
+
full_prompt,
|
|
461
|
+
images=images,
|
|
462
|
+
n_predict=max_size,
|
|
463
|
+
temperature=temperature,
|
|
464
|
+
top_k=top_k,
|
|
465
|
+
top_p=top_p,
|
|
466
|
+
repeat_penalty=repeat_penalty,
|
|
467
|
+
repeat_last_n=repeat_last_n,
|
|
468
|
+
streaming_callback=callback # Assuming generate_text handles streaming callback
|
|
469
|
+
)
|
|
470
|
+
|
|
471
|
+
if isinstance(response, dict) and not response.get("status", True): # Check for error dict
|
|
472
|
+
ASCIIColors.error(f"Code generation failed: {response.get('error')}")
|
|
473
|
+
return []
|
|
474
|
+
|
|
315
475
|
response_full += response
|
|
316
|
-
codes = self.extract_code_blocks(response)
|
|
476
|
+
codes = self.extract_code_blocks(response, format=code_tag_format)
|
|
317
477
|
return codes
|
|
318
|
-
|
|
478
|
+
|
|
319
479
|
def generate_code(
|
|
320
|
-
self,
|
|
321
|
-
prompt,
|
|
480
|
+
self,
|
|
481
|
+
prompt,
|
|
322
482
|
images=[],
|
|
323
483
|
template=None,
|
|
324
484
|
language="json",
|
|
325
|
-
code_tag_format="markdown", # or "html"
|
|
326
|
-
max_size = None,
|
|
485
|
+
code_tag_format="markdown", # or "html"
|
|
486
|
+
max_size = None,
|
|
327
487
|
temperature = None,
|
|
328
488
|
top_k = None,
|
|
329
489
|
top_p=None,
|
|
@@ -331,9 +491,14 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
|
|
|
331
491
|
repeat_last_n=None,
|
|
332
492
|
callback=None,
|
|
333
493
|
debug=False ):
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
494
|
+
"""
|
|
495
|
+
Generates a single code block based on a prompt.
|
|
496
|
+
Uses the underlying LLM binding via `generate_text`.
|
|
497
|
+
Handles potential continuation if the code block is incomplete.
|
|
498
|
+
"""
|
|
499
|
+
|
|
500
|
+
full_prompt = f"""{self.system_full_header}Act as a code generation assistant that generates code from user prompt.
|
|
501
|
+
{self.user_full_header}
|
|
337
502
|
{prompt}
|
|
338
503
|
"""
|
|
339
504
|
if template:
|
|
@@ -359,44 +524,79 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
|
|
|
359
524
|
full_prompt += f"""You must return a single code tag.
|
|
360
525
|
Do not split the code in multiple tags.
|
|
361
526
|
{self.ai_full_header}"""
|
|
362
|
-
response = self.generate_text(full_prompt, images, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, streaming_callback=callback)
|
|
363
|
-
codes = self.extract_code_blocks(response)
|
|
364
|
-
if len(codes)>0:
|
|
365
|
-
if not codes[-1]["is_complete"]:
|
|
366
|
-
code = "\n".join(codes[-1]["content"].split("\n")[:-1])
|
|
367
|
-
while not codes[-1]["is_complete"]:
|
|
368
|
-
response = self.generate_text(prompt+code+self.user_full_header+"continue the code. Start from last line and continue the code. Put the code inside a markdown code tag."+self.separator_template+self.ai_full_header, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, streaming_callback=callback)
|
|
369
|
-
codes = self.extract_code_blocks(response)
|
|
370
|
-
if len(codes)==0:
|
|
371
|
-
break
|
|
372
|
-
else:
|
|
373
|
-
if not codes[-1]["is_complete"]:
|
|
374
|
-
code +="\n"+ "\n".join(codes[-1]["content"].split("\n")[:-1])
|
|
375
|
-
else:
|
|
376
|
-
code +="\n"+ "\n".join(codes[-1]["content"].split("\n"))
|
|
377
|
-
else:
|
|
378
|
-
code = codes[-1]["content"]
|
|
379
527
|
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
528
|
+
response = self.generate_text(
|
|
529
|
+
full_prompt,
|
|
530
|
+
images=images,
|
|
531
|
+
n_predict=max_size,
|
|
532
|
+
temperature=temperature,
|
|
533
|
+
top_k=top_k,
|
|
534
|
+
top_p=top_p,
|
|
535
|
+
repeat_penalty=repeat_penalty,
|
|
536
|
+
repeat_last_n=repeat_last_n,
|
|
537
|
+
streaming_callback=callback
|
|
538
|
+
)
|
|
539
|
+
|
|
540
|
+
if isinstance(response, dict) and not response.get("status", True):
|
|
541
|
+
ASCIIColors.error(f"Code generation failed: {response.get('error')}")
|
|
542
|
+
return None
|
|
543
|
+
|
|
544
|
+
codes = self.extract_code_blocks(response, format=code_tag_format)
|
|
545
|
+
code_content = None
|
|
546
|
+
|
|
547
|
+
if codes:
|
|
548
|
+
last_code = codes[-1]
|
|
549
|
+
code_content = last_code["content"]
|
|
550
|
+
|
|
551
|
+
# Handle incomplete code block continuation (simple approach)
|
|
552
|
+
max_retries = 3 # Limit continuation attempts
|
|
553
|
+
retries = 0
|
|
554
|
+
while not last_code["is_complete"] and retries < max_retries:
|
|
555
|
+
retries += 1
|
|
556
|
+
ASCIIColors.info(f"Code block seems incomplete. Attempting continuation ({retries}/{max_retries})...")
|
|
557
|
+
continuation_prompt = f"{full_prompt}{code_content}\n\n{self.user_full_header}The previous code block was incomplete. Continue the code exactly from where it left off. Do not repeat the previous part. Only provide the continuation inside a single {code_tag_format} code tag.\n{self.ai_full_header}"
|
|
558
|
+
|
|
559
|
+
continuation_response = self.generate_text(
|
|
560
|
+
continuation_prompt,
|
|
561
|
+
images=images, # Resend images if needed for context
|
|
562
|
+
n_predict=max_size, # Allow space for continuation
|
|
563
|
+
temperature=temperature, # Use same parameters
|
|
564
|
+
top_k=top_k,
|
|
565
|
+
top_p=top_p,
|
|
566
|
+
repeat_penalty=repeat_penalty,
|
|
567
|
+
repeat_last_n=repeat_last_n,
|
|
568
|
+
streaming_callback=callback
|
|
569
|
+
)
|
|
570
|
+
|
|
571
|
+
if isinstance(continuation_response, dict) and not continuation_response.get("status", True):
|
|
572
|
+
ASCIIColors.warning(f"Continuation attempt failed: {continuation_response.get('error')}")
|
|
573
|
+
break # Stop trying if generation fails
|
|
574
|
+
|
|
575
|
+
continuation_codes = self.extract_code_blocks(continuation_response, format=code_tag_format)
|
|
576
|
+
|
|
577
|
+
if continuation_codes:
|
|
578
|
+
new_code_part = continuation_codes[0]["content"]
|
|
579
|
+
code_content += "\n" + new_code_part # Append continuation
|
|
580
|
+
last_code["is_complete"] = continuation_codes[0]["is_complete"] # Update completeness
|
|
581
|
+
if last_code["is_complete"]:
|
|
582
|
+
ASCIIColors.info("Code block continuation successful.")
|
|
583
|
+
break # Exit loop if complete
|
|
584
|
+
else:
|
|
585
|
+
ASCIIColors.warning("Continuation response contained no code block.")
|
|
586
|
+
break # Stop if no code block found in continuation
|
|
587
|
+
|
|
588
|
+
if not last_code["is_complete"]:
|
|
589
|
+
ASCIIColors.warning("Code block remained incomplete after multiple attempts.")
|
|
590
|
+
|
|
591
|
+
return code_content # Return the (potentially completed) code content or None
|
|
592
|
+
|
|
383
593
|
|
|
384
594
|
def extract_code_blocks(self, text: str, format: str = "markdown") -> List[dict]:
|
|
385
595
|
"""
|
|
386
596
|
Extracts code blocks from text in Markdown or HTML format.
|
|
387
|
-
|
|
388
|
-
Parameters:
|
|
389
|
-
text (str): The text to extract code blocks from.
|
|
390
|
-
format (str): The format of code blocks ("markdown" for ``` or "html" for <code class="">).
|
|
391
|
-
|
|
392
|
-
Returns:
|
|
393
|
-
List[dict]: A list of dictionaries with:
|
|
394
|
-
- 'index' (int): Index of the code block.
|
|
395
|
-
- 'file_name' (str): File name from preceding text, if available.
|
|
396
|
-
- 'content' (str): Code block content.
|
|
397
|
-
- 'type' (str): Language type (from Markdown first line or HTML class).
|
|
398
|
-
- 'is_complete' (bool): True if block has a closing tag.
|
|
597
|
+
(Implementation remains the same as provided before)
|
|
399
598
|
"""
|
|
599
|
+
# ... (Keep the existing implementation from the previous file) ...
|
|
400
600
|
code_blocks = []
|
|
401
601
|
remaining = text
|
|
402
602
|
first_index = 0
|
|
@@ -411,150 +611,172 @@ Do not split the code in multiple tags.
|
|
|
411
611
|
remaining = remaining[index + 3:]
|
|
412
612
|
first_index += index + 3
|
|
413
613
|
except ValueError:
|
|
414
|
-
if len(indices) % 2 == 1: # Odd number of delimiters
|
|
415
|
-
indices.append(first_index + len(remaining))
|
|
614
|
+
if len(indices) % 2 == 1: # Odd number of delimiters means the last block is open
|
|
615
|
+
indices.append(first_index + len(remaining)) # Mark end of text as end of block
|
|
416
616
|
break
|
|
417
617
|
|
|
418
618
|
elif format.lower() == "html":
|
|
419
619
|
# HTML: Find <code> and </code> positions, handling nested tags
|
|
420
|
-
|
|
620
|
+
cursor = 0
|
|
621
|
+
while cursor < len(text):
|
|
421
622
|
try:
|
|
422
|
-
# Look for opening <code
|
|
423
|
-
start_index =
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
623
|
+
# Look for opening <code tag
|
|
624
|
+
start_index = text.index("<code", cursor)
|
|
625
|
+
try:
|
|
626
|
+
end_of_opening = text.index(">", start_index)
|
|
627
|
+
except ValueError:
|
|
628
|
+
break # Invalid opening tag
|
|
629
|
+
|
|
630
|
+
indices.append(start_index)
|
|
631
|
+
opening_tag_end = end_of_opening + 1
|
|
632
|
+
cursor = opening_tag_end
|
|
429
633
|
|
|
430
634
|
# Look for matching </code>, accounting for nested <code>
|
|
431
635
|
nest_level = 0
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
636
|
+
temp_cursor = cursor
|
|
637
|
+
found_closing = False
|
|
638
|
+
while temp_cursor < len(text):
|
|
639
|
+
if text[temp_cursor:].startswith("<code"):
|
|
435
640
|
nest_level += 1
|
|
436
|
-
|
|
437
|
-
|
|
641
|
+
try:
|
|
642
|
+
temp_cursor = text.index(">", temp_cursor) + 1
|
|
643
|
+
except ValueError:
|
|
644
|
+
break # Invalid nested opening tag
|
|
645
|
+
elif text[temp_cursor:].startswith("</code>"):
|
|
438
646
|
if nest_level == 0:
|
|
439
|
-
indices.append(
|
|
440
|
-
|
|
441
|
-
|
|
647
|
+
indices.append(temp_cursor)
|
|
648
|
+
cursor = temp_cursor + len("</code>")
|
|
649
|
+
found_closing = True
|
|
442
650
|
break
|
|
443
651
|
nest_level -= 1
|
|
444
|
-
|
|
652
|
+
temp_cursor += len("</code>")
|
|
445
653
|
else:
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
654
|
+
temp_cursor += 1
|
|
655
|
+
|
|
656
|
+
if not found_closing: # If no closing tag found until the end
|
|
657
|
+
indices.append(len(text))
|
|
658
|
+
break # Stop searching
|
|
659
|
+
|
|
450
660
|
except ValueError:
|
|
451
|
-
break
|
|
661
|
+
break # No more opening tags found
|
|
452
662
|
|
|
453
663
|
else:
|
|
454
664
|
raise ValueError("Format must be 'markdown' or 'html'")
|
|
455
665
|
|
|
666
|
+
# Process indices to extract blocks
|
|
456
667
|
for i in range(0, len(indices), 2):
|
|
457
668
|
block_infos = {
|
|
458
669
|
'index': i // 2,
|
|
459
670
|
'file_name': "",
|
|
460
671
|
'content': "",
|
|
461
|
-
'type': 'language-specific',
|
|
672
|
+
'type': 'language-specific', # Default type
|
|
462
673
|
'is_complete': False
|
|
463
674
|
}
|
|
464
675
|
|
|
465
|
-
# Extract preceding text for file name
|
|
466
676
|
start_pos = indices[i]
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
677
|
+
# --- Extract preceding text for potential file name hints ---
|
|
678
|
+
# Look backwards from start_pos for common patterns
|
|
679
|
+
search_area_start = max(0, start_pos - 200) # Limit search area
|
|
680
|
+
preceding_text_segment = text[search_area_start:start_pos]
|
|
681
|
+
lines = preceding_text_segment.strip().splitlines()
|
|
682
|
+
if lines:
|
|
683
|
+
last_line = lines[-1].strip()
|
|
684
|
+
# Example patterns (adjust as needed)
|
|
470
685
|
if last_line.startswith("<file_name>") and last_line.endswith("</file_name>"):
|
|
471
686
|
block_infos['file_name'] = last_line[len("<file_name>"):-len("</file_name>")].strip()
|
|
472
|
-
elif last_line.startswith("
|
|
473
|
-
block_infos['file_name'] = last_line
|
|
687
|
+
elif last_line.lower().startswith("file:") or last_line.lower().startswith("filename:"):
|
|
688
|
+
block_infos['file_name'] = last_line.split(":", 1)[1].strip()
|
|
689
|
+
# --- End file name extraction ---
|
|
474
690
|
|
|
475
|
-
# Extract content and type
|
|
691
|
+
# Extract content and type based on format
|
|
476
692
|
if format.lower() == "markdown":
|
|
477
|
-
|
|
693
|
+
content_start = start_pos + 3 # After ```
|
|
478
694
|
if i + 1 < len(indices):
|
|
479
695
|
end_pos = indices[i + 1]
|
|
480
|
-
|
|
696
|
+
content_raw = text[content_start:end_pos]
|
|
481
697
|
block_infos['is_complete'] = True
|
|
482
|
-
else:
|
|
483
|
-
|
|
698
|
+
else: # Last block is open
|
|
699
|
+
content_raw = text[content_start:]
|
|
484
700
|
block_infos['is_complete'] = False
|
|
485
701
|
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
702
|
+
# Check for language specifier on the first line
|
|
703
|
+
first_line_end = content_raw.find('\n')
|
|
704
|
+
if first_line_end != -1:
|
|
705
|
+
first_line = content_raw[:first_line_end].strip()
|
|
706
|
+
if first_line and not first_line.isspace() and ' ' not in first_line: # Basic check for language specifier
|
|
489
707
|
block_infos['type'] = first_line
|
|
490
|
-
content =
|
|
708
|
+
content = content_raw[first_line_end + 1:].strip()
|
|
709
|
+
else:
|
|
710
|
+
content = content_raw.strip()
|
|
711
|
+
else: # Single line code block or no language specifier
|
|
712
|
+
content = content_raw.strip()
|
|
713
|
+
# If content itself looks like a language specifier, clear it
|
|
714
|
+
if content and not content.isspace() and ' ' not in content and len(content)<20:
|
|
715
|
+
block_infos['type'] = content
|
|
716
|
+
content = ""
|
|
717
|
+
|
|
491
718
|
|
|
492
719
|
elif format.lower() == "html":
|
|
493
|
-
|
|
494
|
-
|
|
720
|
+
# Find end of opening tag to get content start
|
|
721
|
+
try:
|
|
722
|
+
opening_tag_end = text.index(">", start_pos) + 1
|
|
723
|
+
except ValueError:
|
|
724
|
+
continue # Should not happen if indices are correct
|
|
725
|
+
|
|
726
|
+
opening_tag = text[start_pos:opening_tag_end]
|
|
727
|
+
|
|
495
728
|
if i + 1 < len(indices):
|
|
496
729
|
end_pos = indices[i + 1]
|
|
497
|
-
content = text[
|
|
730
|
+
content = text[opening_tag_end:end_pos].strip()
|
|
498
731
|
block_infos['is_complete'] = True
|
|
499
|
-
else:
|
|
500
|
-
content =
|
|
732
|
+
else: # Last block is open
|
|
733
|
+
content = text[opening_tag_end:].strip()
|
|
501
734
|
block_infos['is_complete'] = False
|
|
502
735
|
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
736
|
+
|
|
737
|
+
# Extract language from class attribute (more robust)
|
|
738
|
+
import re
|
|
739
|
+
match = re.search(r'class\s*=\s*["\']([^"\']*)["\']', opening_tag)
|
|
740
|
+
if match:
|
|
741
|
+
classes = match.group(1).split()
|
|
742
|
+
for cls in classes:
|
|
743
|
+
if cls.startswith("language-"):
|
|
744
|
+
block_infos['type'] = cls[len("language-"):]
|
|
745
|
+
break # Take the first language- class found
|
|
510
746
|
|
|
511
747
|
block_infos['content'] = content
|
|
512
|
-
|
|
748
|
+
if block_infos['content'] or block_infos['is_complete']: # Add block if it has content or is closed
|
|
749
|
+
code_blocks.append(block_infos)
|
|
513
750
|
|
|
514
751
|
return code_blocks
|
|
515
752
|
|
|
753
|
+
|
|
516
754
|
def extract_thinking_blocks(self, text: str) -> List[str]:
|
|
517
755
|
"""
|
|
518
756
|
Extracts content between <thinking> or <think> tags from a given text.
|
|
519
|
-
|
|
520
|
-
Parameters:
|
|
521
|
-
text (str): The text containing thinking blocks
|
|
522
|
-
|
|
523
|
-
Returns:
|
|
524
|
-
List[str]: List of extracted thinking contents
|
|
757
|
+
(Implementation remains the same as provided before)
|
|
525
758
|
"""
|
|
526
759
|
import re
|
|
527
|
-
|
|
528
|
-
# Pattern to match both <thinking> and <think> blocks with matching tags
|
|
529
760
|
pattern = r'<(thinking|think)>(.*?)</\1>'
|
|
530
|
-
matches = re.finditer(pattern, text, re.DOTALL)
|
|
531
|
-
|
|
532
|
-
# Extract content from the second group (index 2) and clean
|
|
761
|
+
matches = re.finditer(pattern, text, re.DOTALL | re.IGNORECASE) # Added IGNORECASE
|
|
533
762
|
thinking_blocks = [match.group(2).strip() for match in matches]
|
|
534
|
-
|
|
535
763
|
return thinking_blocks
|
|
536
764
|
|
|
537
765
|
def remove_thinking_blocks(self, text: str) -> str:
|
|
538
766
|
"""
|
|
539
767
|
Removes thinking blocks (either <thinking> or <think>) from text including the tags.
|
|
540
|
-
|
|
541
|
-
Parameters:
|
|
542
|
-
text (str): The text containing thinking blocks
|
|
543
|
-
|
|
544
|
-
Returns:
|
|
545
|
-
str: Text with thinking blocks removed
|
|
768
|
+
(Implementation remains the same as provided before)
|
|
546
769
|
"""
|
|
547
770
|
import re
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
cleaned_text = re.sub(
|
|
552
|
-
|
|
553
|
-
# Remove extra whitespace and normalize newlines
|
|
554
|
-
cleaned_text = re.sub(r'\n\s*\n', '\n\n', cleaned_text.strip())
|
|
555
|
-
|
|
771
|
+
pattern = r'<(thinking|think)>.*?</\1>\s*' # Added \s* to remove potential trailing whitespace/newlines
|
|
772
|
+
cleaned_text = re.sub(pattern, '', text, flags=re.DOTALL | re.IGNORECASE) # Added IGNORECASE
|
|
773
|
+
# Further cleanup might be needed depending on desired newline handling
|
|
774
|
+
cleaned_text = re.sub(r'\n{3,}', '\n\n', cleaned_text).strip() # Collapse excess newlines
|
|
556
775
|
return cleaned_text
|
|
557
776
|
|
|
777
|
+
# --- Task-oriented methods (Candidates for moving to TasksLibrary) ---
|
|
778
|
+
# Keeping them here for now, but they primarily use generate_code/generate_text
|
|
779
|
+
|
|
558
780
|
def yes_no(
|
|
559
781
|
self,
|
|
560
782
|
question: str,
|
|
@@ -565,174 +787,242 @@ Do not split the code in multiple tags.
|
|
|
565
787
|
callback = None
|
|
566
788
|
) -> bool | dict:
|
|
567
789
|
"""
|
|
568
|
-
Answers a yes/no question.
|
|
569
|
-
|
|
570
|
-
Args:
|
|
571
|
-
question (str): The yes/no question to answer.
|
|
572
|
-
context (str, optional): Additional context to provide for the question.
|
|
573
|
-
max_answer_length (int, optional): Maximum string length allowed for the response. Defaults to None.
|
|
574
|
-
conditionning (str, optional): An optional system message to put at the beginning of the prompt.
|
|
575
|
-
return_explanation (bool, optional): If True, returns a dictionary with the answer and explanation. Defaults to False.
|
|
576
|
-
|
|
577
|
-
Returns:
|
|
578
|
-
bool or dict:
|
|
579
|
-
- If return_explanation is False, returns a boolean (True for 'yes', False for 'no').
|
|
580
|
-
- If return_explanation is True, returns a dictionary with the answer and explanation.
|
|
790
|
+
Answers a yes/no question using LLM JSON generation.
|
|
791
|
+
(Implementation requires self.generate_code which uses self.generate_text)
|
|
581
792
|
"""
|
|
793
|
+
# ... (Implementation as provided before, relies on self.generate_code) ...
|
|
582
794
|
if not callback:
|
|
583
795
|
callback=self.sink
|
|
584
796
|
|
|
585
|
-
prompt = f"{conditionning}\
|
|
586
|
-
|
|
587
|
-
template = """
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
response = self.generate_text_code(
|
|
797
|
+
prompt = f"{self.system_full_header}{conditionning}\n{self.user_full_header}Based on the context, answer the question with only 'true' or 'false' and provide a brief explanation.\nContext:\n{context}\nQuestion: {question}\n{self.ai_full_header}"
|
|
798
|
+
|
|
799
|
+
template = """{
|
|
800
|
+
"answer": true | false, // boolean required
|
|
801
|
+
"explanation": "A brief explanation for the answer"
|
|
802
|
+
}"""
|
|
803
|
+
|
|
804
|
+
# Assuming generate_code exists and works as intended
|
|
805
|
+
response_json_str = self.generate_code(
|
|
595
806
|
prompt=prompt,
|
|
596
|
-
template=template,
|
|
597
807
|
language="json",
|
|
808
|
+
template=template,
|
|
598
809
|
code_tag_format="markdown",
|
|
599
810
|
max_size=max_answer_length,
|
|
600
811
|
callback=callback
|
|
601
812
|
)
|
|
602
|
-
|
|
813
|
+
|
|
814
|
+
if response_json_str is None:
|
|
815
|
+
ASCIIColors.error("LLM failed to generate JSON for yes/no question.")
|
|
816
|
+
return {"answer": False, "explanation": "Generation failed"} if return_explanation else False
|
|
817
|
+
|
|
603
818
|
try:
|
|
604
|
-
|
|
605
|
-
|
|
819
|
+
# Attempt to repair minor JSON issues before parsing
|
|
820
|
+
import json
|
|
821
|
+
import re
|
|
822
|
+
# Remove potential comments, trailing commas etc.
|
|
823
|
+
response_json_str = re.sub(r"//.*", "", response_json_str)
|
|
824
|
+
response_json_str = re.sub(r",\s*}", "}", response_json_str)
|
|
825
|
+
response_json_str = re.sub(r",\s*]", "]", response_json_str)
|
|
826
|
+
|
|
827
|
+
parsed_response = json.loads(response_json_str)
|
|
828
|
+
answer = parsed_response.get("answer")
|
|
606
829
|
explanation = parsed_response.get("explanation", "")
|
|
607
|
-
|
|
830
|
+
|
|
831
|
+
# Validate boolean type
|
|
832
|
+
if not isinstance(answer, bool):
|
|
833
|
+
# Attempt to coerce common string representations
|
|
834
|
+
if isinstance(answer, str):
|
|
835
|
+
answer_lower = answer.lower()
|
|
836
|
+
if answer_lower == 'true':
|
|
837
|
+
answer = True
|
|
838
|
+
elif answer_lower == 'false':
|
|
839
|
+
answer = False
|
|
840
|
+
else:
|
|
841
|
+
raise ValueError("Answer is not a valid boolean representation.")
|
|
842
|
+
else:
|
|
843
|
+
raise ValueError("Answer is not a boolean.")
|
|
844
|
+
|
|
845
|
+
|
|
608
846
|
if return_explanation:
|
|
609
847
|
return {"answer": answer, "explanation": explanation}
|
|
610
848
|
else:
|
|
611
849
|
return answer
|
|
612
|
-
except json.JSONDecodeError:
|
|
613
|
-
|
|
850
|
+
except (json.JSONDecodeError, ValueError) as e:
|
|
851
|
+
ASCIIColors.error(f"Failed to parse or validate JSON response for yes/no: {e}")
|
|
852
|
+
ASCIIColors.error(f"Received: {response_json_str}")
|
|
853
|
+
# Fallback: try simple string check in the raw LLM output (less reliable)
|
|
854
|
+
if "true" in response_json_str.lower():
|
|
855
|
+
answer_fallback = True
|
|
856
|
+
elif "false" in response_json_str.lower():
|
|
857
|
+
answer_fallback = False
|
|
858
|
+
else:
|
|
859
|
+
answer_fallback = False # Default to false on ambiguity
|
|
860
|
+
|
|
861
|
+
if return_explanation:
|
|
862
|
+
return {"answer": answer_fallback, "explanation": f"Parsing failed ({e}). Fallback used."}
|
|
863
|
+
else:
|
|
864
|
+
return answer_fallback
|
|
865
|
+
|
|
614
866
|
|
|
615
867
|
def multichoice_question(
|
|
616
|
-
self,
|
|
617
|
-
question: str,
|
|
618
|
-
possible_answers: list,
|
|
619
|
-
context: str = "",
|
|
620
|
-
max_answer_length: int = None,
|
|
621
|
-
conditionning: str = "",
|
|
868
|
+
self,
|
|
869
|
+
question: str,
|
|
870
|
+
possible_answers: list,
|
|
871
|
+
context: str = "",
|
|
872
|
+
max_answer_length: int = None,
|
|
873
|
+
conditionning: str = "",
|
|
622
874
|
return_explanation: bool = False,
|
|
623
875
|
callback = None
|
|
624
|
-
) -> dict:
|
|
876
|
+
) -> int | dict: # Corrected return type hint
|
|
625
877
|
"""
|
|
626
|
-
Interprets a multi-choice question
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
Args:
|
|
630
|
-
question (str): The multi-choice question posed by the user.
|
|
631
|
-
possible_answers (List[Any]): A list containing all valid options for the chosen value.
|
|
632
|
-
context (str, optional): Additional context to provide for the question.
|
|
633
|
-
max_answer_length (int, optional): Maximum string length allowed while interpreting the user's responses. Defaults to None.
|
|
634
|
-
conditionning (str, optional): An optional system message to put at the beginning of the prompt.
|
|
635
|
-
return_explanation (bool, optional): If True, returns a dictionary with the choice and explanation. Defaults to False.
|
|
636
|
-
|
|
637
|
-
Returns:
|
|
638
|
-
dict:
|
|
639
|
-
- If return_explanation is False, returns a JSON object with only the selected choice index.
|
|
640
|
-
- If return_explanation is True, returns a JSON object with the selected choice index and an explanation.
|
|
641
|
-
- Returns {"index": -1} if no match is found among the possible answers.
|
|
878
|
+
Interprets a multi-choice question using LLM JSON generation.
|
|
879
|
+
(Implementation requires self.generate_code which uses self.generate_text)
|
|
642
880
|
"""
|
|
881
|
+
# ... (Implementation as provided before, relies on self.generate_code) ...
|
|
643
882
|
if not callback:
|
|
644
883
|
callback=self.sink
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
for i, answer in enumerate(possible_answers):
|
|
651
|
-
prompt += f"{i}. {answer}\n"
|
|
652
|
-
|
|
884
|
+
|
|
885
|
+
choices_text = "\n".join([f"{i}. {ans}" for i, ans in enumerate(possible_answers)])
|
|
886
|
+
|
|
887
|
+
prompt = f"{self.system_full_header}{conditionning}\n"
|
|
888
|
+
prompt += f"{self.user_full_header}Answer the following multiple-choice question based on the context. Respond with a JSON object containing the index of the single best answer and an optional explanation.\n"
|
|
653
889
|
if context:
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
prompt += "
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
890
|
+
prompt += f"Context:\n{context}\n"
|
|
891
|
+
prompt += f"Question:\n{question}\n"
|
|
892
|
+
prompt += f"Possible Answers:\n{choices_text}\n"
|
|
893
|
+
prompt += f"{self.ai_full_header}"
|
|
894
|
+
|
|
895
|
+
template = """{
|
|
896
|
+
"index": 0, // integer index required
|
|
897
|
+
"explanation": "Optional explanation for the choice"
|
|
898
|
+
}"""
|
|
899
|
+
|
|
900
|
+
response_json_str = self.generate_code(
|
|
901
|
+
prompt=prompt,
|
|
902
|
+
template=template,
|
|
903
|
+
language="json",
|
|
904
|
+
code_tag_format="markdown",
|
|
905
|
+
max_size=max_answer_length,
|
|
906
|
+
callback=callback
|
|
907
|
+
)
|
|
908
|
+
|
|
909
|
+
if response_json_str is None:
|
|
910
|
+
ASCIIColors.error("LLM failed to generate JSON for multichoice question.")
|
|
911
|
+
return {"index": -1, "explanation": "Generation failed"} if return_explanation else -1
|
|
912
|
+
|
|
665
913
|
try:
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
914
|
+
# Attempt to repair minor JSON issues before parsing
|
|
915
|
+
import json
|
|
916
|
+
import re
|
|
917
|
+
response_json_str = re.sub(r"//.*", "", response_json_str)
|
|
918
|
+
response_json_str = re.sub(r",\s*}", "}", response_json_str)
|
|
919
|
+
response_json_str = re.sub(r",\s*]", "]", response_json_str)
|
|
920
|
+
|
|
921
|
+
result = json.loads(response_json_str)
|
|
922
|
+
index = result.get("index")
|
|
923
|
+
explanation = result.get("explanation", "")
|
|
924
|
+
|
|
925
|
+
if not isinstance(index, int) or not (0 <= index < len(possible_answers)):
|
|
926
|
+
raise ValueError(f"Invalid index returned: {index}")
|
|
927
|
+
|
|
674
928
|
if return_explanation:
|
|
675
|
-
return
|
|
929
|
+
return {"index": index, "explanation": explanation}
|
|
676
930
|
else:
|
|
677
|
-
return
|
|
678
|
-
|
|
931
|
+
return index
|
|
932
|
+
except (json.JSONDecodeError, ValueError) as e:
|
|
933
|
+
ASCIIColors.error(f"Failed to parse or validate JSON response for multichoice: {e}")
|
|
934
|
+
ASCIIColors.error(f"Received: {response_json_str}")
|
|
935
|
+
# Fallback logic could be added here (e.g., regex for index) but is less reliable
|
|
936
|
+
return {"index": -1, "explanation": f"Parsing failed ({e})."} if return_explanation else -1
|
|
937
|
+
|
|
938
|
+
|
|
679
939
|
def multichoice_ranking(
|
|
680
|
-
self,
|
|
681
|
-
question: str,
|
|
682
|
-
possible_answers: list,
|
|
683
|
-
context: str = "",
|
|
684
|
-
max_answer_length: int =
|
|
685
|
-
conditionning: str = "",
|
|
940
|
+
self,
|
|
941
|
+
question: str,
|
|
942
|
+
possible_answers: list,
|
|
943
|
+
context: str = "",
|
|
944
|
+
max_answer_length: int = None,
|
|
945
|
+
conditionning: str = "",
|
|
686
946
|
return_explanation: bool = False,
|
|
687
947
|
callback = None
|
|
688
948
|
) -> dict:
|
|
689
949
|
"""
|
|
690
|
-
Ranks answers for a question from best to worst
|
|
691
|
-
|
|
692
|
-
Args:
|
|
693
|
-
question (str): The question for which the answers are being ranked.
|
|
694
|
-
possible_answers (List[Any]): A list of possible answers to rank.
|
|
695
|
-
context (str, optional): Additional context to provide for the question.
|
|
696
|
-
max_answer_length (int, optional): Maximum string length allowed for the response. Defaults to 50.
|
|
697
|
-
conditionning (str, optional): An optional system message to put at the beginning of the prompt.
|
|
698
|
-
return_explanation (bool, optional): If True, returns a dictionary with the ranked order and explanations. Defaults to False.
|
|
699
|
-
|
|
700
|
-
Returns:
|
|
701
|
-
dict:
|
|
702
|
-
- If return_explanation is False, returns a JSON object with only the ranked order.
|
|
703
|
-
- If return_explanation is True, returns a JSON object with the ranked order and explanations.
|
|
950
|
+
Ranks answers for a question from best to worst using LLM JSON generation.
|
|
951
|
+
(Implementation requires self.generate_code which uses self.generate_text)
|
|
704
952
|
"""
|
|
953
|
+
# ... (Implementation as provided before, relies on self.generate_code) ...
|
|
705
954
|
if not callback:
|
|
706
|
-
callback=self.sink
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
for i, answer in enumerate(possible_answers):
|
|
713
|
-
prompt += f"{i}. {answer}\n"
|
|
714
|
-
|
|
955
|
+
callback = self.sink
|
|
956
|
+
|
|
957
|
+
choices_text = "\n".join([f"{i}. {ans}" for i, ans in enumerate(possible_answers)])
|
|
958
|
+
|
|
959
|
+
prompt = f"{self.system_full_header}{conditionning}\n"
|
|
960
|
+
prompt += f"{self.user_full_header}Rank the following answers to the question from best to worst based on the context. Respond with a JSON object containing a list of indices in ranked order and an optional list of explanations.\n"
|
|
715
961
|
if context:
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
prompt += "
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
962
|
+
prompt += f"Context:\n{context}\n"
|
|
963
|
+
prompt += f"Question:\n{question}\n"
|
|
964
|
+
prompt += f"Possible Answers to Rank:\n{choices_text}\n"
|
|
965
|
+
prompt += f"{self.ai_full_header}"
|
|
966
|
+
|
|
967
|
+
template = """{
|
|
968
|
+
"ranking": [0, 1, 2], // list of integer indices required, length must match number of answers
|
|
969
|
+
"explanations": ["Optional explanation 1", "Optional explanation 2", "Optional explanation 3"] // Optional list of strings
|
|
970
|
+
}"""
|
|
971
|
+
|
|
972
|
+
response_json_str = self.generate_code(
|
|
973
|
+
prompt=prompt,
|
|
974
|
+
template=template,
|
|
975
|
+
language="json",
|
|
976
|
+
code_tag_format="markdown",
|
|
977
|
+
max_size=max_answer_length,
|
|
978
|
+
callback=callback
|
|
979
|
+
)
|
|
980
|
+
|
|
981
|
+
default_return = {"ranking": [], "explanations": []} if return_explanation else {"ranking": []}
|
|
982
|
+
|
|
983
|
+
if response_json_str is None:
|
|
984
|
+
ASCIIColors.error("LLM failed to generate JSON for ranking.")
|
|
985
|
+
return default_return
|
|
986
|
+
|
|
726
987
|
try:
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
988
|
+
# Attempt to repair minor JSON issues before parsing
|
|
989
|
+
import json
|
|
990
|
+
import re
|
|
991
|
+
response_json_str = re.sub(r"//.*", "", response_json_str)
|
|
992
|
+
response_json_str = re.sub(r",\s*}", "}", response_json_str)
|
|
993
|
+
response_json_str = re.sub(r",\s*]", "]", response_json_str)
|
|
994
|
+
|
|
995
|
+
result = json.loads(response_json_str)
|
|
996
|
+
ranking = result.get("ranking")
|
|
997
|
+
explanations = result.get("explanations", []) if return_explanation else None
|
|
998
|
+
|
|
999
|
+
# Validation
|
|
1000
|
+
if not isinstance(ranking, list) or len(ranking) != len(possible_answers):
|
|
1001
|
+
raise ValueError("Ranking is not a list or has incorrect length.")
|
|
1002
|
+
if not all(isinstance(idx, int) and 0 <= idx < len(possible_answers) for idx in ranking):
|
|
1003
|
+
raise ValueError("Ranking contains invalid indices.")
|
|
1004
|
+
if len(set(ranking)) != len(possible_answers):
|
|
1005
|
+
raise ValueError("Ranking contains duplicate indices.")
|
|
1006
|
+
if return_explanation and not isinstance(explanations, list):
|
|
1007
|
+
ASCIIColors.warning("Explanations format is invalid, returning ranking only.")
|
|
1008
|
+
explanations = None # Ignore invalid explanations
|
|
1009
|
+
|
|
1010
|
+
|
|
1011
|
+
if return_explanation:
|
|
1012
|
+
return {"ranking": ranking, "explanations": explanations or [""] * len(ranking)} # Provide empty strings if explanations were invalid/missing
|
|
1013
|
+
else:
|
|
1014
|
+
return {"ranking": ranking}
|
|
1015
|
+
|
|
1016
|
+
except (json.JSONDecodeError, ValueError) as e:
|
|
1017
|
+
ASCIIColors.error(f"Failed to parse or validate JSON response for ranking: {e}")
|
|
1018
|
+
ASCIIColors.error(f"Received: {response_json_str}")
|
|
1019
|
+
return default_return
|
|
1020
|
+
|
|
1021
|
+
# --- Summarization / Analysis Methods (Candidates for TasksLibrary) ---
|
|
1022
|
+
# These use generate_text and tokenization/detokenization
|
|
1023
|
+
|
|
734
1024
|
def sequential_summarize(
|
|
735
|
-
self,
|
|
1025
|
+
self,
|
|
736
1026
|
text:str,
|
|
737
1027
|
chunk_processing_prompt:str="Extract relevant information from the current text chunk and update the memory if needed.",
|
|
738
1028
|
chunk_processing_output_format="markdown",
|
|
@@ -740,42 +1030,43 @@ Do not split the code in multiple tags.
|
|
|
740
1030
|
final_output_format="markdown",
|
|
741
1031
|
ctx_size:int=None,
|
|
742
1032
|
chunk_size:int=None,
|
|
1033
|
+
overlap:int=None, # Added overlap
|
|
743
1034
|
bootstrap_chunk_size:int=None,
|
|
744
1035
|
bootstrap_steps:int=None,
|
|
745
1036
|
callback = None,
|
|
746
1037
|
debug:bool= False):
|
|
747
1038
|
"""
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
Parameters:
|
|
752
|
-
text (str): The input text to be summarized.
|
|
753
|
-
chunk_processing_prompt (str, optional): The prompt used for processing each chunk. Defaults to "".
|
|
754
|
-
chunk_processing_output_format (str, optional): The format of the output for each chunk. Defaults to "markdown".
|
|
755
|
-
final_memory_processing_prompt (str, optional): The prompt used for processing the final memory. Defaults to "Create final summary using this memory.".
|
|
756
|
-
final_output_format (str, optional): The format of the final output. Defaults to "markdown".
|
|
757
|
-
ctx_size (int, optional): The size of the context. Defaults to None.
|
|
758
|
-
chunk_size (int, optional): The size of each chunk. Defaults to None.
|
|
759
|
-
callback (callable, optional): A function to be called after processing each chunk. Defaults to None.
|
|
760
|
-
debug (bool, optional): A flag to enable debug mode. Defaults to False.
|
|
761
|
-
|
|
762
|
-
Returns:
|
|
763
|
-
The final summary in the specified format.
|
|
1039
|
+
Processes text in chunks sequentially, updating a memory at each step.
|
|
1040
|
+
(Implementation requires self.tokenize, self.detokenize, self.generate_text, self.extract_code_blocks)
|
|
764
1041
|
"""
|
|
1042
|
+
# ... (Implementation as provided before, relies on core methods) ...
|
|
1043
|
+
if not callback:
|
|
1044
|
+
callback = self.sink
|
|
1045
|
+
|
|
765
1046
|
if ctx_size is None:
|
|
766
|
-
ctx_size = self.
|
|
767
|
-
|
|
1047
|
+
ctx_size = self.default_ctx_size or 8192 # Provide a fallback default
|
|
768
1048
|
if chunk_size is None:
|
|
769
|
-
chunk_size = ctx_size//4
|
|
770
|
-
|
|
1049
|
+
chunk_size = ctx_size // 4
|
|
1050
|
+
if overlap is None:
|
|
1051
|
+
overlap = chunk_size // 10 # Default overlap
|
|
1052
|
+
if bootstrap_chunk_size is None:
|
|
1053
|
+
bootstrap_chunk_size = chunk_size // 2 # Smaller initial chunks
|
|
1054
|
+
if bootstrap_steps is None:
|
|
1055
|
+
bootstrap_steps = 2 # Process first few chunks smaller
|
|
1056
|
+
|
|
771
1057
|
# Tokenize entire text
|
|
772
|
-
|
|
1058
|
+
try:
|
|
1059
|
+
all_tokens = self.tokenize(text)
|
|
1060
|
+
except RuntimeError as e:
|
|
1061
|
+
ASCIIColors.error(f"Tokenization failed: {e}")
|
|
1062
|
+
return "Error: Could not tokenize input text."
|
|
773
1063
|
total_tokens = len(all_tokens)
|
|
774
|
-
|
|
1064
|
+
|
|
775
1065
|
# Initialize memory and chunk index
|
|
776
1066
|
memory = ""
|
|
777
1067
|
start_token_idx = 0
|
|
778
|
-
|
|
1068
|
+
chunk_id = 0
|
|
1069
|
+
|
|
779
1070
|
# Create static prompt template
|
|
780
1071
|
static_prompt_template = f"""{self.system_full_header}
|
|
781
1072
|
You are a structured sequential text summary assistant that processes documents chunk by chunk, updating a memory of previously generated information at each step.
|
|
@@ -789,9 +1080,7 @@ Update the memory by merging previous information with new details from this tex
|
|
|
789
1080
|
Only add information explicitly present in the chunk. Retain all relevant prior memory unless clarified or updated by the current chunk.
|
|
790
1081
|
|
|
791
1082
|
----
|
|
792
|
-
|
|
793
|
-
# Chunk number: {{chunk_id}}
|
|
794
|
-
----
|
|
1083
|
+
Text chunk (Chunk number: {{chunk_id}}):
|
|
795
1084
|
```markdown
|
|
796
1085
|
{{chunk}}
|
|
797
1086
|
```
|
|
@@ -804,298 +1093,525 @@ Before updating, verify each requested detail:
|
|
|
804
1093
|
|
|
805
1094
|
Include only confirmed details in the output.
|
|
806
1095
|
Rewrite the full memory including the updates and keeping relevant data.
|
|
807
|
-
Do not discuss the information inside
|
|
808
|
-
|
|
809
|
-
----
|
|
810
|
-
# Current document analysis memory:
|
|
1096
|
+
Do not discuss the information inside the memory, just put the relevant information without comments.
|
|
1097
|
+
The output memory must be put inside a {chunk_processing_output_format} markdown code block.
|
|
811
1098
|
----
|
|
1099
|
+
Current document analysis memory:
|
|
812
1100
|
```{chunk_processing_output_format}
|
|
813
1101
|
{{memory}}
|
|
814
1102
|
```
|
|
815
1103
|
{self.ai_full_header}
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
1104
|
+
```{chunk_processing_output_format}
|
|
1105
|
+
""" # Added start of code block for AI
|
|
1106
|
+
|
|
1107
|
+
# Calculate static prompt tokens (with estimated placeholders)
|
|
1108
|
+
example_prompt = static_prompt_template.format(
|
|
1109
|
+
custom_prompt=chunk_processing_prompt,
|
|
1110
|
+
memory="<est_memory>",
|
|
1111
|
+
chunk="<est_chunk>",
|
|
1112
|
+
chunk_id=0
|
|
1113
|
+
)
|
|
1114
|
+
try:
|
|
1115
|
+
static_tokens = len(self.tokenize(example_prompt)) - len(self.tokenize("<est_memory>")) - len(self.tokenize("<est_chunk>"))
|
|
1116
|
+
except RuntimeError as e:
|
|
1117
|
+
ASCIIColors.error(f"Tokenization failed during setup: {e}")
|
|
1118
|
+
return "Error: Could not calculate prompt size."
|
|
1119
|
+
|
|
822
1120
|
# Process text in chunks
|
|
823
1121
|
while start_token_idx < total_tokens:
|
|
824
|
-
# Calculate available tokens for chunk
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
1122
|
+
# Calculate available tokens for chunk + memory
|
|
1123
|
+
available_tokens_for_dynamic_content = ctx_size - static_tokens - (self.default_n_predict or 1024) # Reserve space for output
|
|
1124
|
+
if available_tokens_for_dynamic_content <= 100: # Need some minimum space
|
|
1125
|
+
ASCIIColors.error("Context size too small for summarization with current settings.")
|
|
1126
|
+
return "Error: Context size too small."
|
|
1127
|
+
|
|
1128
|
+
# Estimate token split between memory and chunk (e.g., 50/50)
|
|
1129
|
+
max_memory_tokens = available_tokens_for_dynamic_content // 2
|
|
1130
|
+
max_chunk_tokens = available_tokens_for_dynamic_content - max_memory_tokens
|
|
1131
|
+
|
|
1132
|
+
# Truncate memory if needed
|
|
1133
|
+
current_memory_tokens = self.tokenize(memory)
|
|
1134
|
+
if len(current_memory_tokens) > max_memory_tokens:
|
|
1135
|
+
memory = self.detokenize(current_memory_tokens[-max_memory_tokens:]) # Keep recent memory
|
|
1136
|
+
if debug: ASCIIColors.yellow(f"Memory truncated to {max_memory_tokens} tokens.")
|
|
1137
|
+
|
|
1138
|
+
# Determine actual chunk size based on remaining space and settings
|
|
1139
|
+
current_chunk_size = bootstrap_chunk_size if chunk_id < bootstrap_steps else chunk_size
|
|
1140
|
+
current_chunk_size = min(current_chunk_size, max_chunk_tokens) # Adjust chunk size based on available space
|
|
1141
|
+
|
|
1142
|
+
end_token_idx = min(start_token_idx + current_chunk_size, total_tokens)
|
|
836
1143
|
chunk_tokens = all_tokens[start_token_idx:end_token_idx]
|
|
837
1144
|
chunk = self.detokenize(chunk_tokens)
|
|
838
|
-
|
|
839
|
-
|
|
1145
|
+
|
|
1146
|
+
chunk_id += 1
|
|
1147
|
+
callback(f"Processing chunk {chunk_id}...", MSG_TYPE.MSG_TYPE_STEP)
|
|
1148
|
+
|
|
840
1149
|
# Generate memory update
|
|
841
|
-
prompt = static_prompt_template.format(
|
|
1150
|
+
prompt = static_prompt_template.format(
|
|
1151
|
+
custom_prompt=chunk_processing_prompt,
|
|
1152
|
+
memory=memory,
|
|
1153
|
+
chunk=chunk,
|
|
1154
|
+
chunk_id=chunk_id
|
|
1155
|
+
)
|
|
842
1156
|
if debug:
|
|
843
|
-
ASCIIColors.
|
|
844
|
-
ASCIIColors.
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
if
|
|
849
|
-
|
|
850
|
-
|
|
1157
|
+
ASCIIColors.magenta(f"--- Chunk {chunk_id} Prompt ---")
|
|
1158
|
+
ASCIIColors.cyan(prompt)
|
|
1159
|
+
|
|
1160
|
+
response = self.generate_text(prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback)
|
|
1161
|
+
|
|
1162
|
+
if isinstance(response, dict): # Handle generation error
|
|
1163
|
+
ASCIIColors.error(f"Chunk {chunk_id} processing failed: {response.get('error')}")
|
|
1164
|
+
# Option: skip chunk or stop? Let's skip for now.
|
|
1165
|
+
start_token_idx = end_token_idx # Move to next chunk index
|
|
1166
|
+
continue
|
|
1167
|
+
|
|
1168
|
+
memory_code_blocks = self.extract_code_blocks(response, format=chunk_processing_output_format)
|
|
1169
|
+
if memory_code_blocks:
|
|
1170
|
+
memory = memory_code_blocks[0]["content"] # Assume first block is the memory
|
|
1171
|
+
else:
|
|
1172
|
+
# Fallback: Try to extract from the end if the AI added text after the block
|
|
1173
|
+
end_tag = f"```{chunk_processing_output_format}"
|
|
1174
|
+
last_occurrence = response.rfind(end_tag)
|
|
1175
|
+
if last_occurrence != -1:
|
|
1176
|
+
# Extract content between the start and end tags
|
|
1177
|
+
start_tag_len = len(f"```{chunk_processing_output_format}\n") # Approx
|
|
1178
|
+
potential_memory = response[last_occurrence + start_tag_len:].strip()
|
|
1179
|
+
if potential_memory.endswith("```"):
|
|
1180
|
+
potential_memory = potential_memory[:-3].strip()
|
|
1181
|
+
if potential_memory: # Use if non-empty
|
|
1182
|
+
memory = potential_memory
|
|
1183
|
+
else: # If extraction failed, keep old memory or use raw response? Use raw response for now.
|
|
1184
|
+
ASCIIColors.warning(f"Could not extract memory block for chunk {chunk_id}. Using raw response.")
|
|
1185
|
+
memory = response.strip().rstrip('```') # Basic cleanup
|
|
1186
|
+
else:
|
|
1187
|
+
ASCIIColors.warning(f"Could not extract memory block for chunk {chunk_id}. Using raw response.")
|
|
1188
|
+
memory = response.strip().rstrip('```')
|
|
1189
|
+
|
|
1190
|
+
|
|
851
1191
|
if debug:
|
|
852
|
-
ASCIIColors.
|
|
853
|
-
ASCIIColors.
|
|
854
|
-
ASCIIColors.
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
1192
|
+
ASCIIColors.magenta(f"--- Chunk {chunk_id} Updated Memory ---")
|
|
1193
|
+
ASCIIColors.green(memory)
|
|
1194
|
+
ASCIIColors.magenta("----------------------------")
|
|
1195
|
+
|
|
1196
|
+
# Move to next chunk start, considering overlap
|
|
1197
|
+
start_token_idx = max(start_token_idx, end_token_idx - overlap) if overlap>0 and end_token_idx < total_tokens else end_token_idx
|
|
1198
|
+
|
|
1199
|
+
|
|
1200
|
+
# --- Final Aggregation Step ---
|
|
1201
|
+
callback("Aggregating final summary...", MSG_TYPE.MSG_TYPE_STEP)
|
|
1202
|
+
final_prompt_template = f"""{self.system_full_header}
|
|
1203
|
+
You are a memory summarizer assistant.
|
|
861
1204
|
{final_memory_processing_prompt}.
|
|
862
|
-
|
|
863
|
-
Here is
|
|
1205
|
+
{self.user_full_header}
|
|
1206
|
+
Here is the document analysis memory:
|
|
864
1207
|
```{chunk_processing_output_format}
|
|
865
|
-
{memory}
|
|
1208
|
+
{{memory}}
|
|
866
1209
|
```
|
|
867
|
-
The output must be put inside a {final_output_format} markdown tag.
|
|
868
|
-
|
|
869
|
-
|
|
1210
|
+
The final output must be put inside a {final_output_format} markdown tag.
|
|
1211
|
+
{self.ai_full_header}
|
|
1212
|
+
```{final_output_format}
|
|
870
1213
|
"""
|
|
871
|
-
# Truncate memory if needed for final prompt
|
|
872
|
-
example_final_prompt = final_prompt_template
|
|
873
|
-
final_static_tokens = len(self.tokenize(example_final_prompt))
|
|
874
|
-
available_final_tokens = ctx_size - final_static_tokens
|
|
875
|
-
|
|
876
|
-
memory_tokens = self.tokenize(memory)
|
|
877
|
-
if len(memory_tokens) > available_final_tokens:
|
|
878
|
-
memory = self.detokenize(memory_tokens[:available_final_tokens])
|
|
879
|
-
|
|
880
|
-
# Generate final summary
|
|
881
|
-
final_prompt = final_prompt_template
|
|
882
|
-
memory = self.generate_text(final_prompt, streaming_callback=callback)
|
|
883
|
-
code = self.extract_code_blocks(memory)
|
|
884
|
-
if code:
|
|
885
|
-
memory=code[0]["content"]
|
|
886
|
-
return memory
|
|
887
1214
|
|
|
1215
|
+
# Truncate memory if needed for the final prompt
|
|
1216
|
+
final_example_prompt = final_prompt_template.format(memory="<final_memory>")
|
|
1217
|
+
try:
|
|
1218
|
+
final_static_tokens = len(self.tokenize(final_example_prompt)) - len(self.tokenize("<final_memory>"))
|
|
1219
|
+
available_final_tokens = ctx_size - final_static_tokens - (self.default_n_predict or 1024) # Reserve space for output
|
|
1220
|
+
except RuntimeError as e:
|
|
1221
|
+
ASCIIColors.error(f"Tokenization failed during final setup: {e}")
|
|
1222
|
+
return "Error: Could not calculate final prompt size."
|
|
888
1223
|
|
|
889
|
-
def update_memory_from_file_chunk_prompt(self, file_name, file_chunk_id, global_chunk_id, chunk, memory, memory_template, query, task_prompt):
|
|
890
|
-
return f"""{self.system_full_header}
|
|
891
|
-
You are a search assistant that processes documents chunk by chunk to find information related to a query, updating a markdown memory of findings at each step.
|
|
892
1224
|
|
|
893
|
-
|
|
1225
|
+
memory_tokens = self.tokenize(memory)
|
|
1226
|
+
if len(memory_tokens) > available_final_tokens:
|
|
1227
|
+
memory = self.detokenize(memory_tokens[-available_final_tokens:]) # Keep most recent info
|
|
1228
|
+
if debug: ASCIIColors.yellow(f"Final memory truncated to {available_final_tokens} tokens.")
|
|
894
1229
|
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
```markdown
|
|
901
|
-
{chunk}
|
|
902
|
-
```
|
|
903
|
-
{'Current findings memory (cumulative across all files):' if memory!="" else 'Memory template:'}
|
|
904
|
-
```markdown
|
|
905
|
-
{memory if memory!="" else memory_template}
|
|
906
|
-
```
|
|
907
|
-
{self.user_full_header}
|
|
908
|
-
Query: '{query}'
|
|
909
|
-
Task: {task_prompt}
|
|
910
|
-
Update the markdown memory by adding new information from this chunk relevant to the query. Retain all prior findings unless contradicted or updated. Only include explicitly relevant details.
|
|
911
|
-
Ensure the output is valid markdown matching the structure of the provided template.
|
|
912
|
-
Make sure to extract only information relevant to answering the user's query or providing important contextual information.
|
|
913
|
-
Return the updated markdown memory inside a markdown code block.
|
|
914
|
-
{self.ai_full_header}
|
|
915
|
-
"""
|
|
1230
|
+
# Generate final summary
|
|
1231
|
+
final_prompt = final_prompt_template.format(memory=memory)
|
|
1232
|
+
if debug:
|
|
1233
|
+
ASCIIColors.magenta("--- Final Aggregation Prompt ---")
|
|
1234
|
+
ASCIIColors.cyan(final_prompt)
|
|
916
1235
|
|
|
917
|
-
|
|
918
|
-
return f"""{self.system_full_header}
|
|
919
|
-
You are a search assistant that processes documents chunk by chunk to find information related to a query, updating a markdown memory of findings at each step.
|
|
1236
|
+
final_summary_raw = self.generate_text(final_prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback)
|
|
920
1237
|
|
|
921
|
-
|
|
1238
|
+
if isinstance(final_summary_raw, dict):
|
|
1239
|
+
ASCIIColors.error(f"Final aggregation failed: {final_summary_raw.get('error')}")
|
|
1240
|
+
return "Error: Final aggregation failed."
|
|
1241
|
+
|
|
1242
|
+
final_code_blocks = self.extract_code_blocks(final_summary_raw, format=final_output_format)
|
|
1243
|
+
if final_code_blocks:
|
|
1244
|
+
final_summary = final_code_blocks[0]["content"]
|
|
1245
|
+
else:
|
|
1246
|
+
# Fallback similar to chunk processing
|
|
1247
|
+
end_tag = f"```{final_output_format}"
|
|
1248
|
+
last_occurrence = final_summary_raw.rfind(end_tag)
|
|
1249
|
+
if last_occurrence != -1:
|
|
1250
|
+
start_tag_len = len(f"```{final_output_format}\n") # Approx
|
|
1251
|
+
potential_summary = final_summary_raw[last_occurrence + start_tag_len:].strip()
|
|
1252
|
+
if potential_summary.endswith("```"):
|
|
1253
|
+
potential_summary = potential_summary[:-3].strip()
|
|
1254
|
+
final_summary = potential_summary if potential_summary else final_summary_raw.strip().rstrip('```')
|
|
1255
|
+
else:
|
|
1256
|
+
final_summary = final_summary_raw.strip().rstrip('```')
|
|
1257
|
+
ASCIIColors.warning("Could not extract final summary block. Using raw response.")
|
|
1258
|
+
|
|
1259
|
+
if debug:
|
|
1260
|
+
ASCIIColors.magenta("--- Final Summary ---")
|
|
1261
|
+
ASCIIColors.green(final_summary)
|
|
1262
|
+
ASCIIColors.magenta("-------------------")
|
|
1263
|
+
|
|
1264
|
+
return final_summary
|
|
922
1265
|
|
|
923
|
-
----
|
|
924
|
-
# Current file: {file_name}
|
|
925
|
-
# Chunk number in this file: {file_chunk_id}
|
|
926
|
-
# Global chunk number: {global_chunk_id}
|
|
927
|
-
# Text chunk:
|
|
928
|
-
```markdown
|
|
929
|
-
{chunk}
|
|
930
|
-
```
|
|
931
|
-
Current findings memory (cumulative across all files):
|
|
932
|
-
```markdown
|
|
933
|
-
{memory}
|
|
934
|
-
```
|
|
935
|
-
{self.user_full_header}
|
|
936
|
-
Query: '{query}'
|
|
937
|
-
{'Start Creating a memory from the text chunk in a format adapted to answer the user Query' if memory=="" else 'Update the markdown memory by adding new information from this chunk relevant to the query.'} Retain all prior findings unless contradicted or updated. Only include explicitly relevant details.
|
|
938
|
-
{'Ensure the output is valid markdown matching the structure of the current memory' if memory!='' else 'Ensure the output is valid markdown matching the structure of the provided template.'}
|
|
939
|
-
Make sure to extract only information relevant to answering the user's query or providing important contextual information.
|
|
940
|
-
Return the updated markdown memory inside a markdown code block.
|
|
941
|
-
{self.ai_full_header}
|
|
942
|
-
"""
|
|
943
1266
|
|
|
944
1267
|
def deep_analyze(
|
|
945
1268
|
self,
|
|
946
1269
|
query: str,
|
|
947
1270
|
text: str = None,
|
|
948
|
-
files:
|
|
949
|
-
aggregation_prompt: str =
|
|
1271
|
+
files: Optional[List[Union[str, Path]]] = None,
|
|
1272
|
+
aggregation_prompt: str = "Aggregate the findings from the memory into a coherent answer to the original query.",
|
|
950
1273
|
output_format: str = "markdown",
|
|
951
1274
|
ctx_size: int = None,
|
|
952
1275
|
chunk_size: int = None,
|
|
1276
|
+
overlap: int = None, # Added overlap
|
|
953
1277
|
bootstrap_chunk_size: int = None,
|
|
954
1278
|
bootstrap_steps: int = None,
|
|
955
1279
|
callback=None,
|
|
956
1280
|
debug: bool = False
|
|
957
1281
|
):
|
|
958
1282
|
"""
|
|
959
|
-
Searches for
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
Parameters:
|
|
963
|
-
- query (str): The query to search for.
|
|
964
|
-
- text (str, optional): The input text to search in. Defaults to None.
|
|
965
|
-
- files (list, optional): List of file paths to search in. Defaults to None.
|
|
966
|
-
- task_prompt (str, optional): Prompt for processing each chunk. Defaults to a standard markdown extraction prompt.
|
|
967
|
-
- aggregation_prompt (str, optional): Prompt for aggregating findings. Defaults to None.
|
|
968
|
-
- output_format (str, optional): Output format. Defaults to "markdown".
|
|
969
|
-
- ctx_size (int, optional): Context size for the model. Defaults to None (uses self.ctx_size).
|
|
970
|
-
- chunk_size (int, optional): Size of each chunk. Defaults to None (ctx_size // 4). Smaller chunk sizes yield better results but are slower.
|
|
971
|
-
- bootstrap_chunk_size (int, optional): Size for initial chunks. Defaults to None.
|
|
972
|
-
- bootstrap_steps (int, optional): Number of initial chunks using bootstrap size. Defaults to None.
|
|
973
|
-
- callback (callable, optional): Function called after each chunk. Defaults to None.
|
|
974
|
-
- debug (bool, optional): Enable debug output. Defaults to False.
|
|
975
|
-
|
|
976
|
-
Returns:
|
|
977
|
-
- str: The search findings or aggregated output in the specified format.
|
|
1283
|
+
Searches for information related to a query in long text or files, processing chunk by chunk.
|
|
1284
|
+
(Implementation requires self.tokenize, self.detokenize, self.generate_text, self.extract_code_blocks)
|
|
978
1285
|
"""
|
|
979
|
-
#
|
|
1286
|
+
# ... (Implementation mostly similar to previous version, but needs updates) ...
|
|
1287
|
+
if not callback:
|
|
1288
|
+
callback=self.sink
|
|
1289
|
+
|
|
1290
|
+
# Set defaults and validate input
|
|
980
1291
|
if ctx_size is None:
|
|
981
|
-
ctx_size = self.default_ctx_size
|
|
1292
|
+
ctx_size = self.default_ctx_size or 8192
|
|
982
1293
|
if chunk_size is None:
|
|
983
1294
|
chunk_size = ctx_size // 4
|
|
984
|
-
|
|
985
|
-
|
|
1295
|
+
if overlap is None:
|
|
1296
|
+
overlap = chunk_size // 10
|
|
1297
|
+
if bootstrap_chunk_size is None:
|
|
1298
|
+
bootstrap_chunk_size = chunk_size // 2
|
|
1299
|
+
if bootstrap_steps is None:
|
|
1300
|
+
bootstrap_steps = 2
|
|
1301
|
+
|
|
1302
|
+
if not text and not files:
|
|
1303
|
+
raise ValueError("Either 'text' or 'files' must be provided.")
|
|
1304
|
+
if text and files:
|
|
1305
|
+
ASCIIColors.warning("Both 'text' and 'files' provided. Processing 'files' only.")
|
|
1306
|
+
text = None # Prioritize files if both are given
|
|
1307
|
+
|
|
1308
|
+
# Prepare input texts from files or the single text string
|
|
1309
|
+
all_texts = []
|
|
986
1310
|
if files:
|
|
987
|
-
|
|
1311
|
+
from docling import DocumentConverter # Lazy import
|
|
1312
|
+
converter = DocumentConverter()
|
|
1313
|
+
callback("Loading and converting files...", MSG_TYPE.MSG_TYPE_STEP)
|
|
1314
|
+
for i, file_path in enumerate(files):
|
|
1315
|
+
file_p = Path(file_path)
|
|
1316
|
+
callback(f"Processing file {i+1}/{len(files)}: {file_p.name}", MSG_TYPE.MSG_TYPE_STEP_PROGRESS, {"progress":(i+1)/len(files)*100})
|
|
1317
|
+
try:
|
|
1318
|
+
if file_p.exists():
|
|
1319
|
+
file_content_result = converter.convert(file_p)
|
|
1320
|
+
if file_content_result and file_content_result.document:
|
|
1321
|
+
# Exporting to markdown for consistent processing
|
|
1322
|
+
all_texts.append((str(file_path), file_content_result.document.export_to_markdown()))
|
|
1323
|
+
else:
|
|
1324
|
+
ASCIIColors.error(f"Could not convert file: {file_path}")
|
|
1325
|
+
else:
|
|
1326
|
+
ASCIIColors.error(f"File not found: {file_path}")
|
|
1327
|
+
except Exception as e:
|
|
1328
|
+
ASCIIColors.error(f"Error processing file {file_path}: {e}")
|
|
1329
|
+
trace_exception(e)
|
|
1330
|
+
callback("File processing complete.", MSG_TYPE.MSG_TYPE_STEP_END)
|
|
1331
|
+
|
|
988
1332
|
elif text:
|
|
989
1333
|
all_texts = [("input_text", text)]
|
|
990
|
-
else:
|
|
991
|
-
raise ValueError("Either text or files must be provided.")
|
|
992
1334
|
|
|
993
|
-
|
|
994
|
-
|
|
1335
|
+
if not all_texts:
|
|
1336
|
+
return "Error: No valid text content found to analyze."
|
|
995
1337
|
|
|
996
|
-
# Initialize
|
|
1338
|
+
# Initialize memory and counters
|
|
1339
|
+
memory = ""
|
|
997
1340
|
global_chunk_id = 0
|
|
998
|
-
|
|
999
|
-
# Calculate static prompt tokens
|
|
1000
|
-
example_prompt = self.update_memory_from_file_chunk_prompt_markdown("example.txt","0", "0", "", "", query)
|
|
1001
|
-
static_tokens = len(self.tokenize(example_prompt))
|
|
1002
1341
|
|
|
1003
|
-
#
|
|
1004
|
-
|
|
1005
|
-
|
|
1342
|
+
# Define prompts (can be customized)
|
|
1343
|
+
def update_memory_prompt_template(file_name, file_chunk_id, global_chunk_id, chunk, memory, query):
|
|
1344
|
+
system_header = self.system_full_header
|
|
1345
|
+
user_header = self.user_full_header
|
|
1346
|
+
ai_header = self.ai_full_header
|
|
1347
|
+
mem_header = "Initial memory template:" if not memory else "Current findings memory (cumulative):"
|
|
1348
|
+
|
|
1349
|
+
return f"""{system_header}
|
|
1350
|
+
You are a search assistant processing document chunks to find information relevant to a user query. Update the markdown memory with findings from the current chunk.
|
|
1351
|
+
|
|
1352
|
+
----
|
|
1353
|
+
File: {file_name}
|
|
1354
|
+
Chunk in File: {file_chunk_id}
|
|
1355
|
+
Global Chunk: {global_chunk_id}
|
|
1356
|
+
Text Chunk:
|
|
1357
|
+
```markdown
|
|
1358
|
+
{chunk}
|
|
1359
|
+
```
|
|
1360
|
+
{mem_header}
|
|
1361
|
+
```markdown
|
|
1362
|
+
"""+memory or '# Findings\\n## Key Information\\nDetails relevant to the query...\\n## Context\\nSupporting context...'+f"""
|
|
1363
|
+
```
|
|
1364
|
+
{user_header}
|
|
1365
|
+
Query: '{query}'
|
|
1366
|
+
Task: Update the markdown memory by adding new information from this chunk relevant to the query. Retain prior findings unless contradicted. Only include explicitly relevant details. Return the *entire updated* markdown memory inside a markdown code block.
|
|
1367
|
+
{ai_header}
|
|
1368
|
+
```markdown
|
|
1369
|
+
""" # Start AI response with code block
|
|
1370
|
+
|
|
1371
|
+
# Estimate static prompt size (approximate)
|
|
1372
|
+
example_prompt = update_memory_prompt_template("f.txt", 0, 0, "<chunk>", "<memory>", query)
|
|
1373
|
+
try:
|
|
1374
|
+
static_tokens = len(self.tokenize(example_prompt)) - len(self.tokenize("<chunk>")) - len(self.tokenize("<memory>"))
|
|
1375
|
+
except RuntimeError as e:
|
|
1376
|
+
ASCIIColors.error(f"Tokenization failed during setup: {e}")
|
|
1377
|
+
return "Error: Could not calculate prompt size."
|
|
1378
|
+
|
|
1379
|
+
# Process each text (from file or input)
|
|
1380
|
+
callback("Starting deep analysis...", MSG_TYPE.MSG_TYPE_STEP_START)
|
|
1381
|
+
for file_path_str, file_text_content in all_texts:
|
|
1382
|
+
file_name = Path(file_path_str).name
|
|
1383
|
+
callback(f"Analyzing: {file_name}", MSG_TYPE.MSG_TYPE_STEP)
|
|
1384
|
+
try:
|
|
1385
|
+
file_tokens = self.tokenize(file_text_content)
|
|
1386
|
+
except RuntimeError as e:
|
|
1387
|
+
ASCIIColors.error(f"Tokenization failed for {file_name}: {e}")
|
|
1388
|
+
continue # Skip this file
|
|
1389
|
+
|
|
1006
1390
|
start_token_idx = 0
|
|
1007
|
-
file_chunk_id = 0
|
|
1391
|
+
file_chunk_id = 0
|
|
1008
1392
|
|
|
1009
1393
|
while start_token_idx < len(file_tokens):
|
|
1010
|
-
# Calculate available
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
)
|
|
1022
|
-
|
|
1023
|
-
|
|
1394
|
+
# Calculate available space dynamically
|
|
1395
|
+
available_tokens_for_dynamic_content = ctx_size - static_tokens - (self.default_n_predict or 1024)
|
|
1396
|
+
if available_tokens_for_dynamic_content <= 100:
|
|
1397
|
+
ASCIIColors.error(f"Context window too small during analysis of {file_name}.")
|
|
1398
|
+
# Option: try truncating memory drastically or break
|
|
1399
|
+
break # Stop processing this file if context is too full
|
|
1400
|
+
|
|
1401
|
+
max_memory_tokens = available_tokens_for_dynamic_content // 2
|
|
1402
|
+
max_chunk_tokens = available_tokens_for_dynamic_content - max_memory_tokens
|
|
1403
|
+
|
|
1404
|
+
# Truncate memory if needed
|
|
1405
|
+
current_memory_tokens = self.tokenize(memory)
|
|
1406
|
+
if len(current_memory_tokens) > max_memory_tokens:
|
|
1407
|
+
memory = self.detokenize(current_memory_tokens[-max_memory_tokens:])
|
|
1408
|
+
if debug: ASCIIColors.yellow(f"Memory truncated (File: {file_name}, Chunk: {file_chunk_id})")
|
|
1409
|
+
|
|
1410
|
+
# Determine chunk size
|
|
1411
|
+
current_chunk_size = bootstrap_chunk_size if global_chunk_id < bootstrap_steps else chunk_size
|
|
1412
|
+
current_chunk_size = min(current_chunk_size, max_chunk_tokens)
|
|
1413
|
+
|
|
1414
|
+
end_token_idx = min(start_token_idx + current_chunk_size, len(file_tokens))
|
|
1024
1415
|
chunk_tokens = file_tokens[start_token_idx:end_token_idx]
|
|
1025
1416
|
chunk = self.detokenize(chunk_tokens)
|
|
1026
1417
|
|
|
1418
|
+
file_chunk_id += 1
|
|
1419
|
+
global_chunk_id += 1
|
|
1420
|
+
callback(f"Processing chunk {file_chunk_id} (Global {global_chunk_id}) of {file_name}", MSG_TYPE.MSG_TYPE_STEP_PROGRESS, {"progress": end_token_idx/len(file_tokens)*100})
|
|
1421
|
+
|
|
1027
1422
|
# Generate updated memory
|
|
1028
|
-
prompt =
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1423
|
+
prompt = update_memory_prompt_template(
|
|
1424
|
+
file_name=file_name,
|
|
1425
|
+
file_chunk_id=file_chunk_id,
|
|
1426
|
+
global_chunk_id=global_chunk_id,
|
|
1427
|
+
chunk=chunk,
|
|
1428
|
+
memory=memory,
|
|
1429
|
+
query=query
|
|
1430
|
+
)
|
|
1035
1431
|
if debug:
|
|
1036
|
-
|
|
1037
|
-
|
|
1432
|
+
ASCIIColors.magenta(f"--- Deep Analysis Prompt (Global Chunk {global_chunk_id}) ---")
|
|
1433
|
+
ASCIIColors.cyan(prompt)
|
|
1434
|
+
|
|
1435
|
+
response = self.generate_text(prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback) # Use main callback for streaming output
|
|
1038
1436
|
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1437
|
+
if isinstance(response, dict): # Handle error
|
|
1438
|
+
ASCIIColors.error(f"Chunk processing failed (Global {global_chunk_id}): {response.get('error')}")
|
|
1439
|
+
start_token_idx = end_token_idx # Skip to next chunk index
|
|
1440
|
+
continue
|
|
1441
|
+
|
|
1442
|
+
memory_code_blocks = self.extract_code_blocks(response, format="markdown")
|
|
1443
|
+
if memory_code_blocks:
|
|
1444
|
+
memory = memory_code_blocks[0]["content"]
|
|
1043
1445
|
else:
|
|
1044
|
-
|
|
1446
|
+
# Fallback logic (same as sequential_summarize)
|
|
1447
|
+
end_tag = "```markdown"
|
|
1448
|
+
last_occurrence = response.rfind(end_tag)
|
|
1449
|
+
if last_occurrence != -1:
|
|
1450
|
+
start_tag_len = len("```markdown\n")
|
|
1451
|
+
potential_memory = response[last_occurrence + start_tag_len:].strip()
|
|
1452
|
+
if potential_memory.endswith("```"):
|
|
1453
|
+
potential_memory = potential_memory[:-3].strip()
|
|
1454
|
+
memory = potential_memory if potential_memory else response.strip().rstrip('```')
|
|
1455
|
+
else:
|
|
1456
|
+
memory = response.strip().rstrip('```')
|
|
1457
|
+
ASCIIColors.warning(f"Could not extract memory block for chunk {global_chunk_id}. Using raw response.")
|
|
1458
|
+
|
|
1045
1459
|
|
|
1046
1460
|
if debug:
|
|
1047
|
-
ASCIIColors.
|
|
1048
|
-
ASCIIColors.
|
|
1049
|
-
ASCIIColors.
|
|
1461
|
+
ASCIIColors.magenta(f"--- Updated Memory (After Global Chunk {global_chunk_id}) ---")
|
|
1462
|
+
ASCIIColors.green(memory)
|
|
1463
|
+
ASCIIColors.magenta("-----------------------------------")
|
|
1050
1464
|
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1465
|
+
# Move to next chunk start index with overlap
|
|
1466
|
+
start_token_idx = max(start_token_idx, end_token_idx - overlap) if overlap > 0 and end_token_idx < len(file_tokens) else end_token_idx
|
|
1467
|
+
|
|
1468
|
+
callback(f"Finished analyzing: {file_name}", MSG_TYPE.MSG_TYPE_STEP_END)
|
|
1054
1469
|
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1470
|
+
|
|
1471
|
+
# --- Final Aggregation ---
|
|
1472
|
+
callback("Aggregating final answer...", MSG_TYPE.MSG_TYPE_STEP_START)
|
|
1473
|
+
final_prompt = f"""{self.system_full_header}
|
|
1058
1474
|
You are a search results aggregator.
|
|
1059
1475
|
{self.user_full_header}
|
|
1060
1476
|
{aggregation_prompt}
|
|
1061
|
-
Collected findings (across all
|
|
1477
|
+
Collected findings (across all sources):
|
|
1062
1478
|
```markdown
|
|
1063
1479
|
{memory}
|
|
1064
1480
|
```
|
|
1065
|
-
Provide the final
|
|
1481
|
+
Provide the final aggregated answer in {output_format} format, directly addressing the original query: '{query}'. The final answer must be put inside a {output_format} markdown tag.
|
|
1066
1482
|
{self.ai_full_header}
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
code = self.extract_code_blocks(final_output)
|
|
1070
|
-
return code[0]["content"] if code else final_output
|
|
1071
|
-
return memory
|
|
1483
|
+
```{output_format}
|
|
1484
|
+
""" # Start AI response
|
|
1072
1485
|
|
|
1073
|
-
|
|
1074
|
-
|
|
1486
|
+
# Truncate memory if needed for final prompt (similar logic to sequential_summarize)
|
|
1487
|
+
final_example_prompt = final_prompt.replace("{memory}", "<final_memory>")
|
|
1488
|
+
try:
|
|
1489
|
+
final_static_tokens = len(self.tokenize(final_example_prompt)) - len(self.tokenize("<final_memory>"))
|
|
1490
|
+
available_final_tokens = ctx_size - final_static_tokens - (self.default_n_predict or 1024)
|
|
1491
|
+
except RuntimeError as e:
|
|
1492
|
+
ASCIIColors.error(f"Tokenization failed during final setup: {e}")
|
|
1493
|
+
return "Error: Could not calculate final prompt size."
|
|
1075
1494
|
|
|
1495
|
+
memory_tokens = self.tokenize(memory)
|
|
1496
|
+
if len(memory_tokens) > available_final_tokens:
|
|
1497
|
+
memory = self.detokenize(memory_tokens[-available_final_tokens:])
|
|
1498
|
+
if debug: ASCIIColors.yellow(f"Final memory truncated for aggregation.")
|
|
1076
1499
|
|
|
1500
|
+
final_prompt = final_prompt.format(memory=memory) # Format with potentially truncated memory
|
|
1077
1501
|
|
|
1078
|
-
if
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
#lc = LollmsClient(model_name="gpt-3.5-turbo-0125", default_generation_mode=ELF_GENERATION_FORMAT.OPENAI)
|
|
1082
|
-
print(lc.listModels())
|
|
1083
|
-
code = lc.generate_code("Build a simple json that containes name and age. put the output inside a json markdown tag")
|
|
1084
|
-
print(code)
|
|
1502
|
+
if debug:
|
|
1503
|
+
ASCIIColors.magenta("--- Final Aggregation Prompt ---")
|
|
1504
|
+
ASCIIColors.cyan(final_prompt)
|
|
1085
1505
|
|
|
1086
|
-
|
|
1087
|
-
Hello world thinking!
|
|
1088
|
-
How you doing?
|
|
1089
|
-
|
|
1090
|
-
</thinking>
|
|
1091
|
-
This is no thinking
|
|
1506
|
+
final_output_raw = self.generate_text(final_prompt, n_predict=(self.default_n_predict or 1024), streaming_callback=callback) # Use main callback
|
|
1092
1507
|
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
</think>
|
|
1508
|
+
if isinstance(final_output_raw, dict):
|
|
1509
|
+
ASCIIColors.error(f"Final aggregation failed: {final_output_raw.get('error')}")
|
|
1510
|
+
callback("Aggregation failed.", MSG_TYPE.MSG_TYPE_STEP_END, {'status':False})
|
|
1511
|
+
return "Error: Final aggregation failed."
|
|
1098
1512
|
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1513
|
+
final_code_blocks = self.extract_code_blocks(final_output_raw, format=output_format)
|
|
1514
|
+
if final_code_blocks:
|
|
1515
|
+
final_output = final_code_blocks[0]["content"]
|
|
1516
|
+
else:
|
|
1517
|
+
# Fallback logic
|
|
1518
|
+
end_tag = f"```{output_format}"
|
|
1519
|
+
last_occurrence = final_output_raw.rfind(end_tag)
|
|
1520
|
+
if last_occurrence != -1:
|
|
1521
|
+
start_tag_len = len(f"```{output_format}\n")
|
|
1522
|
+
potential_output = final_output_raw[last_occurrence + start_tag_len:].strip()
|
|
1523
|
+
if potential_output.endswith("```"):
|
|
1524
|
+
potential_output = potential_output[:-3].strip()
|
|
1525
|
+
final_output = potential_output if potential_output else final_output_raw.strip().rstrip('```')
|
|
1526
|
+
else:
|
|
1527
|
+
final_output = final_output_raw.strip().rstrip('```')
|
|
1528
|
+
ASCIIColors.warning("Could not extract final output block. Using raw response.")
|
|
1529
|
+
|
|
1530
|
+
|
|
1531
|
+
if debug:
|
|
1532
|
+
ASCIIColors.magenta("--- Final Aggregated Output ---")
|
|
1533
|
+
ASCIIColors.green(final_output)
|
|
1534
|
+
ASCIIColors.magenta("-----------------------------")
|
|
1535
|
+
|
|
1536
|
+
callback("Deep analysis complete.", MSG_TYPE.MSG_TYPE_STEP_END)
|
|
1537
|
+
return final_output
|
|
1538
|
+
|
|
1539
|
+
def chunk_text(text, tokenizer, detokenizer, chunk_size, overlap, use_separators=True):
    """
    Split a text into chunks of roughly `chunk_size` tokens, with `overlap`
    tokens shared between consecutive chunks.

    Args:
        text (str): The text to chunk.
        tokenizer (callable): Function mapping a string to a list of tokens.
        detokenizer (callable): Function mapping a list of tokens back to a string.
        chunk_size (int): Desired number of tokens per chunk.
        overlap (int): Number of tokens to overlap between chunks.
        use_separators (bool): If True, tries to cut at natural separators
            (paragraph breaks, then sentence boundaries) near the token limit.

    Returns:
        List[str]: The list of text chunks. Empty text yields an empty list.
    """
    tokens = tokenizer(text)
    chunks = []
    start_idx = 0
    # Guard: a non-positive advance (overlap >= chunk_size) would otherwise
    # make the loops below spin forever. Always move at least one token.
    step = max(1, chunk_size - overlap)

    if not use_separators:
        while start_idx < len(tokens):
            end_idx = min(start_idx + chunk_size, len(tokens))
            chunks.append(detokenizer(tokens[start_idx:end_idx]))
            if end_idx >= len(tokens):
                break  # last chunk emitted; don't step past the end
            start_idx += step
    else:
        # Preference order matters: paragraph break, line break, sentence enders.
        separators = ["\n\n", "\n", ". ", "? ", "! "]

        current_pos = 0  # character index where the current chunk starts
        while current_pos < len(text):
            # Approximate the character position of the token-based chunk end.
            target_end_token = min(start_idx + chunk_size, len(tokens))
            target_end_char_approx = len(detokenizer(tokens[:target_end_token]))

            # Look for the latest separator in a window before the target end.
            best_sep_pos = -1
            for sep in separators:
                search_start = max(current_pos, target_end_char_approx - chunk_size // 2)
                sep_pos = text.rfind(sep, search_start, target_end_char_approx + len(sep))
                if sep_pos > current_pos:
                    best_sep_pos = max(best_sep_pos, sep_pos + len(sep))

            if best_sep_pos == -1 or best_sep_pos <= current_pos:
                # No usable separator: cut at the token limit.
                end_idx = target_end_token
                end_char = len(detokenizer(tokens[:end_idx])) if end_idx < len(tokens) else len(text)
            else:
                end_char = best_sep_pos
                end_idx = len(tokenizer(text[:end_char]))  # recover the token index

            if end_char <= current_pos:
                # Degenerate tokenize/detokenize round trip: force at least one
                # chunk's worth of characters forward so the loop cannot stall.
                end_char = min(len(text), current_pos + max(1, chunk_size))
                end_idx = len(tokenizer(text[:end_char]))

            chunks.append(text[current_pos:end_char])

            if end_char >= len(text):
                break  # this chunk reached the end of the text

            # Move the next start back by roughly `overlap` tokens of characters.
            overlap_char_approx = len(detokenizer(tokens[:overlap]))
            next_start_char = max(current_pos, end_char - overlap_char_approx)

            # Align the next start on the earliest separator inside the overlap
            # window. The search is bounded by end_char so no text is skipped.
            aligned_start = next_start_char
            for sep in separators:
                sep_pos = text.find(sep, next_start_char, end_char)
                if sep_pos != -1:
                    candidate = sep_pos + len(sep)
                    if aligned_start == next_start_char or candidate < aligned_start:
                        aligned_start = candidate

            # Guarantee forward progress; fall back to the hard cut position.
            current_pos = aligned_start if aligned_start > current_pos else end_char
            start_idx = len(tokenizer(text[:current_pos]))

    return chunks
|