lollms-client 0.10.0__tar.gz → 0.11.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lollms-client might be problematic.

Files changed (39)
  1. {lollms_client-0.10.0 → lollms_client-0.11.0}/PKG-INFO +11 -2
  2. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/llm_bindings/lollms/__init__.py +1 -0
  3. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/llm_bindings/ollama/__init__.py +5 -1
  4. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/llm_bindings/openai/__init__.py +1 -0
  5. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/llm_bindings/transformers/__init__.py +67 -71
  6. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/lollms_core.py +276 -208
  7. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client.egg-info/PKG-INFO +11 -2
  8. {lollms_client-0.10.0 → lollms_client-0.11.0}/setup.py +1 -1
  9. {lollms_client-0.10.0 → lollms_client-0.11.0}/LICENSE +0 -0
  10. {lollms_client-0.10.0 → lollms_client-0.11.0}/README.md +0 -0
  11. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/__init__.py +0 -0
  12. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/llm_bindings/__init__.py +0 -0
  13. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/lollms_config.py +0 -0
  14. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/lollms_discussion.py +0 -0
  15. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/lollms_functions.py +0 -0
  16. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/lollms_js_analyzer.py +0 -0
  17. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/lollms_llm_binding.py +0 -0
  18. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/lollms_personality.py +0 -0
  19. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/lollms_personality_worker.py +0 -0
  20. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/lollms_python_analyzer.py +0 -0
  21. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/lollms_stt.py +0 -0
  22. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/lollms_tasks.py +0 -0
  23. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/lollms_tti.py +0 -0
  24. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/lollms_tts.py +0 -0
  25. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/lollms_types.py +0 -0
  26. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/lollms_utilities.py +0 -0
  27. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/stt_bindings/__init__.py +0 -0
  28. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/stt_bindings/lollms/__init__.py +0 -0
  29. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/tti_bindings/__init__.py +0 -0
  30. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/tti_bindings/lollms/__init__.py +0 -0
  31. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/tts_bindings/__init__.py +0 -0
  32. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/tts_bindings/lollms/__init__.py +0 -0
  33. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/ttv_bindings/__init__.py +0 -0
  34. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/ttv_bindings/lollms/__init__.py +0 -0
  35. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client.egg-info/SOURCES.txt +0 -0
  36. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client.egg-info/dependency_links.txt +0 -0
  37. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client.egg-info/requires.txt +0 -0
  38. {lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client.egg-info/top_level.txt +0 -0
  39. {lollms_client-0.10.0 → lollms_client-0.11.0}/setup.cfg +0 -0
{lollms_client-0.10.0 → lollms_client-0.11.0}/PKG-INFO
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: lollms_client
- Version: 0.10.0
+ Version: 0.11.0
  Summary: A client library for LoLLMs generate endpoint
  Home-page: https://github.com/ParisNeo/lollms_client
  Author: ParisNeo
@@ -11,6 +11,15 @@ Classifier: Operating System :: OS Independent
  Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: requests
+ Dynamic: author
+ Dynamic: author-email
+ Dynamic: classifier
+ Dynamic: description
+ Dynamic: description-content-type
+ Dynamic: home-page
+ Dynamic: license-file
+ Dynamic: requires-dist
+ Dynamic: summary

  # lollms_client

{lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/llm_bindings/lollms/__init__.py
@@ -54,6 +54,7 @@ class LollmsLLMBinding(LollmsLLMBinding):
  repeat_last_n: int = 40,
  seed: Optional[int] = None,
  n_threads: int = 8,
+ ctx_size: int | None = None,
  streaming_callback: Optional[Callable[[str, str], None]] = None) -> Union[str, dict]:
  """
  Generate text using the LOLLMS service, with optional image support.
{lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/llm_bindings/ollama/__init__.py
@@ -54,6 +54,7 @@ class OllamaBinding(LollmsLLMBinding):
  repeat_last_n: int = 40,
  seed: Optional[int] = None,
  n_threads: int = 8,
+ ctx_size: int | None = None,
  streaming_callback: Optional[Callable[[str, str], None]] = None) -> Union[str, dict]:
  """
  Generate text using the Ollama service, with optional image support.
@@ -111,8 +112,10 @@ class OllamaBinding(LollmsLLMBinding):
  }],
  "stream": stream,
  "temperature": float(temperature),
- "max_tokens": n_predict
+ "max_tokens": n_predict,
  }
+ if ctx_size is not None:
+ data["num_ctx"] = ctx_size
  url = f'{host_address}/api/chat'
  else:
  # Text-only generation using /api/generate endpoint
@@ -265,6 +268,7 @@ class OllamaBinding(LollmsLLMBinding):
  }
  response = requests.get(url, headers=headers, verify= self.verify_ssl_certificate)
  try:
+ ASCIIColors.debug("Listing ollama models")
  data = response.json()
  model_info = []
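For context, the `ctx_size` value added to the Ollama binding ends up as Ollama's `num_ctx` option in the request payload built in the second hunk above. A standalone sketch of the resulting body (the host and model name are placeholders, and the surrounding fields are simplified from the hunk):

```python
# Sketch of how the new ctx_size plumbing lands in the Ollama chat request body.
host_address = "http://localhost:11434"   # placeholder host
ctx_size = 8192

data = {
    "model": "llama3",                     # placeholder model name
    "messages": [{"role": "user", "content": "Hello"}],
    "stream": False,
    "temperature": 0.1,
    "max_tokens": 256,
}
if ctx_size is not None:
    data["num_ctx"] = ctx_size             # mirrors the lines added in this release

url = f"{host_address}/api/chat"           # same endpoint used by the binding
```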
{lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/llm_bindings/openai/__init__.py
@@ -62,6 +62,7 @@ class OpenAIBinding(LollmsLLMBinding):
  repeat_last_n: int = 40,
  seed: Optional[int] = None,
  n_threads: int = 8,
+ ctx_size: int | None = None,
  streaming_callback: Optional[Callable[[str, str], None]] = None) -> str:
  """
  Generate text based on the provided prompt and parameters.
{lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/llm_bindings/transformers/__init__.py
@@ -11,12 +11,12 @@ from ascii_colors import ASCIIColors
  import pipmaster as pm
  if not pm.is_installed("torch"):
  ASCIIColors.yellow("Diffusers: Torch not found. Installing it")
- pm.install_multiple(["torch","torchvision","torchaudio"], "https://download.pytorch.org/whl/cu121", force_reinstall=True)
+ pm.install_multiple(["torch", "torchvision", "torchaudio"], "https://download.pytorch.org/whl/cu121", force_reinstall=True)

  import torch
  if not torch.cuda.is_available():
  ASCIIColors.yellow("Diffusers: Torch not using cuda. Reinstalling it")
- pm.install_multiple(["torch","torchvision","torchaudio"], "https://download.pytorch.org/whl/cu121", force_reinstall=True)
+ pm.install_multiple(["torch", "torchvision", "torchaudio"], "https://download.pytorch.org/whl/cu121", force_reinstall=True)
  import torch

  if not pm.is_installed("transformers"):
@@ -26,6 +26,7 @@ BindingName = "TransformersBinding"

  from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, BitsAndBytesConfig
  from packaging import version
+ import transformers

  class TransformersBinding(LollmsLLMBinding):
  """Transformers-specific binding implementation"""
@@ -35,7 +36,8 @@ class TransformersBinding(LollmsLLMBinding):
  model_name: str = "",
  service_key: str = None,
  verify_ssl_certificate: bool = True,
- default_completion_format: ELF_COMPLETION_FORMAT = ELF_COMPLETION_FORMAT.Chat):
+ default_completion_format: ELF_COMPLETION_FORMAT = ELF_COMPLETION_FORMAT.Chat,
+ prompt_template: Optional[str] = None):
  """
  Initialize the Transformers binding.

@@ -45,6 +47,7 @@ class TransformersBinding(LollmsLLMBinding):
  service_key (str): Authentication key for the service. Defaults to None.
  verify_ssl_certificate (bool): Whether to verify SSL certificates. Defaults to True.
  default_completion_format (ELF_COMPLETION_FORMAT): Default format for completions.
+ prompt_template (Optional[str]): Custom prompt template. If None, inferred from model.
  """
  super().__init__(
  host_address=host_address,
@@ -76,6 +79,9 @@ class TransformersBinding(LollmsLLMBinding):

  self.generation_config = GenerationConfig.from_pretrained(str(model_name))

+ # Infer or set prompt template
+ self.prompt_template = prompt_template if prompt_template else self._infer_prompt_template(model_name)
+
  # Display device information
  device = next(self.model.parameters()).device
  device_type = "CPU" if device.type == "cpu" else "GPU"
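The new `prompt_template` argument lets callers bypass `_infer_prompt_template` entirely. A rough sketch of how the constructor might be called; the import path and model name are assumptions, and only the parameters shown in the hunks above are taken from this diff:

```python
from lollms_client.llm_bindings.transformers import TransformersBinding  # assumed import path

# Hypothetical instantiation: supply an explicit template instead of relying on
# name-based inference; {system_prompt} and {user_prompt} are the placeholders
# the binding substitutes in generate_text.
binding = TransformersBinding(
    model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",   # placeholder model
    prompt_template="[INST] {system_prompt}\n{user_prompt} [/INST]",
)
```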
@@ -86,26 +92,47 @@ class TransformersBinding(LollmsLLMBinding):
  [ASCIIColors.color_green, ASCIIColors.color_blue if device_type == "GPU" else ASCIIColors.color_red]
  )

+ def _infer_prompt_template(self, model_name: str) -> str:
+ """
+ Infer the prompt template based on the model name.
+
+ Args:
+ model_name (str): Name of the model.
+
+ Returns:
+ str: The inferred prompt template format string.
+ """
+ model_name = model_name.lower()
+ if "llama-2" in model_name or "llama" in model_name:
+ return "[INST] <<SYS>> {system_prompt} <</SYS>> {user_prompt} [/INST]"
+ elif "gpt" in model_name:
+ return "{system_prompt}\n{user_prompt}" # Simple concatenation for GPT-style models
+ else:
+ # Default to a basic chat format
+ ASCIIColors.yellow(f"Warning: No specific template found for {model_name}. Using default chat format.")
+ return "[INST] {system_prompt}\n{user_prompt} [/INST]"

  def generate_text(self,
- prompt: str,
- images: Optional[List[str]] = None,
- n_predict: Optional[int] = None,
- stream: bool = False,
- temperature: float = 0.1,
- top_k: int = 50,
- top_p: float = 0.95,
- repeat_penalty: float = 0.8,
- repeat_last_n: int = 40,
- seed: Optional[int] = None,
- n_threads: int = 8,
- streaming_callback: Optional[Callable[[str, str], None]] = None,
- return_legacy_cache: bool = False) -> Union[str, dict]:
+ prompt: str,
+ images: Optional[List[str]] = None,
+ n_predict: Optional[int] = None,
+ stream: bool = False,
+ temperature: float = 0.1,
+ top_k: int = 50,
+ top_p: float = 0.95,
+ repeat_penalty: float = 0.8,
+ repeat_last_n: int = 40,
+ seed: Optional[int] = None,
+ n_threads: int = 8,
+ ctx_size: int | None = None,
+ streaming_callback: Optional[Callable[[str, str], None]] = None,
+ return_legacy_cache: bool = False,
+ system_prompt: str = "You are a helpful assistant.") -> Union[str, dict]:
  """
  Generate text using the Transformers model, with optional image support.

  Args:
- prompt (str): The input prompt for text generation.
+ prompt (str): The input prompt for text generation (user prompt).
  images (Optional[List[str]]): List of image file paths for multimodal generation.
  n_predict (Optional[int]): Maximum number of tokens to generate.
  stream (bool): Whether to stream the output. Defaults to False.
@@ -118,6 +145,7 @@ class TransformersBinding(LollmsLLMBinding):
  n_threads (int): Number of threads to use. Defaults to 8.
  streaming_callback (Optional[Callable[[str, str], None]]): Callback for streaming output.
  return_legacy_cache (bool): Whether to use legacy cache format (pre-v4.47). Defaults to False.
+ system_prompt (str): System prompt to set model behavior. Defaults to "You are a helpful assistant."

  Returns:
  Union[str, dict]: Generated text if successful, or a dictionary with status and error if failed.
@@ -130,6 +158,12 @@ class TransformersBinding(LollmsLLMBinding):
  if seed is not None:
  torch.manual_seed(seed)

+ # Apply the prompt template
+ formatted_prompt = self.prompt_template.format(
+ system_prompt=system_prompt,
+ user_prompt=prompt
+ )
+
  # Prepare generation config
  self.generation_config.max_new_tokens = n_predict if n_predict else 2048
  self.generation_config.temperature = temperature
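The template application above is plain `str.format` substitution. A self-contained snippet of what `generate_text` now feeds to the tokenizer for a Llama-style model (the prompt strings are just examples):

```python
# Standalone illustration of the new system_prompt handling in generate_text.
template = "[INST] <<SYS>> {system_prompt} <</SYS>> {user_prompt} [/INST]"  # template inferred for "llama" models
formatted_prompt = template.format(
    system_prompt="You are a helpful assistant.",
    user_prompt="List three uses of the ctx_size parameter.",
)
print(formatted_prompt)
# [INST] <<SYS>> You are a helpful assistant. <</SYS>> List three uses of the ctx_size parameter. [/INST]
```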
@@ -139,14 +173,14 @@ class TransformersBinding(LollmsLLMBinding):
  self.generation_config.pad_token_id = self.tokenizer.pad_token_id if self.tokenizer.pad_token_id is not None else self.tokenizer.eos_token_id

  # Tokenize input with attention mask
- inputs = self.tokenizer(prompt, return_tensors="pt", padding=True)
+ inputs = self.tokenizer(formatted_prompt, return_tensors="pt", padding=True)
  input_ids = inputs.input_ids.to(self.model.device)
  attention_mask = inputs.attention_mask.to(self.model.device)

  # Handle image input if provided (basic implementation)
  if images and len(images) > 0:
  ASCIIColors.yellow("Warning: Image processing not fully implemented in this binding")
- prompt += "\n[Image content not processed]"
+ formatted_prompt += "\n[Image content not processed]"

  # Check transformers version for cache handling
  use_legacy_cache = return_legacy_cache or version.parse(transformers.__version__) < version.parse("4.47.0")
@@ -169,10 +203,8 @@ class TransformersBinding(LollmsLLMBinding):
  ):
  # Handle different output formats based on version/cache setting
  if use_legacy_cache:
- # Legacy format: tuple of (sequences, scores, ...)
  sequences = output[0]
  else:
- # New format: Cache instance
  sequences = output.sequences

  # Decode the new tokens
@@ -212,70 +244,34 @@ class TransformersBinding(LollmsLLMBinding):
  error_msg = f"Error generating text: {str(e)}"
  ASCIIColors.red(error_msg)
  return {"status": "error", "error": error_msg}
-
-

  def tokenize(self, text: str) -> list:
- """
- Tokenize the input text into a list of characters.
-
- Args:
- text (str): The text to tokenize.
-
- Returns:
- list: List of individual characters.
- """
+ """Tokenize the input text into a list of characters."""
  return list(text)

  def detokenize(self, tokens: list) -> str:
- """
- Convert a list of tokens back to text.
-
- Args:
- tokens (list): List of tokens (characters) to detokenize.
-
- Returns:
- str: Detokenized text.
- """
+ """Convert a list of tokens back to text."""
  return "".join(tokens)
+
  def embed(self, text: str, **kwargs) -> list:
- """
- Get embeddings for the input text using Ollama API
-
- Args:
- text (str or List[str]): Input text to embed
- **kwargs: Additional arguments like model, truncate, options, keep_alive
-
- Returns:
- dict: Response containing embeddings
- """
- pass
+ """Get embeddings for the input text (placeholder)."""
+ pass
+
  def get_model_info(self) -> dict:
- """
- Return information about the current Ollama model.
-
- Returns:
- dict: Dictionary containing model name, version, and host address.
- """
+ """Return information about the current model."""
  return {
- "name": "ollama",
- "version": "2.0",
+ "name": "transformers",
+ "version": transformers.__version__,
  "host_address": self.host_address,
  "model_name": self.model_name
  }
+
  def listModels(self):
- """ Lists available models """
+ """Lists available models (placeholder)."""
  pass
+
  def load_model(self, model_name: str) -> bool:
- """
- Load a specific model into the Ollama binding.
-
- Args:
- model_name (str): Name of the model to load.
-
- Returns:
- bool: True if model loaded successfully.
- """
+ """Load a specific model into the binding."""
  self.model = model_name
  self.model_name = model_name
- return True
+ return True
{lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client/lollms_core.py
@@ -13,7 +13,6 @@ import numpy as np
  import pipmaster as pm
  from pathlib import Path
  import os
-

  class LollmsClient():
  """Core class for interacting with LOLLMS bindings"""
@@ -212,6 +211,7 @@ class LollmsClient():
  repeat_last_n: Optional[int] = None,
  seed: Optional[int] = None,
  n_threads: Optional[int] = None,
+ ctx_size: int | None = None,
  streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None) -> str:
  """
  Generate text using the active binding, using instance defaults if parameters are not provided.
@@ -248,6 +248,7 @@ class LollmsClient():
  repeat_last_n=repeat_last_n if repeat_last_n is not None else self.default_repeat_last_n,
  seed=seed if seed is not None else self.default_seed,
  n_threads=n_threads if n_threads is not None else self.default_n_threads,
+ ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size,
  streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback
  )

@@ -257,7 +258,7 @@ class LollmsClient():


  def listModels(self):
- self.binding.listModels()
+ return self.binding.listModels()
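Together, these core changes mean a context size can be passed per call and the model list is actually returned to the caller. A rough usage sketch; the construction of the client is an assumption, and only `generate_text(..., ctx_size=...)` and `listModels()` come from this diff:

```python
from lollms_client.lollms_core import LollmsClient

lc = LollmsClient()                      # assumed default construction; real arguments may differ

models = lc.listModels()                 # now returns the binding's result instead of None
answer = lc.generate_text(
    "Summarize the 0.11.0 changes.",
    ctx_size=8192,                       # forwarded to the binding (e.g. Ollama's num_ctx)
)
```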
@@ -380,103 +381,135 @@ Do not split the code in multiple tags.
  else:
  return None

- def extract_code_blocks(self, text: str) -> List[dict]:
+ def extract_code_blocks(self, text: str, format: str = "markdown") -> List[dict]:
  """
- This function extracts code blocks from a given text.
+ Extracts code blocks from text in Markdown or HTML format.

  Parameters:
- text (str): The text from which to extract code blocks. Code blocks are identified by triple backticks (```).
+ text (str): The text to extract code blocks from.
+ format (str): The format of code blocks ("markdown" for ``` or "html" for <code class="">).

  Returns:
- List[dict]: A list of dictionaries where each dictionary represents a code block and contains the following keys:
- - 'index' (int): The index of the code block in the text.
- - 'file_name' (str): The name of the file extracted from the preceding line, if available.
- - 'content' (str): The content of the code block.
- - 'type' (str): The type of the code block. If the code block starts with a language specifier (like 'python' or 'java'), this field will contain that specifier. Otherwise, it will be set to 'language-specific'.
- - 'is_complete' (bool): True if the block has a closing tag, False otherwise.
-
- Note:
- The function assumes that the number of triple backticks in the text is even.
- If the number of triple backticks is odd, it will consider the rest of the text as the last code block.
- """
+ List[dict]: A list of dictionaries with:
+ - 'index' (int): Index of the code block.
+ - 'file_name' (str): File name from preceding text, if available.
+ - 'content' (str): Code block content.
+ - 'type' (str): Language type (from Markdown first line or HTML class).
+ - 'is_complete' (bool): True if block has a closing tag.
+ """
+ code_blocks = []
  remaining = text
- bloc_index = 0
  first_index = 0
  indices = []
- while len(remaining) > 0:
- try:
- index = remaining.index("```")
- indices.append(index + first_index)
- remaining = remaining[index + 3:]
- first_index += index + 3
- bloc_index += 1
- except Exception as ex:
- if bloc_index % 2 == 1:
- index = len(remaining)
- indices.append(index)
- remaining = ""

- code_blocks = []
- is_start = True
- for index, code_delimiter_position in enumerate(indices):
+ if format.lower() == "markdown":
+ # Markdown: Find triple backtick positions
+ while remaining:
+ try:
+ index = remaining.index("```")
+ indices.append(index + first_index)
+ remaining = remaining[index + 3:]
+ first_index += index + 3
+ except ValueError:
+ if len(indices) % 2 == 1: # Odd number of delimiters
+ indices.append(first_index + len(remaining))
+ break
+
+ elif format.lower() == "html":
+ # HTML: Find <code> and </code> positions, handling nested tags
+ while remaining:
+ try:
+ # Look for opening <code> tag
+ start_index = remaining.index("<code")
+ end_of_opening = remaining.index(">", start_index)
+ indices.append(start_index + first_index)
+ opening_tag = remaining[start_index:end_of_opening + 1]
+ remaining = remaining[end_of_opening + 1:]
+ first_index += end_of_opening + 1
+
+ # Look for matching </code>, accounting for nested <code>
+ nest_level = 0
+ temp_index = 0
+ while temp_index < len(remaining):
+ if remaining[temp_index:].startswith("<code"):
+ nest_level += 1
+ temp_index += remaining[temp_index:].index(">") + 1
+ elif remaining[temp_index:].startswith("</code>"):
+ if nest_level == 0:
+ indices.append(first_index + temp_index)
+ remaining = remaining[temp_index + len("</code>"):]
+ first_index += temp_index + len("</code>")
+ break
+ nest_level -= 1
+ temp_index += len("</code>")
+ else:
+ temp_index += 1
+ else:
+ indices.append(first_index + len(remaining))
+ break
+ except ValueError:
+ break
+
+ else:
+ raise ValueError("Format must be 'markdown' or 'html'")
+
+ for i in range(0, len(indices), 2):
  block_infos = {
- 'index': index,
+ 'index': i // 2,
  'file_name': "",
- 'section': "",
  'content': "",
- 'type': "",
+ 'type': 'language-specific',
  'is_complete': False
  }
- if is_start:
- # Check the preceding line for file name
- preceding_text = text[:code_delimiter_position].strip().splitlines()
- if preceding_text:
- last_line = preceding_text[-1].strip()
- if last_line.startswith("<file_name>") and last_line.endswith("</file_name>"):
- file_name = last_line[len("<file_name>"):-len("</file_name>")].strip()
- block_infos['file_name'] = file_name
- elif last_line.startswith("## filename:"):
- file_name = last_line[len("## filename:"):].strip()
- block_infos['file_name'] = file_name
- if last_line.startswith("<section>") and last_line.endswith("</section>"):
- section = last_line[len("<section>"):-len("</section>")].strip()
- block_infos['section'] = section
-
- sub_text = text[code_delimiter_position + 3:]
- if len(sub_text) > 0:
- try:
- find_space = sub_text.index(" ")
- except:
- find_space = int(1e10)
- try:
- find_return = sub_text.index("\n")
- except:
- find_return = int(1e10)
- next_index = min(find_return, find_space)
- if '{' in sub_text[:next_index]:
- next_index = 0
- start_pos = next_index
- if code_delimiter_position + 3 < len(text) and text[code_delimiter_position + 3] in ["\n", " ", "\t"]:
- block_infos["type"] = 'language-specific'
- else:
- block_infos["type"] = sub_text[:next_index]

- if index + 1 < len(indices):
- next_pos = indices[index + 1] - code_delimiter_position
- if next_pos - 3 < len(sub_text) and sub_text[next_pos - 3] == "`":
- block_infos["content"] = sub_text[start_pos:next_pos - 3].strip()
- block_infos["is_complete"] = True
- else:
- block_infos["content"] = sub_text[start_pos:next_pos].strip()
- block_infos["is_complete"] = False
- else:
- block_infos["content"] = sub_text[start_pos:].strip()
- block_infos["is_complete"] = False
- code_blocks.append(block_infos)
- is_start = False
- else:
- is_start = True
- continue
+ # Extract preceding text for file name
+ start_pos = indices[i]
+ preceding_text = text[:start_pos].strip().splitlines()
+ if preceding_text:
+ last_line = preceding_text[-1].strip()
+ if last_line.startswith("<file_name>") and last_line.endswith("</file_name>"):
+ block_infos['file_name'] = last_line[len("<file_name>"):-len("</file_name>")].strip()
+ elif last_line.startswith("## filename:"):
+ block_infos['file_name'] = last_line[len("## filename:"):].strip()
+
+ # Extract content and type
+ if format.lower() == "markdown":
+ sub_text = text[start_pos + 3:]
+ if i + 1 < len(indices):
+ end_pos = indices[i + 1]
+ content = text[start_pos + 3:end_pos].strip()
+ block_infos['is_complete'] = True
+ else:
+ content = sub_text.strip()
+ block_infos['is_complete'] = False
+
+ if content:
+ first_line = content.split('\n', 1)[0].strip()
+ if first_line and not first_line.startswith(('{', ' ', '\t')):
+ block_infos['type'] = first_line
+ content = content[len(first_line):].strip()
+
+ elif format.lower() == "html":
+ opening_tag = text[start_pos:text.index(">", start_pos) + 1]
+ sub_text = text[start_pos + len(opening_tag):]
+ if i + 1 < len(indices):
+ end_pos = indices[i + 1]
+ content = text[start_pos + len(opening_tag):end_pos].strip()
+ block_infos['is_complete'] = True
+ else:
+ content = sub_text.strip()
+ block_infos['is_complete'] = False
+
+ # Extract language from class attribute
+ if 'class="' in opening_tag:
+ class_start = opening_tag.index('class="') + len('class="')
+ class_end = opening_tag.index('"', class_start)
+ class_value = opening_tag[class_start:class_end]
+ if class_value.startswith("language-"):
+ block_infos['type'] = class_value[len("language-"):]
+
+ block_infos['content'] = content
+ code_blocks.append(block_infos)

  return code_blocks
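As a quick illustration of the new `format` parameter, here is how the reworked extractor behaves on a Markdown block and on an HTML `<code>` block. The client construction is an assumption; the expected dictionaries follow the docstring above:

```python
from lollms_client.lollms_core import LollmsClient

lc = LollmsClient()                      # assumed default construction

md_text = 'Here is the code:\n```python\nprint("hello")\n```'
md_blocks = lc.extract_code_blocks(md_text)          # format defaults to "markdown"
# md_blocks[0] -> {'index': 0, 'file_name': '', 'content': 'print("hello")',
#                  'type': 'python', 'is_complete': True}

html_text = '<code class="language-json">{"a": 1}</code>'
html_blocks = lc.extract_code_blocks(html_text, format="html")
# html_blocks[0]['type'] -> 'json'; html_blocks[0]['is_complete'] -> True
```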
@@ -852,156 +885,191 @@ The updated memory must be put in a {chunk_processing_output_format} markdown ta
  memory=code[0]["content"]
  return memory

- def deep_analyze(
- self,
- query: str,
- text: str = None,
- files: list = None,
- search_prompt: str = "Extract information related to the query from the current text chunk and update the memory with new findings.",
- aggregation_prompt: str = None,
- output_format: str = "markdown",
- ctx_size: int = None,
- chunk_size: int = None,
- bootstrap_chunk_size: int = None,
- bootstrap_steps: int = None,
- callback=None,
- debug: bool = False
- ):
- """
- Searches for specific information related to a query in a long text or a list of files.
- Processes the input in chunks, updates a memory with relevant findings, and optionally aggregates them.
-
- Parameters:
- - query (str): The query to search for.
- - text (str, optional): The input text to search in. Defaults to None.
- - files (list, optional): List of file paths to search in. Defaults to None.
- - search_prompt (str, optional): Prompt for processing each chunk. Defaults to a standard extraction prompt.
- - aggregation_prompt (str, optional): Prompt for aggregating findings. Defaults to None.
- - output_format (str, optional): Output format. Defaults to "markdown".
- - ctx_size (int, optional): Context size for the model. Defaults to None (uses self.ctx_size).
- - chunk_size (int, optional): Size of each chunk. Defaults to None (ctx_size // 4). Smaller chunk sizes yield better results but is slower.
- - bootstrap_chunk_size (int, optional): Size for initial chunks. Defaults to None.
- - bootstrap_steps (int, optional): Number of initial chunks using bootstrap size. Defaults to None.
- - callback (callable, optional): Function called after each chunk. Defaults to None.
- - debug (bool, optional): Enable debug output. Defaults to False.

- Returns:
- - str: The search findings or aggregated output in the specified format.
- """
- # Set defaults
- if ctx_size is None:
- ctx_size = self.ctx_size
- if chunk_size is None:
- chunk_size = ctx_size // 4
-
- # Prepare input
- if files:
- all_texts = [(file, open(file, 'r', encoding='utf-8').read()) for file in files]
- elif text:
- all_texts = [("input_text", text)]
- else:
- raise ValueError("Either text or files must be provided.")
+ def update_memory_from_file_chunk_prompt(self, file_name, file_chunk_id, global_chunk_id, chunk, memory, memory_template, query, task_prompt):
+ return f"""{self.system_full_header}
+ You are a search assistant that processes documents chunk by chunk to find information related to a query, updating a markdown memory of findings at each step.

- # Initialize memory and chunk counter
- memory = ""
- chunk_id = 0
+ Your goal is to extract relevant information from each text chunk and update the provided markdown memory structure, ensuring no key details are omitted or invented. Maintain the structure of the JSON template.

- # Define search prompt template using f-string and the provided search_prompt
- search_prompt_template = f"""{self.system_full_header}
- You are a search assistant that processes documents chunk by chunk to find information related to a query, updating a memory of findings at each step.
+ ----
+ # Current file: {file_name}
+ # Chunk number in this file: {file_chunk_id}
+ # Global chunk number: {global_chunk_id}
+ # Text chunk:
+ ```markdown
+ {chunk}
+ ```
+ {'Current findings memory (cumulative across all files):' if memory!="" else 'Memory template:'}
+ ```markdown
+ {memory if memory!="" else memory_template}
+ ```
+ {self.user_full_header}
+ Query: '{query}'
+ Task: {task_prompt}
+ Update the markdown memory by adding new information from this chunk relevant to the query. Retain all prior findings unless contradicted or updated. Only include explicitly relevant details.
+ Ensure the output is valid markdown matching the structure of the provided template.
+ Make sure to extract only information relevant to answering the user's query or providing important contextual information.
+ Return the updated markdown memory inside a markdown code block.
+ {self.ai_full_header}
+ """

- Your goal is to extract and combine relevant information from each text chunk with the existing memory, ensuring no key details are omitted or invented.
+ def update_memory_from_file_chunk_prompt_markdown(self, file_name, file_chunk_id, global_chunk_id, chunk, memory, query):
+ return f"""{self.system_full_header}
+ You are a search assistant that processes documents chunk by chunk to find information related to a query, updating a markdown memory of findings at each step.

+ Your goal is to extract relevant information from each text chunk and update the provided markdown memory structure, ensuring no key details are omitted or invented. Maintain the structure of the markdown template.

  ----
- # Chunk number: {{chunk_id}}
+ # Current file: {file_name}
+ # Chunk number in this file: {file_chunk_id}
+ # Global chunk number: {global_chunk_id}
  # Text chunk:
  ```markdown
- {{chunk}}
+ {chunk}
  ```
-
- Current findings memory:
+ Current findings memory (cumulative across all files):
  ```markdown
- {{memory}}
+ {memory}
  ```
  {self.user_full_header}
  Query: '{query}'
- Task: {search_prompt}
-
- Update the memory by adding new relevant information from this chunk. Retain all prior findings unless contradicted or updated. Only include explicitly relevant details.
- Make sure to extrafct only information relevant to be able to answer the query of the user or at least gives important contextual information that can be completed to answer the user query.
+ {'Start Creating a memory from the text chunk in a format adapted to answer the user Query' if memory=="" else 'Update the markdown memory by adding new information from this chunk relevant to the query.'} Retain all prior findings unless contradicted or updated. Only include explicitly relevant details.
+ {'Ensure the output is valid markdown matching the structure of the current memory' if memory!='' else 'Ensure the output is valid markdown matching the structure of the provided template.'}
+ Make sure to extract only information relevant to answering the user's query or providing important contextual information.
+ Return the updated markdown memory inside a markdown code block.
  {self.ai_full_header}
  """

- # Calculate static prompt tokens
- example_prompt = search_prompt_template.replace("{{chunk_id}}", "0")\
- .replace("{{memory}}", "")\
- .replace("{{chunk}}", "")
- static_tokens = len(self.tokenize(example_prompt))
-
- # Process each text (file or input)
- for file_name, file_text in all_texts:
- file_tokens = self.tokenize(file_text)
- start_token_idx = 0
-
- while start_token_idx < len(file_tokens):
- # Calculate available tokens
- current_memory_tokens = len(self.tokenize(memory))
- available_tokens = ctx_size - static_tokens - current_memory_tokens
- if available_tokens <= 0:
- raise ValueError("Memory too large - consider reducing chunk size or increasing context window")
-
- # Adjust chunk size
- actual_chunk_size = (
- min(bootstrap_chunk_size, available_tokens)
- if bootstrap_chunk_size is not None and bootstrap_steps is not None and chunk_id < bootstrap_steps
- else min(chunk_size, available_tokens)
- )
-
- end_token_idx = min(start_token_idx + actual_chunk_size, len(file_tokens))
- chunk_tokens = file_tokens[start_token_idx:end_token_idx]
- chunk = self.detokenize(chunk_tokens)
-
- # Generate updated memory
- prompt = search_prompt_template.replace("{chunk_id}", str(chunk_id))\
- .replace("{memory}", memory)\
- .replace("{chunk}", chunk)
- if debug:
- print(f"----- Chunk {chunk_id} from {file_name} ------")
- print(prompt)
-
- output = self.generate_text(prompt, n_predict=ctx_size // 4, streaming_callback=callback).strip()
- code = self.extract_code_blocks(output)
- memory = code[0]["content"] if code else output
-
- if debug:
- print("----- Updated Memory ------")
- print(memory)
- print("---------------------------")
-
- start_token_idx = end_token_idx
- chunk_id += 1
-
- # Aggregate findings if requested
- if aggregation_prompt:
- final_prompt = f"""{self.system_full_header}
- You are a search results aggregator.
+ def deep_analyze(
+ self,
+ query: str,
+ text: str = None,
+ files: list = None,
+ aggregation_prompt: str = None,
+ output_format: str = "markdown",
+ ctx_size: int = None,
+ chunk_size: int = None,
+ bootstrap_chunk_size: int = None,
+ bootstrap_steps: int = None,
+ callback=None,
+ debug: bool = False
+ ):
+ """
+ Searches for specific information related to a query in a long text or a list of files.
+ Processes each file separately in chunks, updates a shared markdown memory with relevant findings, and optionally aggregates them.
+
+ Parameters:
+ - query (str): The query to search for.
+ - text (str, optional): The input text to search in. Defaults to None.
+ - files (list, optional): List of file paths to search in. Defaults to None.
+ - task_prompt (str, optional): Prompt for processing each chunk. Defaults to a standard markdown extraction prompt.
+ - aggregation_prompt (str, optional): Prompt for aggregating findings. Defaults to None.
+ - output_format (str, optional): Output format. Defaults to "markdown".
+ - ctx_size (int, optional): Context size for the model. Defaults to None (uses self.ctx_size).
+ - chunk_size (int, optional): Size of each chunk. Defaults to None (ctx_size // 4). Smaller chunk sizes yield better results but are slower.
+ - bootstrap_chunk_size (int, optional): Size for initial chunks. Defaults to None.
+ - bootstrap_steps (int, optional): Number of initial chunks using bootstrap size. Defaults to None.
+ - callback (callable, optional): Function called after each chunk. Defaults to None.
+ - debug (bool, optional): Enable debug output. Defaults to False.
+
+ Returns:
+ - str: The search findings or aggregated output in the specified format.
+ """
+ # Set defaults
+ if ctx_size is None:
+ ctx_size = self.default_ctx_size
+ if chunk_size is None:
+ chunk_size = ctx_size // 4
+
+ # Prepare input
+ if files:
+ all_texts = [(file, open(file, 'r', encoding='utf-8').read()) for file in files]
+ elif text:
+ all_texts = [("input_text", text)]
+ else:
+ raise ValueError("Either text or files must be provided.")

+ # Set default memory template for article analysis if none provided
+ memory = ""
+
+ # Initialize global chunk counter
+ global_chunk_id = 0
+
+ # Calculate static prompt tokens
+ example_prompt = self.update_memory_from_file_chunk_prompt_markdown("example.txt","0", "0", "", "", query)
+ static_tokens = len(self.tokenize(example_prompt))
+
+ # Process each file separately
+ for file_name, file_text in all_texts:
+ file_tokens = self.tokenize(file_text)
+ start_token_idx = 0
+ file_chunk_id = 0 # Reset chunk counter for each file
+
+ while start_token_idx < len(file_tokens):
+ # Calculate available tokens
+ current_memory_tokens = len(self.tokenize(memory))
+ available_tokens = ctx_size - static_tokens - current_memory_tokens
+ if available_tokens <= 0:
+ raise ValueError("Memory too large - consider reducing chunk size or increasing context window")
+
+ # Adjust chunk size
+ actual_chunk_size = (
+ min(bootstrap_chunk_size, available_tokens)
+ if bootstrap_chunk_size is not None and bootstrap_steps is not None and global_chunk_id < bootstrap_steps
+ else min(chunk_size, available_tokens)
+ )
+
+ end_token_idx = min(start_token_idx + actual_chunk_size, len(file_tokens))
+ chunk_tokens = file_tokens[start_token_idx:end_token_idx]
+ chunk = self.detokenize(chunk_tokens)
+
+ # Generate updated memory
+ prompt = self.update_memory_from_file_chunk_prompt_markdown(
+ file_name=file_name,
+ file_chunk_id=file_chunk_id,
+ global_chunk_id=global_chunk_id,
+ chunk=chunk,
+ memory=memory,
+ query=query)
+ if debug:
+ print(f"----- Chunk {file_chunk_id} (Global {global_chunk_id}) from {file_name} ------")
+ print(prompt)
+
+ output = self.generate_text(prompt, n_predict=ctx_size // 4, streaming_callback=callback).strip()
+ code = self.extract_code_blocks(output)
+ if code:
+ memory = code[0]["content"]
+ else:
+ memory = output
+
+ if debug:
+ ASCIIColors.red("----- Updated Memory ------")
+ ASCIIColors.white(memory)
+ ASCIIColors.red("---------------------------")
+
+ start_token_idx = end_token_idx
+ file_chunk_id += 1
+ global_chunk_id += 1
+
+ # Aggregate findings if requested
+ if aggregation_prompt:
+ final_prompt = f"""{self.system_full_header}
+ You are a search results aggregator.
  {self.user_full_header}
  {aggregation_prompt}
-
- Collected findings:
+ Collected findings (across all files):
  ```markdown
  {memory}
  ```
-
  Provide the final output in {output_format} format.
  {self.ai_full_header}
  """
- final_output = self.generate_text(final_prompt, streaming_callback=callback)
- code = self.extract_code_blocks(final_output)
- return code[0]["content"] if code else final_output
- return memory
+ final_output = self.generate_text(final_prompt, streaming_callback=callback)
+ code = self.extract_code_blocks(final_output)
+ return code[0]["content"] if code else final_output
+ return memory
+
  def error(self, content, duration:int=4, client_id=None, verbose:bool=True):
  ASCIIColors.error(content)
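A possible call to the reworked deep_analyze, which now walks each file separately while sharing one cumulative markdown memory. File names, prompts, and the client construction are placeholders; the keyword arguments come from the signature above:

```python
from lollms_client.lollms_core import LollmsClient

lc = LollmsClient()                      # assumed default construction

findings = lc.deep_analyze(
    query="What changed between 0.10.0 and 0.11.0?",
    files=["CHANGELOG.md", "release_notes.txt"],   # processed file by file, one shared memory
    aggregation_prompt="Summarize the findings as short release notes.",
    output_format="markdown",
    chunk_size=2048,
    debug=True,
)
print(findings)
```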
{lollms_client-0.10.0 → lollms_client-0.11.0}/lollms_client.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: lollms_client
- Version: 0.10.0
+ Version: 0.11.0
  Summary: A client library for LoLLMs generate endpoint
  Home-page: https://github.com/ParisNeo/lollms_client
  Author: ParisNeo
@@ -11,6 +11,15 @@ Classifier: Operating System :: OS Independent
  Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: requests
+ Dynamic: author
+ Dynamic: author-email
+ Dynamic: classifier
+ Dynamic: description
+ Dynamic: description-content-type
+ Dynamic: home-page
+ Dynamic: license-file
+ Dynamic: requires-dist
+ Dynamic: summary

  # lollms_client

{lollms_client-0.10.0 → lollms_client-0.11.0}/setup.py
@@ -8,7 +8,7 @@ with open('requirements.txt', 'r') as f:

  setuptools.setup(
  name="lollms_client",
- version="0.10.0",
+ version="0.11.0",
  author="ParisNeo",
  author_email="parisneoai@gmail.com",
  description="A client library for LoLLMs generate endpoint",