lollms-client 0.9.2 (py3-none-any.whl) → 0.11.0 (py3-none-any.whl)

This diff shows the changes between publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only.

Potentially problematic release: this version of lollms-client might be problematic.

@@ -1,143 +1,109 @@
1
1
  import requests
2
2
  from ascii_colors import ASCIIColors, trace_exception
3
- from lollms_client.lollms_types import MSG_TYPE
3
+ from lollms_client.lollms_types import MSG_TYPE, ELF_COMPLETION_FORMAT
4
4
  from lollms_client.lollms_utilities import encode_image
5
+ from lollms_client.lollms_llm_binding import LollmsLLMBindingManager
5
6
  import json
6
7
  from enum import Enum
7
- import tiktoken
8
8
  import base64
9
9
  import requests
10
10
  import pipmaster as pm
11
- from typing import List, Optional, Callable, Union
11
+ from typing import List, Optional, Callable, Union, Dict
12
12
  import numpy as np
13
13
  import pipmaster as pm
14
+ from pathlib import Path
14
15
  import os
15
-
16
- class ELF_GENERATION_FORMAT(Enum):
17
- LOLLMS = 0
18
- OPENAI = 1
19
- OLLAMA = 2
20
- LITELLM = 3
21
- TRANSFORMERS = 4
22
- VLLM = 5
23
-
24
- @classmethod
25
- def from_string(cls, format_string: str) -> 'ELF_GENERATION_FORMAT':
26
- format_mapping = {
27
- "LOLLMS": cls.LOLLMS,
28
- "OPENAI": cls.OPENAI,
29
- "OLLAMA": cls.OLLAMA,
30
- "LITELLM": cls.LITELLM,
31
- "TRANSFORMERS": cls.TRANSFORMERS,
32
- "VLLM": cls.VLLM
33
- }
34
-
35
- try:
36
- return format_mapping[format_string.upper()]
37
- except KeyError:
38
- raise ValueError(f"Invalid format string: {format_string}. Must be one of {list(format_mapping.keys())}.")
39
-
40
- def __str__(self):
41
- return self.name
42
- class ELF_COMPLETION_FORMAT(Enum):
43
- Instruct = 0
44
- Chat = 1
45
- @classmethod
46
- def from_string(cls, format_string: str) -> 'ELF_COMPLETION_FORMAT':
47
- format_mapping = {
48
- "Instruct": cls.Instruct,
49
- "Chat": cls.Chat,
50
- }
51
-
52
- try:
53
- return format_mapping[format_string.upper()]
54
- except KeyError:
55
- raise ValueError(f"Invalid format string: {format_string}. Must be one of {list(format_mapping.keys())}.")
56
-
57
- def __str__(self):
58
- return self.name
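
The two enum classes removed here no longer live in this module: ELF_COMPLETION_FORMAT is now imported from lollms_client.lollms_types (see the updated import at the top of this diff), and the generation backend is selected by a binding name rather than ELF_GENERATION_FORMAT. A minimal sketch of the new usage, assuming only what this diff shows:

    # ELF_COMPLETION_FORMAT is now provided by lollms_types (per the new import above)
    from lollms_client.lollms_types import ELF_COMPLETION_FORMAT

    fmt = ELF_COMPLETION_FORMAT.Chat   # the Instruct / Chat members are unchanged
    # Backends are now chosen by a binding name string, e.g. binding_name="ollama",
    # as illustrated in the constructor sketch after the next hunk.
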
59
16
 
60
17
  class LollmsClient():
61
- def __init__(
62
- self,
63
- host_address=None,
64
- model_name=None,
65
- ctx_size=32000,
66
- personality=-1,
67
- n_predict=4096,
68
- min_n_predict=512,
69
- temperature=0.1,
70
- top_k=50,
71
- top_p=0.95,
72
- repeat_penalty=0.8,
73
- repeat_last_n=40,
74
- seed=None,
75
- n_threads=8,
76
- service_key:str="",
77
- tokenizer=None,
78
- default_generation_mode=ELF_GENERATION_FORMAT.LOLLMS,
79
- verify_ssl_certificate = True,
80
- user_name = "user",
81
- ai_name = "assistant"
82
- ) -> None:
83
- import tiktoken
18
+ """Core class for interacting with LOLLMS bindings"""
19
+ def __init__(self,
20
+ binding_name: str = "lollms",
21
+ host_address: Optional[str] = None,
22
+ model_name: str = "",
23
+ service_key: Optional[str] = None,
24
+ verify_ssl_certificate: bool = True,
25
+ personality: Optional[int] = None,
26
+ llm_bindings_dir: Path = Path(__file__).parent / "llm_bindings",
27
+ binding_config: Optional[Dict[str, any]] = None,
28
+ ctx_size: Optional[int] = 8192,
29
+ n_predict: Optional[int] = 4096,
30
+ stream: bool = False,
31
+ temperature: float = 0.1,
32
+ top_k: int = 50,
33
+ top_p: float = 0.95,
34
+ repeat_penalty: float = 0.8,
35
+ repeat_last_n: int = 40,
36
+ seed: Optional[int] = None,
37
+ n_threads: int = 8,
38
+ streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
39
+ user_name ="user",
40
+ ai_name = "assistant"):
41
+ """
42
+ Initialize the LollmsClient with a binding and generation parameters.
43
+
44
+ Args:
45
+ binding_name (str): Name of the binding to use (e.g., "lollms", "ollama").
46
+ host_address (Optional[str]): Host address for the service. Overrides binding default if provided.
47
+ model_name (str): Name of the model to use. Defaults to empty string.
48
+ service_key (Optional[str]): Authentication key for the service.
49
+ verify_ssl_certificate (bool): Whether to verify SSL certificates. Defaults to True.
50
+ personality (Optional[int]): Personality ID (used only by LOLLMS binding).
51
+ llm_bindings_dir (Path): Directory containing binding implementations.
52
+ Defaults to the "bindings" subdirectory relative to this file's location.
53
+ binding_config (Optional[Dict[str, any]]): Additional configuration for the binding.
54
+ n_predict (Optional[int]): Maximum number of tokens to generate. Default for generate_text.
55
+ stream (bool): Whether to stream the output. Defaults to False for generate_text.
56
+ temperature (float): Sampling temperature. Defaults to 0.1 for generate_text.
57
+ top_k (int): Top-k sampling parameter. Defaults to 50 for generate_text.
58
+ top_p (float): Top-p sampling parameter. Defaults to 0.95 for generate_text.
59
+ repeat_penalty (float): Penalty for repeated tokens. Defaults to 0.8 for generate_text.
60
+ repeat_last_n (int): Number of previous tokens to consider for repeat penalty. Defaults to 40.
61
+ seed (Optional[int]): Random seed for generation. Default for generate_text.
62
+ n_threads (int): Number of threads to use. Defaults to 8 for generate_text.
63
+ streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
64
+ Default for generate_text. Takes a string chunk and an MSG_TYPE enum value.
65
+
66
+ Raises:
67
+ ValueError: If the specified binding cannot be created.
68
+ """
69
+ self.binding_manager = LollmsLLMBindingManager(llm_bindings_dir)
70
+ self.binding_config = binding_config or {}
71
+
72
+ # Store generation parameters as instance variables
73
+ self.default_ctx_size = ctx_size
74
+ self.default_n_predict = n_predict
75
+ self.default_stream = stream
76
+ self.default_temperature = temperature
77
+ self.default_top_k = top_k
78
+ self.default_top_p = top_p
79
+ self.default_repeat_penalty = repeat_penalty
80
+ self.default_repeat_last_n = repeat_last_n
81
+ self.default_seed = seed
82
+ self.default_n_threads = n_threads
83
+ self.default_streaming_callback = streaming_callback
84
+
85
+ # Create the binding instance
86
+ self.binding = self.binding_manager.create_binding(
87
+ binding_name=binding_name,
88
+ host_address=host_address,
89
+ model_name=model_name,
90
+ service_key=service_key,
91
+ verify_ssl_certificate=verify_ssl_certificate,
92
+ personality=personality
93
+ )
94
+
95
+ if self.binding is None:
96
+ raise ValueError(f"Failed to create binding: {binding_name}. Available bindings: {self.binding_manager.get_available_bindings()}")
97
+
98
+ # Apply additional configuration if provided
99
+ if binding_config:
100
+ for key, value in binding_config.items():
101
+ setattr(self.binding, key, value)
84
102
  self.user_name = user_name
85
103
  self.ai_name = ai_name
86
- self.host_address=host_address
87
- if not self.host_address:
88
- if default_generation_mode==ELF_GENERATION_FORMAT.LOLLMS:
89
- self.host_address = "http://localhost:9600"
90
- elif default_generation_mode==ELF_GENERATION_FORMAT.OPENAI:
91
- self.host_address = "https://api.openai.com"
92
- elif default_generation_mode==ELF_GENERATION_FORMAT.OLLAMA:
93
- self.host_address = "http://localhost:11434"
94
- else:
95
- self.host_address = "http://localhost:9600"
96
-
97
- self.model_name = model_name
98
- self.ctx_size = ctx_size
99
- self.n_predict = n_predict
100
- self.min_n_predict = min_n_predict
101
- self.personality = personality
102
- self.temperature = temperature
103
- self.top_k = top_k
104
- self.top_p = top_p
105
- self.repeat_penalty = repeat_penalty
106
- self.repeat_last_n = repeat_last_n
107
- self.seed = seed
108
- self.n_threads = n_threads
109
104
  self.service_key = service_key
110
- if not self.service_key and default_generation_mode == ELF_GENERATION_FORMAT.OPENAI:
111
- self.service_key = os.getenv("OPENAI_API_KEY","")
112
- self.default_generation_mode = default_generation_mode
113
- self.verify_ssl_certificate = verify_ssl_certificate
114
- self.tokenizer = tiktoken.model.encoding_for_model("gpt-3.5-turbo-1106") if tokenizer is None else tokenizer
115
- if default_generation_mode == ELF_GENERATION_FORMAT.TRANSFORMERS:
116
- if not pm.is_installed("torch"):
117
- ASCIIColors.yellow("Diffusers: Torch not found. Installing it")
118
- pm.install_multiple(["torch","torchvision","torchaudio"], "https://download.pytorch.org/whl/cu121", force_reinstall=True)
119
-
120
- import torch
121
- if not torch.cuda.is_available():
122
- ASCIIColors.yellow("Diffusers: Torch not using cuda. Reinstalling it")
123
- pm.install_multiple(["torch","torchvision","torchaudio"], "https://download.pytorch.org/whl/cu121", force_reinstall=True)
124
- import torch
125
-
126
- if not pm.is_installed("transformers"):
127
- pm.install_or_update("transformers")
128
- from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
129
- self.tokenizer = AutoTokenizer.from_pretrained(
130
- str(model_name), trust_remote_code=False
131
- )
132
-
133
- self.model = AutoModelForCausalLM.from_pretrained(
134
- str(model_name),
135
- device_map="auto",
136
- load_in_4bit=True,
137
- torch_dtype=torch.bfloat16 # Load in float16 for quantization
138
- )
139
- self.generation_config = GenerationConfig.from_pretrained(str(model_name))
140
105
 
106
+ self.verify_ssl_certificate = verify_ssl_certificate
141
107
  self.start_header_id_template ="!@>"
142
108
  self.end_header_id_template =": "
143
109
  self.system_message_template ="system"
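
The constructor above now delegates backend selection to a binding discovered by LollmsLLMBindingManager. A hedged usage sketch of the 0.11.0 API; the binding name, host address, and model name below are illustrative assumptions, and the import path assumes the package re-exports LollmsClient at the top level:

    from lollms_client import LollmsClient  # import path assumed

    lc = LollmsClient(
        binding_name="ollama",                  # must match a binding under llm_bindings/
        host_address="http://localhost:11434",  # overrides the binding default if provided
        model_name="mistral",                   # illustrative model name
        ctx_size=8192,
        n_predict=512,
        temperature=0.1,
    )
    # Raises ValueError if the binding cannot be created; the error message lists
    # the available bindings reported by the binding manager.
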
@@ -149,12 +115,6 @@ class LollmsClient():
149
115
  self.end_ai_header_id_template =": "
150
116
  self.end_ai_message_id_template =""
151
117
 
152
- if default_generation_mode==ELF_GENERATION_FORMAT.OPENAI:
153
- if not pm.is_installed("openai"):
154
- pm.install("openai")
155
- import openai
156
- self.client = openai.OpenAI(base_url=host_address)
157
-
158
118
 
159
119
  @property
160
120
  def system_full_header(self) -> str:
@@ -185,1150 +145,121 @@ class LollmsClient():
185
145
 
186
146
  def sink(self, s=None,i=None,d=None):
187
147
  pass
188
-
189
- def tokenize(self, prompt:str):
148
+ def tokenize(self, text: str) -> list:
190
149
  """
191
- Tokenizes the given prompt using the model's tokenizer.
150
+ Tokenize text using the active binding.
192
151
 
193
152
  Args:
194
- prompt (str): The input prompt to be tokenized.
153
+ text (str): The text to tokenize.
195
154
 
196
155
  Returns:
197
- list: A list of tokens representing the tokenized prompt.
156
+ list: List of tokens.
198
157
  """
199
- tokens_list = self.tokenizer.encode(prompt)
200
-
201
- return tokens_list
202
-
203
- def detokenize(self, tokens_list:list):
158
+ return self.binding.tokenize(text)
159
+
160
+ def detokenize(self, tokens: list) -> str:
204
161
  """
205
- Detokenizes the given list of tokens using the model's tokenizer.
162
+ Detokenize tokens using the active binding.
206
163
 
207
164
  Args:
208
- tokens_list (list): A list of tokens to be detokenized.
165
+ tokens (list): List of tokens to detokenize.
209
166
 
210
167
  Returns:
211
- str: The detokenized text as a string.
168
+ str: Detokenized text.
212
169
  """
213
- text = self.tokenizer.decode(tokens_list)
214
-
215
- return text
170
+ return self.binding.detokenize(tokens)
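
tokenize and detokenize now simply forward to the active binding instead of the hard-coded tiktoken tokenizer removed above. A small sketch, reusing the hypothetical lc instance from the constructor example:

    tokens = lc.tokenize("Hello from lollms-client 0.11.0")
    print(len(tokens))            # handy for context-budget checks
    print(lc.detokenize(tokens))  # round-trips through the binding's tokenizer
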
216
171
 
217
- def embed(self, text):
218
- if self.default_generation_mode == ELF_GENERATION_FORMAT.LOLLMS:
219
- return self.lollms_embed(text)
220
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.OLLAMA:
221
- return self.ollama_embed(text)
222
- else:
223
- return #not implemented
224
-
225
- def ollama_embed(self, text, **kwargs):
172
+ def get_model_details(self) -> dict:
226
173
  """
227
- Get embeddings for the input text using Ollama API
228
-
229
- Args:
230
- text (str or List[str]): Input text to embed
231
- **kwargs: Additional arguments like model, truncate, options, keep_alive
232
-
174
+ Get model information from the active binding.
175
+
233
176
  Returns:
234
- dict: Response containing embeddings
177
+ dict: Model information dictionary.
235
178
  """
236
- import requests
237
-
238
- url = f"{self.base_url}/api/embed"
239
-
240
- # Prepare the request payload
241
- payload = {
242
- "input": text,
243
- "model": kwargs.get("model", "llama2") # default model
244
- }
245
-
246
- # Add optional parameters if provided
247
- if "truncate" in kwargs:
248
- payload["truncate"] = kwargs["truncate"]
249
- if "options" in kwargs:
250
- payload["options"] = kwargs["options"]
251
- if "keep_alive" in kwargs:
252
- payload["keep_alive"] = kwargs["keep_alive"]
253
-
254
- try:
255
- response = requests.post(url, json=payload)
256
- response.raise_for_status() # Raise exception for bad status codes
257
- return response.json()
258
- except requests.exceptions.RequestException as e:
259
- raise Exception(f"Embedding request failed: {str(e)}")
260
-
261
-
262
- def lollms_embed(self, texts, **kwargs):
263
- api_key = kwargs.pop("api_key", None)
264
- headers = (
265
- {"Content-Type": "application/json", "Authorization": api_key}
266
- if api_key
267
- else {"Content-Type": "application/json"}
268
- )
269
- embeddings = []
270
- for text in texts:
271
- request_data = {"text": text}
272
- response = requests.post(f"{self.host_address}/lollms_embed", json=request_data, headers=headers)
273
- response.raise_for_status()
274
- result = response.json()
275
- embeddings.append(result["vector"])
276
- return np.array(embeddings)
179
+ return self.binding.get_model_info()
277
180
 
278
- def generate_with_images(self, prompt, images, n_predict=None, stream=False, temperature=0.1, top_k=50, top_p=0.95, repeat_penalty=0.8, repeat_last_n=40, seed=None, n_threads=8, service_key:str="", streaming_callback=None):
279
- if self.default_generation_mode == ELF_GENERATION_FORMAT.LOLLMS:
280
- return self.lollms_generate_with_images(prompt, images, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, service_key, streaming_callback)
281
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.OPENAI:
282
- return self.openai_generate_with_images(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, ELF_COMPLETION_FORMAT.Instruct, service_key, streaming_callback)
283
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.OLLAMA:
284
- return self.ollama_generate_with_images(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, ELF_COMPLETION_FORMAT.Instruct, service_key, streaming_callback)
285
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.LITELLM:
286
- return # To be implemented #self.litellm_generate_with_images(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, ELF_COMPLETION_FORMAT.Instruct, service_key, streaming_callback)
287
-
288
-
289
- def generate(self, prompt, n_predict=None, stream=False, temperature=0.1, top_k=50, top_p=0.95, repeat_penalty=0.8, repeat_last_n=40, seed=None, n_threads=8, service_key:str="", streaming_callback=None, completion_format = ELF_COMPLETION_FORMAT.Chat):
290
- if self.default_generation_mode == ELF_GENERATION_FORMAT.LOLLMS:
291
- return self.lollms_generate(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, service_key, streaming_callback)
292
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.OPENAI:
293
- return self.openai_generate(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, completion_format, service_key, streaming_callback)
294
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.OLLAMA:
295
- return self.ollama_generate(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, completion_format, service_key, streaming_callback)
296
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.LITELLM:
297
- return self.litellm_generate(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, completion_format, service_key, streaming_callback)
298
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.VLLM:
299
- return self.vllm_generate(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, completion_format, service_key, streaming_callback)
300
-
301
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.TRANSFORMERS:
302
- return self.transformers_generate(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, service_key, streaming_callback)
303
-
304
-
305
- def generate_text(self, prompt, host_address=None, model_name=None, personality=None, n_predict=None, stream=False, temperature=0.1, top_k=50, top_p=0.95, repeat_penalty=0.8, repeat_last_n=40, seed=None, n_threads=8, service_key:str="", streaming_callback=None):
306
- return self.lollms_generate(prompt, host_address, model_name, personality, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, service_key, streaming_callback)
307
-
308
- def lollms_generate(self, prompt, host_address=None, model_name=None, personality=None, n_predict=None, stream=False, temperature=0.1, top_k=50, top_p=0.95, repeat_penalty=0.8, repeat_last_n=40, seed=None, n_threads=8, service_key:str="", streaming_callback=None):
309
- # Set default values to instance variables if optional arguments are None
310
- host_address = host_address if host_address else self.host_address
311
- model_name = model_name if model_name else self.model_name
312
- n_predict = n_predict if n_predict else self.n_predict
313
- personality = personality if personality is not None else self.personality
314
- # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
315
- temperature = temperature if temperature is not None else self.temperature
316
- top_k = top_k if top_k is not None else self.top_k
317
- top_p = top_p if top_p is not None else self.top_p
318
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
319
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
320
- seed = seed or self.seed # Use the instance seed if not provided
321
- n_threads = n_threads if n_threads else self.n_threads
322
-
323
-
324
- url = f"{host_address}/lollms_generate"
325
- if service_key!="":
326
- headers = {
327
- 'Content-Type': 'application/json;',
328
- 'Authorization': f'Bearer {service_key}',
329
- }
330
- else:
331
- headers = {
332
- 'Content-Type': 'application/json',
333
- }
334
- data = {
335
- "prompt": prompt,
336
- "model_name": self.model_name,
337
- "personality": self.personality,
338
- "n_predict": n_predict,
339
- "stream": stream,
340
- "temperature": self.temperature,
341
- "top_k": self.top_k,
342
- "top_p": self.top_p,
343
- "repeat_penalty": repeat_penalty,
344
- "repeat_last_n": repeat_last_n,
345
- "seed": seed,
346
- "n_threads": n_threads
347
- }
348
-
349
- response = requests.post(url, json=data, headers=headers, stream=stream)
350
- if not stream:
351
- if response.status_code == 200:
352
- try:
353
- text = response.text.strip().rstrip('!')
354
- return text
355
- except Exception as ex:
356
- return {"status": False, "error": str(ex)}
357
- else:
358
- return {"status": False, "error": response.text}
359
- else:
360
- text = ""
361
- if response.status_code==200:
362
- try:
363
- for line in response.iter_lines():
364
- chunk = line.decode("utf-8")
365
- text += chunk
366
- if streaming_callback:
367
- streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK)
368
- return text.rstrip('!')
369
- except Exception as ex:
370
- return {"status": False, "error": str(ex)}
371
- else:
372
- return {"status": False, "error": response.text}
373
-
374
-
375
- def lollms_generate_with_images(
376
- self,
377
- prompt: str,
378
- images: List[str],
379
- host_address: Optional[str] = None,
380
- model_name: Optional[str] = None,
381
- personality: Optional[str] = None,
382
- n_predict: Optional[int] = None,
383
- stream: bool = False,
384
- temperature: float = 0.1,
385
- top_k: int = 50,
386
- top_p: float = 0.95,
387
- repeat_penalty: float = 0.8,
388
- repeat_last_n: int = 40,
389
- seed: Optional[int] = None,
390
- n_threads: int = 8,
391
- service_key: str = "",
392
- streaming_callback: Optional[Callable[[str, int], None]] = None
393
- ) -> Union[str, dict]:
181
+ def switch_model(self, model_name: str) -> bool:
394
182
  """
395
- Generates text based on a prompt and a list of images using a specified model.
183
+ Load a new model in the active binding.
396
184
 
397
185
  Args:
398
- prompt (str): The text prompt to generate responses for.
399
- images (List[str]): A list of file paths to images to be included in the generation.
400
- host_address (Optional[str]): The host address for the service. Defaults to instance variable.
401
- model_name (Optional[str]): The model name to use. Defaults to instance variable.
402
- personality (Optional[str]): The personality setting for the generation. Defaults to instance variable.
403
- n_predict (Optional[int]): The number of tokens to predict. Defaults to instance variable.
404
- stream (bool): Whether to stream the response. Defaults to False.
405
- temperature (float): Sampling temperature. Defaults to 0.1.
406
- top_k (int): Top-k sampling parameter. Defaults to 50.
407
- top_p (float): Top-p (nucleus) sampling parameter. Defaults to 0.95.
408
- repeat_penalty (float): Penalty for repeating tokens. Defaults to 0.8.
409
- repeat_last_n (int): Number of last tokens to consider for repeat penalty. Defaults to 40.
410
- seed (Optional[int]): Random seed for generation. Defaults to instance variable.
411
- n_threads (int): Number of threads to use. Defaults to 8.
412
- service_key (str): Optional service key for authorization.
413
- streaming_callback (Optional[Callable[[str, int], None]]): Callback for streaming responses.
186
+ model_name (str): Name of the model to load.
414
187
 
415
188
  Returns:
416
- Union[str, dict]: The generated text if not streaming, or a dictionary with status and error if applicable.
189
+ bool: True if model loaded successfully, False otherwise.
417
190
  """
418
-
419
- # Set default values to instance variables if optional arguments are None
420
- host_address = host_address if host_address else self.host_address
421
- model_name = model_name if model_name else self.model_name
422
- n_predict = n_predict if n_predict else self.n_predict
423
- personality = personality if personality is not None else self.personality
424
-
425
- # Set parameters to instance variables if they are not provided or None
426
- temperature = temperature if temperature is not None else self.temperature
427
- top_k = top_k if top_k is not None else self.top_k
428
- top_p = top_p if top_p is not None else self.top_p
429
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
430
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
431
- seed = seed or self.seed # Use the instance seed if not provided
432
- n_threads = n_threads if n_threads else self.n_threads
433
-
434
- def encode_image_to_base64(image_path: str) -> str:
435
- """Encodes an image file to a base64 string."""
436
- with open(image_path, "rb") as image_file:
437
- encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
438
- return encoded_string
439
-
440
- # Encode images in base64
441
- encoded_images = [encode_image_to_base64(image) for image in images]
442
-
443
- url = f"{host_address}/lollms_generate_with_images"
444
- headers = {
445
- 'Content-Type': 'application/json',
446
- 'Authorization': f'Bearer {service_key}' if service_key else '',
447
- }
448
-
449
- data = {
450
- "prompt": prompt,
451
- "model_name": model_name,
452
- "personality": personality,
453
- "n_predict": n_predict,
454
- "stream": stream,
455
- "temperature": temperature,
456
- "top_k": top_k,
457
- "top_p": top_p,
458
- "repeat_penalty": repeat_penalty,
459
- "repeat_last_n": repeat_last_n,
460
- "seed": seed,
461
- "n_threads": n_threads,
462
- "images": encoded_images # Add encoded images to the request payload
463
- }
464
-
465
- response = requests.post(url, json=data, headers=headers, stream=stream)
466
- if not stream:
467
- if response.status_code == 200:
468
- try:
469
- text = response.text.rstrip('!')
470
- return text
471
- except Exception as ex:
472
- return {"status": False, "error": str(ex)}
473
- else:
474
- return {"status": False, "error": response.text}
475
- else:
476
- text = ""
477
- if response.status_code == 200:
478
- try:
479
- for line in response.iter_lines():
480
- chunk = line.decode("utf-8")
481
- text += chunk
482
- if streaming_callback:
483
- streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK)
484
- if text[0] == '"':
485
- text = text[1:]
486
- if text[-1] == '"':
487
- text = text[:-1]
488
- return text
489
- except Exception as ex:
490
- return {"status": False, "error": str(ex)}
491
- else:
492
- return {"status": False, "error": response.text}
493
-
191
+ return self.binding.load_model(model_name)
494
192
 
495
- def transformers_generate(self, prompt, host_address=None, model_name=None, personality=None, n_predict=None, stream=False, temperature=0.1, top_k=50, top_p=0.95, repeat_penalty=0.8, repeat_last_n=40, seed=None, n_threads=8, service_key:str="", streaming_callback=None):
496
- # Set default values to instance variables if optional arguments are None
497
- model_name = model_name if model_name else self.model_name
498
- n_predict = n_predict if n_predict else self.n_predict
499
- personality = personality if personality is not None else self.personality
500
- # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
501
- temperature = temperature if temperature is not None else self.temperature
502
- top_k = top_k if top_k is not None else self.top_k
503
- top_p = top_p if top_p is not None else self.top_p
504
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
505
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
506
- seed = seed or self.seed # Use the instance seed if not provided
507
- n_threads = n_threads if n_threads else self.n_threads
508
-
509
- self.generation_config.max_new_tokens = int(n_predict)
510
- self.generation_config.temperature = float(temperature)
511
- self.generation_config.top_k = int(top_k)
512
- self.generation_config.top_p = float(top_p)
513
- self.generation_config.repetition_penalty = float(repeat_penalty)
514
- self.generation_config.do_sample = True if float(temperature)>0 else False
515
- self.generation_config.pad_token_id = self.tokenizer.pad_token_id
516
- self.generation_config.eos_token_id = self.tokenizer.eos_token_id
517
- self.generation_config.output_attentions = False
518
-
519
- try:
520
- input_ids = self.tokenizer(prompt, add_special_tokens=False, return_tensors='pt').input_ids
521
- class StreamerClass:
522
- def __init__(self, tokenizer, callback):
523
- self.output = ""
524
- self.skip_prompt = True
525
- self.decode_kwargs = {}
526
- self.tokenizer = tokenizer
527
-
528
- # variables used in the streaming process
529
- self.token_cache = []
530
- self.print_len = 0
531
- self.next_tokens_are_prompt = True
532
- self.callback = callback
533
- def put(self, value):
534
- """
535
- Receives tokens, decodes them, and prints them to stdout as soon as they form entire words.
536
- """
537
- if len(value.shape)==1 and (value[0] == self.tokenizer.eos_token_id or value[0] == self.tokenizer.bos_token_id):
538
- print("eos detected")
539
- return
540
- if len(value.shape) > 1 and value.shape[0] > 1:
541
- raise ValueError("TextStreamer only supports batch size 1")
542
- elif len(value.shape) > 1:
543
- value = value[0]
544
-
545
- if self.skip_prompt and self.next_tokens_are_prompt:
546
- self.next_tokens_are_prompt = False
547
- return
548
-
549
- # Add the new token to the cache and decodes the entire thing.
550
- self.token_cache.extend(value.tolist())
551
- text = self.tokenizer.decode(self.token_cache, **self.decode_kwargs)
552
-
553
- # After the symbol for a new line, we flush the cache.
554
- if text.endswith("\n"):
555
- printable_text = text[self.print_len :]
556
- self.token_cache = []
557
- self.print_len = 0
558
- # If the last token is a CJK character, we print the characters.
559
- elif len(text) > 0 and self._is_chinese_char(ord(text[-1])):
560
- printable_text = text[self.print_len :]
561
- self.print_len += len(printable_text)
562
- # Otherwise, prints until the last space char (simple heuristic to avoid printing incomplete words,
563
- # which may change with the subsequent token -- there are probably smarter ways to do this!)
564
- else:
565
- printable_text = text[self.print_len : text.rfind(" ") + 1]
566
- self.print_len += len(printable_text)
567
-
568
- self.output += printable_text
569
- if self.callback:
570
- if not self.callback(printable_text, 0):
571
- raise Exception("canceled")
572
-
573
- def _is_chinese_char(self, cp):
574
- """Checks whether CP is the codepoint of a CJK character."""
575
- # This defines a "chinese character" as anything in the CJK Unicode block:
576
- # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
577
- #
578
- # Note that the CJK Unicode block is NOT all Japanese and Korean characters,
579
- # despite its name. The modern Korean Hangul alphabet is a different block,
580
- # as is Japanese Hiragana and Katakana. Those alphabets are used to write
581
- # space-separated words, so they are not treated specially and handled
582
- # like the all of the other languages.
583
- if (
584
- (cp >= 0x4E00 and cp <= 0x9FFF)
585
- or (cp >= 0x3400 and cp <= 0x4DBF) #
586
- or (cp >= 0x20000 and cp <= 0x2A6DF) #
587
- or (cp >= 0x2A700 and cp <= 0x2B73F) #
588
- or (cp >= 0x2B740 and cp <= 0x2B81F) #
589
- or (cp >= 0x2B820 and cp <= 0x2CEAF) #
590
- or (cp >= 0xF900 and cp <= 0xFAFF)
591
- or (cp >= 0x2F800 and cp <= 0x2FA1F) #
592
- ): #
593
- return True
594
-
595
- return False
596
- def end(self):
597
- """Flushes any remaining cache and prints a newline to stdout."""
598
- # Flush the cache, if it exists
599
- if len(self.token_cache) > 0:
600
- text = self.tokenizer.decode(self.token_cache, **self.decode_kwargs)
601
- printable_text = text[self.print_len :]
602
- self.token_cache = []
603
- self.print_len = 0
604
- else:
605
- printable_text = ""
606
-
607
- self.next_tokens_are_prompt = True
608
- if self.callback:
609
- if self.callback(printable_text, 0):
610
- raise Exception("canceled")
611
- streamer = StreamerClass(self.tokenizer, streaming_callback)
612
- self.generate(
613
- inputs=input_ids,
614
- generation_config=self.generation_config,
615
- streamer = streamer,
616
- )
617
- return streamer.output.rstrip('!')
618
- except Exception as ex:
619
- return {"status": False, "error": str(ex)}
620
-
621
- def openai_generate(self,
622
- prompt,
623
- host_address=None,
624
- model_name=None,
625
- personality=None,
626
- n_predict=None,
627
- stream=False,
628
- temperature=0.1,
629
- top_k=50,
630
- top_p=0.95,
631
- repeat_penalty=0.8,
632
- repeat_last_n=40,
633
- seed=None,
634
- n_threads=8,
635
- completion_format: ELF_COMPLETION_FORMAT = ELF_COMPLETION_FORMAT.Chat,
636
- service_key: str = "",
637
- streaming_callback=None):
193
+ def get_available_bindings(self) -> List[str]:
638
194
  """
639
- Generates text using the OpenAI API based on the provided prompt and parameters.
640
-
641
- Parameters:
642
- prompt (str): The input text prompt to generate completions for.
643
- host_address (str, optional): The API host address. Defaults to instance variable.
644
- model_name (str, optional): The model to use for generation. Defaults to instance variable.
645
- personality (str, optional): The personality setting for the model. Defaults to instance variable.
646
- n_predict (int, optional): The number of tokens to predict. Defaults to instance variable.
647
- stream (bool, optional): Whether to stream the response. Defaults to False.
648
- temperature (float, optional): Sampling temperature. Higher values mean more randomness. Defaults to 0.1.
649
- top_k (int, optional): The number of highest probability vocabulary tokens to keep for top-k filtering. Defaults to 50.
650
- top_p (float, optional): The cumulative probability of parameter options to keep for nucleus sampling. Defaults to 0.95.
651
- repeat_penalty (float, optional): The penalty for repeating tokens. Defaults to 0.8.
652
- repeat_last_n (int, optional): The number of last tokens to consider for repeat penalty. Defaults to 40.
653
- seed (int, optional): Random seed for reproducibility. Defaults to instance variable.
654
- n_threads (int, optional): The number of threads to use for generation. Defaults to 8.
655
- completion_format (ELF_COMPLETION_FORMAT, optional): The format of the completion request (Instruct or Chat). Defaults to ELF_COMPLETION_FORMAT.Instruct.
656
- service_key (str, optional): The API service key for authorization. Defaults to an empty string.
657
- streaming_callback (callable, optional): A callback function to handle streaming responses.
195
+ Get list of available bindings.
658
196
 
659
197
  Returns:
660
- str: The generated text response from the OpenAI API.
198
+ List[str]: List of binding names that can be used.
661
199
  """
662
- # Set default values to instance variables if optional arguments are None
663
- host_address = host_address if host_address else self.host_address
664
- model_name = model_name if model_name else self.model_name
665
- n_predict = n_predict if n_predict else self.n_predict
666
- personality = personality if personality is not None else self.personality
667
- # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
668
- temperature = temperature if temperature is not None else self.temperature
669
- top_k = top_k if top_k is not None else self.top_k
670
- top_p = top_p if top_p is not None else self.top_p
671
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
672
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
673
- seed = seed or self.seed # Use the instance seed if not provided
674
- n_threads = n_threads if n_threads else self.n_threads
675
- service_key = service_key if service_key else self.service_key
676
- self.client.api_key = service_key
677
- count = 0
678
- output= ""
679
-
680
-
681
- if "vision" in self.model_name:
682
- messages = [
683
- {
684
- "role": "user",
685
- "content": [
686
- {
687
- "type":"text",
688
- "text":prompt
689
- }
690
- ]
691
- }
692
- ]
693
- else:
694
- messages = [{"role": "user", "content": prompt}]
695
-
696
-
697
- if completion_format == ELF_COMPLETION_FORMAT.Chat:
698
- if "o1" in self.model_name:
699
- chat_completion = self.client.chat.completions.create(
700
- model=self.model_name, # Choose the engine according to your OpenAI plan
701
- messages=messages,
702
- n=1, # Specify the number of responses you want
703
- )
704
- output = chat_completion.choices[0].message.content
705
- else:
706
- chat_completion = self.client.chat.completions.create(
707
- model=self.model_name, # Choose the engine according to your OpenAI plan
708
- messages=messages,
709
- max_tokens=n_predict-7 if n_predict>512 else n_predict, # Adjust the desired length of the generated response
710
- n=1, # Specify the number of responses you want
711
- temperature=float(self.temperature), # Adjust the temperature for more or less randomness in the output
712
- stream=True)
713
-
714
- for resp in chat_completion:
715
- if count >= n_predict:
716
- break
717
- try:
718
- word = resp.choices[0].delta.content
719
- except Exception as ex:
720
- word = ""
721
- if streaming_callback is not None:
722
- if not streaming_callback(word):
723
- break
724
- if word:
725
- output += word
726
- count += 1
727
- else:
728
- completion = self.client.completions.create(
729
- model=self.model_name, # Choose the engine according to your OpenAI plan
730
- prompt=prompt,
731
- max_tokens=n_predict-7 if n_predict>512 else n_predict, # Adjust the desired length of the generated response
732
- n=1, # Specify the number of responses you want
733
- temperature=float(self.temperature), # Adjust the temperature for more or less randomness in the output
734
- stream=True)
735
-
736
- for resp in completion:
737
- if count >= n_predict:
738
- break
739
- try:
740
- word = resp.choices[0].text
741
- except Exception as ex:
742
- word = ""
743
- if streaming_callback is not None:
744
- if not streaming_callback(word):
745
- break
746
- if word:
747
- output += word
748
- count += 1
749
-
750
- return output
751
-
752
-
753
- def vllm_generate(self,
754
- prompt,
755
- host_address=None,
756
- model_name=None,
757
- personality=None,
758
- n_predict=None,
759
- stream=False,
760
- temperature=0.1,
761
- top_k=50,
762
- top_p=0.95,
763
- repeat_penalty=0.8,
764
- repeat_last_n=40,
765
- seed=None,
766
- n_threads=8,
767
- completion_format: ELF_COMPLETION_FORMAT = ELF_COMPLETION_FORMAT.Instruct,
768
- service_key: str = "",
769
- streaming_callback=None):
200
+ return self.binding_manager.get_available_bindings()
201
+
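
Model and binding management is likewise forwarded to the binding layer. A hedged sketch of the new helpers (the binding and model names are placeholders):

    print(lc.get_available_bindings())   # binding names discovered under llm_bindings/
    print(lc.get_model_details())        # forwarded to the binding's get_model_info()

    if lc.switch_model("some-other-model"):   # True when the binding loads the model
        print("model switched")
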
202
+ def generate_text(self,
203
+ prompt: str,
204
+ images: Optional[List[str]] = None,
205
+ n_predict: Optional[int] = None,
206
+ stream: Optional[bool] = None,
207
+ temperature: Optional[float] = None,
208
+ top_k: Optional[int] = None,
209
+ top_p: Optional[float] = None,
210
+ repeat_penalty: Optional[float] = None,
211
+ repeat_last_n: Optional[int] = None,
212
+ seed: Optional[int] = None,
213
+ n_threads: Optional[int] = None,
214
+ ctx_size: int | None = None,
215
+ streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None) -> str:
770
216
  """
771
- Generates text using the OpenAI API based on the provided prompt and parameters.
217
+ Generate text using the active binding, using instance defaults if parameters are not provided.
772
218
 
773
- Parameters:
774
- prompt (str): The input text prompt to generate completions for.
775
- host_address (str, optional): The API host address. Defaults to instance variable.
776
- model_name (str, optional): The model to use for generation. Defaults to instance variable.
777
- personality (str, optional): The personality setting for the model. Defaults to instance variable.
778
- n_predict (int, optional): The number of tokens to predict. Defaults to instance variable.
779
- stream (bool, optional): Whether to stream the response. Defaults to False.
780
- temperature (float, optional): Sampling temperature. Higher values mean more randomness. Defaults to 0.1.
781
- top_k (int, optional): The number of highest probability vocabulary tokens to keep for top-k filtering. Defaults to 50.
782
- top_p (float, optional): The cumulative probability of parameter options to keep for nucleus sampling. Defaults to 0.95.
783
- repeat_penalty (float, optional): The penalty for repeating tokens. Defaults to 0.8.
784
- repeat_last_n (int, optional): The number of last tokens to consider for repeat penalty. Defaults to 40.
785
- seed (int, optional): Random seed for reproducibility. Defaults to instance variable.
786
- n_threads (int, optional): The number of threads to use for generation. Defaults to 8.
787
- completion_format (ELF_COMPLETION_FORMAT, optional): The format of the completion request (Instruct or Chat). Defaults to ELF_COMPLETION_FORMAT.Instruct.
788
- service_key (str, optional): The API service key for authorization. Defaults to an empty string.
789
- streaming_callback (callable, optional): A callback function to handle streaming responses.
219
+ Args:
220
+ prompt (str): The input prompt for text generation.
221
+ images (Optional[List[str]]): List of image file paths for multimodal generation.
222
+ n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
223
+ stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
224
+ temperature (Optional[float]): Sampling temperature. Uses instance default if None.
225
+ top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
226
+ top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
227
+ repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
228
+ repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
229
+ seed (Optional[int]): Random seed for generation. Uses instance default if None.
230
+ n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
231
+ streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
232
+ Uses instance default if None.
233
+ - First parameter (str): The chunk of text received from the stream.
234
+ - Second parameter (MSG_TYPE): The message type enum (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
790
235
 
791
236
  Returns:
792
- str: The generated text response from the OpenAI API.
237
+ Union[str, dict]: Generated text or error dictionary if failed.
793
238
  """
794
- # Set default values to instance variables if optional arguments are None
795
- host_address = host_address if host_address else self.host_address
796
- model_name = model_name if model_name else self.model_name
797
- n_predict = n_predict if n_predict else self.n_predict
798
- personality = personality if personality is not None else self.personality
799
- # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
800
- temperature = temperature if temperature is not None else self.temperature
801
- top_k = top_k if top_k is not None else self.top_k
802
- top_p = top_p if top_p is not None else self.top_p
803
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
804
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
805
- seed = seed or self.seed # Use the instance seed if not provided
806
- n_threads = n_threads if n_threads else self.n_threads
807
-
808
- if service_key != "":
809
- headers = {
810
- 'Content-Type': 'application/json',
811
- 'Authorization': f'Bearer {service_key}',
812
- }
813
- else:
814
- headers = {
815
- 'Content-Type': 'application/json',
816
- }
239
+ return self.binding.generate_text(
240
+ prompt=prompt,
241
+ images=images,
242
+ n_predict=n_predict if n_predict is not None else self.default_n_predict,
243
+ stream=stream if stream is not None else self.default_stream,
244
+ temperature=temperature if temperature is not None else self.default_temperature,
245
+ top_k=top_k if top_k is not None else self.default_top_k,
246
+ top_p=top_p if top_p is not None else self.default_top_p,
247
+ repeat_penalty=repeat_penalty if repeat_penalty is not None else self.default_repeat_penalty,
248
+ repeat_last_n=repeat_last_n if repeat_last_n is not None else self.default_repeat_last_n,
249
+ seed=seed if seed is not None else self.default_seed,
250
+ n_threads=n_threads if n_threads is not None else self.default_n_threads,
251
+ ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size,
252
+ streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback
253
+ )
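
generate_text replaces the per-backend generate/openai_generate/ollama_generate helpers removed elsewhere in this diff: any argument left as None falls back to the instance default captured in __init__, and the call is forwarded to the binding. A hedged streaming sketch (the prompt text and callback behavior are illustrative):

    from lollms_client.lollms_types import MSG_TYPE

    def on_chunk(chunk: str, msg_type: MSG_TYPE):
        # called once per streamed fragment
        if msg_type == MSG_TYPE.MSG_TYPE_CHUNK:
            print(chunk, end="", flush=True)

    answer = lc.generate_text(
        "Summarize the 0.11.0 changes in one sentence.",
        stream=True,
        streaming_callback=on_chunk,
        # images=["photo.png"],  # optional multimodal input, if the binding supports it
    )
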
817
254
 
818
- if completion_format == ELF_COMPLETION_FORMAT.Instruct:
819
- data = {
820
- 'model': model_name,
821
- 'prompt': prompt,
822
- "stream": True,
823
- "temperature": float(temperature),
824
- "max_tokens": n_predict
825
- }
826
- completion_format_path = "/v1/completions"
827
- elif completion_format == ELF_COMPLETION_FORMAT.Chat:
828
- data = {
829
- 'model': model_name,
830
- 'messages': [{
831
- 'role': "user",
832
- 'content': prompt
833
- }],
834
- "stream": True,
835
- "temperature": float(temperature),
836
- "max_tokens": n_predict
837
- }
838
- completion_format_path = "/v1/chat/completions"
839
-
840
- if host_address.endswith("/"):
841
- host_address = host_address[:-1]
842
-
843
- url = f'{host_address}{completion_format_path}'
844
-
845
- response = requests.post(url, headers=headers, data=json.dumps(data), stream=True, verify=self.verify_ssl_certificate)
846
-
847
- if response.status_code == 400:
848
- try:
849
- content = response.content.decode("utf8")
850
- content = json.loads(content)
851
- self.error(content["error"]["message"])
852
- return
853
- except:
854
- content = response.content.decode("utf8")
855
- content = json.loads(content)
856
- self.error(content["message"])
857
- return
858
- elif response.status_code == 404:
859
- ASCIIColors.error(response.content.decode("utf-8", errors='ignore'))
860
-
861
- text = ""
862
- for line in response.iter_lines():
863
- decoded = line.decode("utf-8")
864
- if decoded.startswith("data: "):
865
- try:
866
- json_data = json.loads(decoded[5:].strip())
867
- if completion_format == ELF_COMPLETION_FORMAT.Chat:
868
- try:
869
- chunk = json_data["choices"][0]["delta"]["content"]
870
- except:
871
- chunk = ""
872
- else:
873
- chunk = json_data["choices"][0]["text"]
874
- # Process the JSON data here
875
- text += chunk
876
- if streaming_callback:
877
- if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
878
- break
879
- except:
880
- break
881
- else:
882
- if decoded.startswith("{"):
883
- for line_ in response.iter_lines():
884
- decoded += line_.decode("utf-8")
885
- try:
886
- json_data = json.loads(decoded)
887
- if json_data["object"] == "error":
888
- self.error(json_data["message"])
889
- break
890
- except:
891
- self.error("Couldn't generate text, verify your key or model name")
892
- else:
893
- text += decoded
894
- if streaming_callback:
895
- if not streaming_callback(decoded, MSG_TYPE.MSG_TYPE_CHUNK):
896
- break
897
- return text
898
255
 
899
- def openai_generate_with_images(self,
900
- prompt,
901
- images,
902
- host_address=None,
903
- model_name=None,
904
- personality=None,
905
- n_predict=None,
906
- stream=False,
907
- temperature=0.1,
908
- top_k=50,
909
- top_p=0.95,
910
- repeat_penalty=0.8,
911
- repeat_last_n=40,
912
- seed=None,
913
- n_threads=8,
914
- max_image_width=-1,
915
- service_key: str = "",
916
- streaming_callback=None,):
917
- """Generates text out of a prompt
918
-
919
- Args:
920
- prompt (str): The prompt to use for generation
921
- n_predict (int, optional): Number of tokens to predict. Defaults to 128.
922
- callback (Callable[[str], None], optional): A callback function that is called every time a new text element is generated. Defaults to None.
923
- verbose (bool, optional): If true, the code will print a lot of information about the generation process. Defaults to False.
924
- """
925
- # Set default values to instance variables if optional arguments are None
926
- host_address = host_address if host_address else self.host_address
927
- model_name = model_name if model_name else self.model_name
928
- n_predict = n_predict if n_predict else self.n_predict
929
- personality = personality if personality is not None else self.personality
930
- # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
931
- temperature = temperature if temperature is not None else self.temperature
932
- top_k = top_k if top_k is not None else self.top_k
933
- top_p = top_p if top_p is not None else self.top_p
934
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
935
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
936
- seed = seed or self.seed # Use the instance seed if not provided
937
- n_threads = n_threads if n_threads else self.n_threads
938
-
939
- count = 0
940
- output = ""
941
-
942
- messages = [
943
- {
944
- "role": "user",
945
- "content": [
946
- {
947
- "type":"text",
948
- "text":prompt
949
- }
950
- ]+[
951
- {
952
- "type": "image_url",
953
- "image_url": {
954
- "url": f"data:image/jpeg;base64,{encode_image(image_path, max_image_width)}"
955
- }
956
- }
957
- for image_path in images
958
- ]
959
- }
960
- ]
961
- chat_completion = self.client.chat.completions.create(
962
- model=self.model_name, # Choose the engine according to your OpenAI plan
963
- messages=messages,
964
- max_tokens=n_predict, # Adjust the desired length of the generated response
965
- n=1, # Specify the number of responses you want
966
- temperature=temperature, # Adjust the temperature for more or less randomness in the output
967
- stream=True
968
- )
969
-
970
- for resp in chat_completion:
971
- if count >= n_predict:
972
- break
973
- try:
974
- word = resp.choices[0].delta.content
975
- except Exception as ex:
976
- word = ""
977
- if streaming_callback is not None:
978
- if not streaming_callback(word):
979
- break
980
- if word:
981
- output += word
982
- count += 1
983
- return output
984
-
985
-
986
- def ollama_generate(self, prompt, host_address=None, model_name=None, personality=None, n_predict=None, stream=False, temperature=0.1, top_k=50, top_p=0.95, repeat_penalty=0.8, repeat_last_n=40, seed=None, n_threads=8, completion_format:ELF_COMPLETION_FORMAT=ELF_COMPLETION_FORMAT.Instruct, service_key:str="", streaming_callback=None):
987
- # Set default values to instance variables if optional arguments are None
988
- host_address = host_address if host_address else self.host_address
989
- model_name = model_name if model_name else self.model_name
990
- n_predict = n_predict if n_predict else self.n_predict
991
- personality = personality if personality is not None else self.personality
992
- # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
993
- temperature = temperature if temperature is not None else self.temperature
994
- top_k = top_k if top_k is not None else self.top_k
995
- top_p = top_p if top_p is not None else self.top_p
996
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
997
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
998
- seed = seed or self.seed # Use the instance seed if not provided
999
- n_threads = n_threads if n_threads else self.n_threads
1000
-
1001
- if service_key!="":
1002
- headers = {
1003
- 'Content-Type': 'application/json',
1004
- 'Authorization': f'Bearer {service_key}',
1005
- }
1006
- else:
1007
- headers = {
1008
- 'Content-Type': 'application/json',
1009
- }
1010
-
1011
- data = {
1012
- 'model':model_name,
1013
- 'prompt': prompt,
1014
- "stream":stream,
1015
- "temperature": float(temperature),
1016
- "max_tokens": n_predict
1017
- }
1018
- completion_format_path = "/api/generate"
1019
- if host_address.endswith("/"):
1020
- host_address = host_address[:-1]
1021
- url = f'{host_address}{completion_format_path}'
1022
-
1023
- response = requests.post(url, json=data, headers=headers)
1024
-
1025
- if response.status_code==404:
1026
- ASCIIColors.error(response.content.decode("utf-8", errors='ignore'))
1027
- text = ""
1028
- if stream:
1029
- for line in response.iter_lines():
1030
- decoded = line.decode("utf-8")
1031
- json_data = json.loads(decoded)
1032
- chunk = json_data["response"]
1033
- ## Process the JSON data here
1034
- text +=chunk
1035
- if streaming_callback:
1036
- if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
1037
- break
1038
- return text
1039
- else:
1040
- return response.json()["response"]
1041
-
1042
- def ollama_generate_with_images(self,
1043
- prompt,
1044
- images,
1045
- host_address=None,
1046
- model_name=None,
1047
- personality=None,
1048
- n_predict=None,
1049
- stream=False,
1050
- temperature=0.1,
1051
- top_k=50,
1052
- top_p=0.95,
1053
- repeat_penalty=0.8,
1054
- repeat_last_n=40,
1055
- seed=None,
1056
- n_threads=8,
1057
- max_image_width=-1,
1058
- service_key: str = "",
1059
- streaming_callback=None,):
1060
- """Generates text out of a prompt
1061
-
1062
- Args:
1063
- prompt (str): The prompt to use for generation
1064
- n_predict (int, optional): Number of tokens to predict. Defaults to 128.
1065
- callback (Callable[[str], None], optional): A callback function that is called every time a new text element is generated. Defaults to None.
1066
- verbose (bool, optional): If true, the code will print a lot of information about the generation process. Defaults to False.
1067
- """
1068
- # Set default values to instance variables if optional arguments are None
1069
- host_address = host_address if host_address else self.host_address
1070
- model_name = model_name if model_name else self.model_name
1071
- n_predict = n_predict if n_predict else self.n_predict
1072
- personality = personality if personality is not None else self.personality
1073
- # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
1074
- temperature = temperature if temperature is not None else self.temperature
1075
- top_k = top_k if top_k is not None else self.top_k
1076
- top_p = top_p if top_p is not None else self.top_p
1077
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
1078
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
1079
- seed = seed or self.seed # Use the instance seed if not provided
1080
- n_threads = n_threads if n_threads else self.n_threads
1081
- if service_key != "":
1082
- headers = {
1083
- 'Content-Type': 'application/json',
1084
- 'Authorization': f'Bearer {service_key}',
1085
- }
1086
- else:
1087
- headers = {
1088
- 'Content-Type': 'application/json',
1089
- }
1090
-
1091
- images_list = []
1092
- for image in images:
1093
- images_list.append(f"{encode_image(image, max_image_width)}")
1094
-
1095
- data = {
1096
- 'model': model_name,
1097
- 'prompt': prompt,
1098
- 'images': images_list,
1099
- "raw": True,
1100
- "stream":True,
1101
- "temperature": float(temperature),
1102
- "max_tokens": n_predict
1103
- }
1104
-
1105
-
1106
- data = {
1107
- 'model': model_name,
1108
- 'messages': [
1109
- {
1110
- "role": "user",
1111
- "content": [
1112
- {
1113
- "type":"text",
1114
- "text":prompt
1115
- }
1116
- ]+[
1117
- {
1118
- "type": "image_url",
1119
- "image_url": {
1120
- "url": f"data:image/jpeg;base64,{encode_image(image_path, max_image_width)}"
1121
- }
1122
- }
1123
- for image_path in images
1124
- ]
1125
- }
1126
- ],
1127
- "stream": True,
1128
- "temperature": float(temperature),
1129
- "max_tokens": n_predict
1130
- }
1131
-
1132
- completion_format_path = "/api"
1133
-
1134
- if host_address.endswith("/"):
1135
- host_address = host_address[:-1]
1136
- url = f'{host_address}{completion_format_path}'
1137
-
1138
- response = requests.post(url, json=data, headers=headers)
1139
-
1140
- if response.status_code == 400:
1141
- try:
1142
- content = response.content.decode("utf8")
1143
- content = json.loads(content)
1144
- self.error(content["error"]["message"])
1145
- return
1146
- except:
1147
- content = response.content.decode("utf8")
1148
- content = json.loads(content)
1149
- self.error(content["message"])
1150
- return
1151
- elif response.status_code == 404:
1152
- ASCIIColors.error(response.content.decode("utf-8", errors='ignore'))
1153
-
1154
- text = ""
1155
- for line in response.iter_lines():
1156
- decoded = line.decode("utf-8")
1157
- if decoded.startswith("data: "):
1158
- try:
1159
- json_data = json.loads(decoded[5:].strip())
1160
- try:
1161
- chunk = json_data["choices"][0]["delta"]["content"]
1162
- except:
1163
- chunk = ""
1164
- # Process the JSON data here
1165
- text += chunk
1166
- if streaming_callback:
1167
- if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
1168
- break
1169
- except:
1170
- break
1171
- else:
1172
- if decoded.startswith("{"):
1173
- for line_ in response.iter_lines():
1174
- decoded += line_.decode("utf-8")
1175
- try:
1176
- json_data = json.loads(decoded)
1177
- if json_data["object"] == "error":
1178
- self.error(json_data["message"])
1179
- break
1180
- except:
1181
- self.error("Couldn't generate text, verify your key or model name")
1182
- else:
1183
- text += decoded
1184
- if streaming_callback:
1185
- if not streaming_callback(decoded, MSG_TYPE.MSG_TYPE_CHUNK):
1186
- break
1187
- return text
1188
-
1189
- def litellm_generate(self, prompt, host_address=None, model_name=None, personality=None, n_predict=None, stream=False, temperature=0.1, top_k=50, top_p=0.95, repeat_penalty=0.8, repeat_last_n=40, seed=None, n_threads=8, completion_format:ELF_COMPLETION_FORMAT=ELF_COMPLETION_FORMAT.Instruct, service_key:str="", streaming_callback=None):
1190
- # Set default values to instance variables if optional arguments are None
1191
- host_address = host_address if host_address else self.host_address
1192
- model_name = model_name if model_name else self.model_name
1193
- n_predict = n_predict if n_predict else self.n_predict
1194
- personality = personality if personality is not None else self.personality
1195
- # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
1196
- temperature = temperature if temperature is not None else self.temperature
1197
- top_k = top_k if top_k is not None else self.top_k
1198
- top_p = top_p if top_p is not None else self.top_p
1199
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
1200
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
1201
- seed = seed or self.seed # Use the instance seed if not provided
1202
- n_threads = n_threads if n_threads else self.n_threads
1203
-
1204
- if service_key!="":
1205
- headers = {
1206
- 'Content-Type': 'application/json',
1207
- 'Authorization': f'Bearer {service_key}',
1208
- }
1209
- else:
1210
- headers = {
1211
- 'Content-Type': 'application/json',
1212
- }
256
+ def embed(self, text):
257
+ return self.binding.embed(text)
1213
258
 
1214
- data = {
1215
- 'model':model_name,
1216
- 'prompt': prompt,
1217
- "stream":True,
1218
- "temperature": float(temperature),
1219
- "max_tokens": n_predict
1220
- }
1221
- completion_format_path = "/api/generate"
1222
- if host_address.endswith("/"):
1223
- host_address = host_address[:-1]
1224
- url = f'{host_address}{completion_format_path}'
1225
-
1226
- response = requests.post(url, json=data, headers=headers)
1227
-
1228
- if response.status_code==404:
1229
- ASCIIColors.error(response.content.decode("utf-8", errors='ignore'))
1230
- text = ""
1231
- for line in response.iter_lines():
1232
- decoded = line.decode("utf-8")
1233
- if decoded.startswith("{"):
1234
- json_data = json.loads(decoded)
1235
- if "error" in json_data:
1236
- self.error(json_data["error"]["message"])
1237
- break
1238
- else:
1239
- text +=decoded
1240
- if streaming_callback:
1241
- if not streaming_callback(decoded, MSG_TYPE.MSG_TYPE_CHUNK):
1242
- break
1243
-
1244
- return text
1245
-
1246
-
1247
- def lollms_listMountedPersonalities(self, host_address:str=None):
1248
- host_address = host_address if host_address else self.host_address
1249
- url = f"{host_address}/list_mounted_personalities"
1250
-
1251
- response = requests.get(url)
1252
-
1253
- if response.status_code == 200:
1254
- try:
1255
- text = json.loads(response.content.decode("utf-8"))
1256
- return text
1257
- except Exception as ex:
1258
- return {"status": False, "error": str(ex)}
1259
- else:
1260
- return {"status": False, "error": response.text}
1261
-
1262
- def listModels(self, host_address:str=None):
1263
- if self.default_generation_mode == ELF_GENERATION_FORMAT.LOLLMS:
1264
- return self.lollms_listModels(host_address)
1265
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.OLLAMA:
1266
- return self.ollama_listModels(host_address)
1267
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.OPENAI:
1268
- return self.openai_listModels(host_address)
1269
-
1270
- def lollms_listModels(self, host_address:str=None):
1271
- host_address = host_address if host_address else self.host_address
1272
- url = f"{host_address}/list_models"
1273
-
1274
- response = requests.get(url)
1275
-
1276
- if response.status_code == 200:
1277
- try:
1278
- text = json.loads(response.content.decode("utf-8"))
1279
- return text
1280
- except Exception as ex:
1281
- return {"status": False, "error": str(ex)}
1282
- else:
1283
- return {"status": False, "error": response.text}
1284
-
1285
- def ollama_listModels(self, host_address:str=None):
1286
- if host_address is None:
1287
- host_address = self.host_address
1288
- url = f'{host_address}/api/tags'
1289
- headers = {
1290
- 'accept': 'application/json',
1291
- 'Authorization': f'Bearer {self.service_key}'
1292
- }
1293
- response = requests.get(url, headers=headers, verify= self.verify_ssl_certificate)
1294
- try:
1295
- data = response.json()
1296
- model_info = []
1297
-
1298
- for model in data['models']:
1299
- model_name = model['name']
1300
- owned_by = ""
1301
- created_datetime = model["modified_at"]
1302
- model_info.append({'model_name': model_name, 'owned_by': owned_by, 'created_datetime': created_datetime})
1303
-
1304
- return model_info
1305
- except Exception as ex:
1306
- trace_exception(ex)
1307
- return []
1308
-
1309
- def openai_listModels(self, host_address:str=None):
1310
- if host_address is None:
1311
- host_address = self.host_address
1312
- url = f'{host_address}/v1/models'
1313
- headers = {
1314
- 'accept': 'application/json',
1315
- 'Authorization': f'Bearer {self.service_key}'
1316
- }
1317
- response = requests.get(url, headers=headers, verify= self.verify_ssl_certificate)
1318
- try:
1319
- data = response.json()
1320
- model_info = []
1321
259
 
1322
- for model in data["data"]:
1323
- model_name = model['id']
1324
- owned_by = model['owned_by']
1325
- created_datetime = model["created"]
1326
- model_info.append({'model_name': model_name, 'owned_by': owned_by, 'created_datetime': created_datetime})
260
+ def listModels(self):
261
+ return self.binding.listModels()
1327
262
 
1328
- return model_info
1329
- except Exception as ex:
1330
- trace_exception(ex)
1331
- return []
1332
263
 
1333
264
 
1334
265
  def generate_codes(
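The hunks above replace the per-backend helpers (lollms_listModels, ollama_listModels, openai_listModels and the *_generate variants) with thin delegations to the binding selected by binding_name. A minimal usage sketch, assuming LollmsClient is importable from the package root and that the chosen binding implements listModels() and embed(); the binding name, host, and model below are illustrative only:

```python
from lollms_client import LollmsClient  # import path assumed

# The backend is now chosen by name instead of ELF_GENERATION_FORMAT.
client = LollmsClient(
    binding_name="ollama",
    host_address="http://localhost:11434",  # illustrative host
    model_name="mistral-nemo:latest",
)

print(client.listModels())             # forwarded to self.binding.listModels()
vector = client.embed("Hello world")   # forwarded to the binding; result shape depends on the binding
```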
@@ -1376,11 +307,11 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
1376
307
  {self.ai_full_header}"""
1377
308
 
1378
309
  if len(self.image_files)>0:
1379
- response = self.generate_with_images(full_prompt, self.image_files, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
310
+ response = self.generate_text_with_images(full_prompt, self.image_files, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
1380
311
  elif len(images)>0:
1381
- response = self.generate_with_images(full_prompt, images, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
312
+ response = self.generate_text_with_images(full_prompt, images, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
1382
313
  else:
1383
- response = self.generate(full_prompt, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
314
+ response = self.generate_text(full_prompt, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
1384
315
  response_full += response
1385
316
  codes = self.extract_code_blocks(response)
1386
317
  return codes
@@ -1428,16 +359,13 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
1428
359
  full_prompt += f"""You must return a single code tag.
1429
360
  Do not split the code in multiple tags.
1430
361
  {self.ai_full_header}"""
1431
- if len(images)>0:
1432
- response = self.generate_with_images(full_prompt, images, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, streaming_callback=callback)
1433
- else:
1434
- response = self.generate(full_prompt, max_size, False, temperature, top_k, top_p, repeat_penalty, repeat_last_n, streaming_callback=callback)
362
+ response = self.generate_text(full_prompt, images, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, streaming_callback=callback)
1435
363
  codes = self.extract_code_blocks(response)
1436
364
  if len(codes)>0:
1437
365
  if not codes[-1]["is_complete"]:
1438
366
  code = "\n".join(codes[-1]["content"].split("\n")[:-1])
1439
367
  while not codes[-1]["is_complete"]:
1440
- response = self.generate(prompt+code+self.user_full_header+"continue the code. Start from last line and continue the code. Put the code inside a markdown code tag."+self.separator_template+self.ai_full_header, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, streaming_callback=callback)
368
+ response = self.generate_text(prompt+code+self.user_full_header+"continue the code. Start from last line and continue the code. Put the code inside a markdown code tag."+self.separator_template+self.ai_full_header, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, streaming_callback=callback)
1441
369
  codes = self.extract_code_blocks(response)
1442
370
  if len(codes)==0:
1443
371
  break
@@ -1453,103 +381,135 @@ Do not split the code in multiple tags.
1453
381
  else:
1454
382
  return None
1455
383
 
1456
- def extract_code_blocks(self, text: str) -> List[dict]:
384
+ def extract_code_blocks(self, text: str, format: str = "markdown") -> List[dict]:
1457
385
  """
1458
- This function extracts code blocks from a given text.
386
+ Extracts code blocks from text in Markdown or HTML format.
1459
387
 
1460
388
  Parameters:
1461
- text (str): The text from which to extract code blocks. Code blocks are identified by triple backticks (```).
389
+ text (str): The text to extract code blocks from.
390
+ format (str): The format of code blocks ("markdown" for ``` or "html" for <code class="">).
1462
391
 
1463
392
  Returns:
1464
- List[dict]: A list of dictionaries where each dictionary represents a code block and contains the following keys:
1465
- - 'index' (int): The index of the code block in the text.
1466
- - 'file_name' (str): The name of the file extracted from the preceding line, if available.
1467
- - 'content' (str): The content of the code block.
1468
- - 'type' (str): The type of the code block. If the code block starts with a language specifier (like 'python' or 'java'), this field will contain that specifier. Otherwise, it will be set to 'language-specific'.
1469
- - 'is_complete' (bool): True if the block has a closing tag, False otherwise.
1470
-
1471
- Note:
1472
- The function assumes that the number of triple backticks in the text is even.
1473
- If the number of triple backticks is odd, it will consider the rest of the text as the last code block.
1474
- """
393
+ List[dict]: A list of dictionaries with:
394
+ - 'index' (int): Index of the code block.
395
+ - 'file_name' (str): File name from preceding text, if available.
396
+ - 'content' (str): Code block content.
397
+ - 'type' (str): Language type (from Markdown first line or HTML class).
398
+ - 'is_complete' (bool): True if block has a closing tag.
399
+ """
400
+ code_blocks = []
1475
401
  remaining = text
1476
- bloc_index = 0
1477
402
  first_index = 0
1478
403
  indices = []
1479
- while len(remaining) > 0:
1480
- try:
1481
- index = remaining.index("```")
1482
- indices.append(index + first_index)
1483
- remaining = remaining[index + 3:]
1484
- first_index += index + 3
1485
- bloc_index += 1
1486
- except Exception as ex:
1487
- if bloc_index % 2 == 1:
1488
- index = len(remaining)
1489
- indices.append(index)
1490
- remaining = ""
1491
404
 
1492
- code_blocks = []
1493
- is_start = True
1494
- for index, code_delimiter_position in enumerate(indices):
405
+ if format.lower() == "markdown":
406
+ # Markdown: Find triple backtick positions
407
+ while remaining:
408
+ try:
409
+ index = remaining.index("```")
410
+ indices.append(index + first_index)
411
+ remaining = remaining[index + 3:]
412
+ first_index += index + 3
413
+ except ValueError:
414
+ if len(indices) % 2 == 1: # Odd number of delimiters
415
+ indices.append(first_index + len(remaining))
416
+ break
417
+
418
+ elif format.lower() == "html":
419
+ # HTML: Find <code> and </code> positions, handling nested tags
420
+ while remaining:
421
+ try:
422
+ # Look for opening <code> tag
423
+ start_index = remaining.index("<code")
424
+ end_of_opening = remaining.index(">", start_index)
425
+ indices.append(start_index + first_index)
426
+ opening_tag = remaining[start_index:end_of_opening + 1]
427
+ remaining = remaining[end_of_opening + 1:]
428
+ first_index += end_of_opening + 1
429
+
430
+ # Look for matching </code>, accounting for nested <code>
431
+ nest_level = 0
432
+ temp_index = 0
433
+ while temp_index < len(remaining):
434
+ if remaining[temp_index:].startswith("<code"):
435
+ nest_level += 1
436
+ temp_index += remaining[temp_index:].index(">") + 1
437
+ elif remaining[temp_index:].startswith("</code>"):
438
+ if nest_level == 0:
439
+ indices.append(first_index + temp_index)
440
+ remaining = remaining[temp_index + len("</code>"):]
441
+ first_index += temp_index + len("</code>")
442
+ break
443
+ nest_level -= 1
444
+ temp_index += len("</code>")
445
+ else:
446
+ temp_index += 1
447
+ else:
448
+ indices.append(first_index + len(remaining))
449
+ break
450
+ except ValueError:
451
+ break
452
+
453
+ else:
454
+ raise ValueError("Format must be 'markdown' or 'html'")
455
+
456
+ for i in range(0, len(indices), 2):
1495
457
  block_infos = {
1496
- 'index': index,
458
+ 'index': i // 2,
1497
459
  'file_name': "",
1498
- 'section': "",
1499
460
  'content': "",
1500
- 'type': "",
461
+ 'type': 'language-specific',
1501
462
  'is_complete': False
1502
463
  }
1503
- if is_start:
1504
- # Check the preceding line for file name
1505
- preceding_text = text[:code_delimiter_position].strip().splitlines()
1506
- if preceding_text:
1507
- last_line = preceding_text[-1].strip()
1508
- if last_line.startswith("<file_name>") and last_line.endswith("</file_name>"):
1509
- file_name = last_line[len("<file_name>"):-len("</file_name>")].strip()
1510
- block_infos['file_name'] = file_name
1511
- elif last_line.startswith("## filename:"):
1512
- file_name = last_line[len("## filename:"):].strip()
1513
- block_infos['file_name'] = file_name
1514
- if last_line.startswith("<section>") and last_line.endswith("</section>"):
1515
- section = last_line[len("<section>"):-len("</section>")].strip()
1516
- block_infos['section'] = section
1517
-
1518
- sub_text = text[code_delimiter_position + 3:]
1519
- if len(sub_text) > 0:
1520
- try:
1521
- find_space = sub_text.index(" ")
1522
- except:
1523
- find_space = int(1e10)
1524
- try:
1525
- find_return = sub_text.index("\n")
1526
- except:
1527
- find_return = int(1e10)
1528
- next_index = min(find_return, find_space)
1529
- if '{' in sub_text[:next_index]:
1530
- next_index = 0
1531
- start_pos = next_index
1532
- if code_delimiter_position + 3 < len(text) and text[code_delimiter_position + 3] in ["\n", " ", "\t"]:
1533
- block_infos["type"] = 'language-specific'
1534
- else:
1535
- block_infos["type"] = sub_text[:next_index]
1536
464
 
1537
- if index + 1 < len(indices):
1538
- next_pos = indices[index + 1] - code_delimiter_position
1539
- if next_pos - 3 < len(sub_text) and sub_text[next_pos - 3] == "`":
1540
- block_infos["content"] = sub_text[start_pos:next_pos - 3].strip()
1541
- block_infos["is_complete"] = True
1542
- else:
1543
- block_infos["content"] = sub_text[start_pos:next_pos].strip()
1544
- block_infos["is_complete"] = False
1545
- else:
1546
- block_infos["content"] = sub_text[start_pos:].strip()
1547
- block_infos["is_complete"] = False
1548
- code_blocks.append(block_infos)
1549
- is_start = False
1550
- else:
1551
- is_start = True
1552
- continue
465
+ # Extract preceding text for file name
466
+ start_pos = indices[i]
467
+ preceding_text = text[:start_pos].strip().splitlines()
468
+ if preceding_text:
469
+ last_line = preceding_text[-1].strip()
470
+ if last_line.startswith("<file_name>") and last_line.endswith("</file_name>"):
471
+ block_infos['file_name'] = last_line[len("<file_name>"):-len("</file_name>")].strip()
472
+ elif last_line.startswith("## filename:"):
473
+ block_infos['file_name'] = last_line[len("## filename:"):].strip()
474
+
475
+ # Extract content and type
476
+ if format.lower() == "markdown":
477
+ sub_text = text[start_pos + 3:]
478
+ if i + 1 < len(indices):
479
+ end_pos = indices[i + 1]
480
+ content = text[start_pos + 3:end_pos].strip()
481
+ block_infos['is_complete'] = True
482
+ else:
483
+ content = sub_text.strip()
484
+ block_infos['is_complete'] = False
485
+
486
+ if content:
487
+ first_line = content.split('\n', 1)[0].strip()
488
+ if first_line and not first_line.startswith(('{', ' ', '\t')):
489
+ block_infos['type'] = first_line
490
+ content = content[len(first_line):].strip()
491
+
492
+ elif format.lower() == "html":
493
+ opening_tag = text[start_pos:text.index(">", start_pos) + 1]
494
+ sub_text = text[start_pos + len(opening_tag):]
495
+ if i + 1 < len(indices):
496
+ end_pos = indices[i + 1]
497
+ content = text[start_pos + len(opening_tag):end_pos].strip()
498
+ block_infos['is_complete'] = True
499
+ else:
500
+ content = sub_text.strip()
501
+ block_infos['is_complete'] = False
502
+
503
+ # Extract language from class attribute
504
+ if 'class="' in opening_tag:
505
+ class_start = opening_tag.index('class="') + len('class="')
506
+ class_end = opening_tag.index('"', class_start)
507
+ class_value = opening_tag[class_start:class_end]
508
+ if class_value.startswith("language-"):
509
+ block_infos['type'] = class_value[len("language-"):]
510
+
511
+ block_infos['content'] = content
512
+ code_blocks.append(block_infos)
1553
513
 
1554
514
  return code_blocks
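The reworked extract_code_blocks above accepts a format argument ("markdown" or "html"). A small illustration of both modes, assuming lc is an already constructed LollmsClient (the method only parses text, so no model call is made); the expected results follow directly from the logic in this hunk:

```python
fence = "```"  # built separately so the sample string stays on one line

md_text = f"## filename: hello.py\n{fence}python\nprint('hello')\n{fence}"
print(lc.extract_code_blocks(md_text, format="markdown"))
# [{'index': 0, 'file_name': 'hello.py', 'content': "print('hello')",
#   'type': 'python', 'is_complete': True}]

html_text = 'Result: <code class="language-json">{"name": "Alice"}</code>'
print(lc.extract_code_blocks(html_text, format="html"))
# [{'index': 0, 'file_name': '', 'content': '{"name": "Alice"}',
#   'type': 'json', 'is_complete': True}]
```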
1555
515
 
@@ -1631,7 +591,7 @@ Do not split the code in multiple tags.
1631
591
  }
1632
592
  """
1633
593
 
1634
- response = self.generate_code(
594
+ response = self.generate_text_code(
1635
595
  prompt=prompt,
1636
596
  template=template,
1637
597
  language="json",
@@ -1699,7 +659,7 @@ Do not split the code in multiple tags.
1699
659
  else:
1700
660
  prompt += "{\"index\": (the selected answer index)}"
1701
661
 
1702
- response = self.generate_code(prompt, language="json", max_size=max_answer_length,
662
+ response = self.generate_text_code(prompt, language="json", max_size=max_answer_length,
1703
663
  accept_all_if_no_code_tags_is_present=True, return_full_generated_code=False, callback=callback)
1704
664
 
1705
665
  try:
@@ -1761,7 +721,7 @@ Do not split the code in multiple tags.
1761
721
  else:
1762
722
  prompt += "{\"ranking\": (list of indices ordered from best to worst)}"
1763
723
 
1764
- response = self.generate_code(prompt, language="json", return_full_generated_code=False, callback=callback)
724
+ response = self.generate_text_code(prompt, language="json", return_full_generated_code=False, callback=callback)
1765
725
 
1766
726
  try:
1767
727
  result = json.loads(response)
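These hunks only rename the internal helper from generate_code to generate_text_code; the keyword arguments shown (language, template, return_full_generated_code, accept_all_if_no_code_tags_is_present, callback) are unchanged. A hedged sketch of the same call-and-parse pattern from user code, assuming lc is a constructed LollmsClient and that generate_text_code is exposed with the signature used in this hunk:

```python
import json

prompt = (
    "Classify the sentiment of: 'I love this library!'\n"
    'Answer with a json of the form {"index": 0} for negative or {"index": 1} for positive.'
)

response = lc.generate_text_code(prompt, language="json", return_full_generated_code=False)

try:
    result = json.loads(response)      # same parsing step as in the method above
    print("selected index:", result["index"])
except (json.JSONDecodeError, KeyError):
    print("Model did not return the expected JSON:", response)
```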
@@ -1883,7 +843,7 @@ Do not discuss the information inside thememory, just put the relevant informati
1883
843
  ASCIIColors.yellow(f" ----- {chunk_id-1} ------")
1884
844
  ASCIIColors.red(prompt)
1885
845
 
1886
- memory = self.generate(prompt, n_predict=ctx_size//4, streaming_callback=callback).strip()
846
+ memory = self.generate_text(prompt, n_predict=ctx_size//4, streaming_callback=callback).strip()
1887
847
  code = self.extract_code_blocks(memory)
1888
848
  if code:
1889
849
  memory=code[0]["content"]
@@ -1919,171 +879,206 @@ The updated memory must be put in a {chunk_processing_output_format} markdown ta
1919
879
 
1920
880
  # Generate final summary
1921
881
  final_prompt = final_prompt_template
1922
- memory = self.generate(final_prompt, streaming_callback=callback)
882
+ memory = self.generate_text(final_prompt, streaming_callback=callback)
1923
883
  code = self.extract_code_blocks(memory)
1924
884
  if code:
1925
885
  memory=code[0]["content"]
1926
886
  return memory
1927
887
 
1928
- def deepsearch(
1929
- self,
1930
- query: str,
1931
- text: str = None,
1932
- files: list = None,
1933
- search_prompt: str = "Extract information related to the query from the current text chunk and update the memory with new findings.",
1934
- aggregation_prompt: str = None,
1935
- output_format: str = "markdown",
1936
- ctx_size: int = None,
1937
- chunk_size: int = None,
1938
- bootstrap_chunk_size: int = None,
1939
- bootstrap_steps: int = None,
1940
- callback=None,
1941
- debug: bool = False
1942
- ):
1943
- """
1944
- Searches for specific information related to a query in a long text or a list of files.
1945
- Processes the input in chunks, updates a memory with relevant findings, and optionally aggregates them.
1946
-
1947
- Parameters:
1948
- - query (str): The query to search for.
1949
- - text (str, optional): The input text to search in. Defaults to None.
1950
- - files (list, optional): List of file paths to search in. Defaults to None.
1951
- - search_prompt (str, optional): Prompt for processing each chunk. Defaults to a standard extraction prompt.
1952
- - aggregation_prompt (str, optional): Prompt for aggregating findings. Defaults to None.
1953
- - output_format (str, optional): Output format. Defaults to "markdown".
1954
- - ctx_size (int, optional): Context size for the model. Defaults to None (uses self.ctx_size).
1955
- - chunk_size (int, optional): Size of each chunk. Defaults to None (ctx_size // 4). Smaller chunk sizes yield better results but is slower.
1956
- - bootstrap_chunk_size (int, optional): Size for initial chunks. Defaults to None.
1957
- - bootstrap_steps (int, optional): Number of initial chunks using bootstrap size. Defaults to None.
1958
- - callback (callable, optional): Function called after each chunk. Defaults to None.
1959
- - debug (bool, optional): Enable debug output. Defaults to False.
1960
888
 
1961
- Returns:
1962
- - str: The search findings or aggregated output in the specified format.
1963
- """
1964
- # Set defaults
1965
- if ctx_size is None:
1966
- ctx_size = self.ctx_size
1967
- if chunk_size is None:
1968
- chunk_size = ctx_size // 4
1969
-
1970
- # Prepare input
1971
- if files:
1972
- all_texts = [(file, open(file, 'r', encoding='utf-8').read()) for file in files]
1973
- elif text:
1974
- all_texts = [("input_text", text)]
1975
- else:
1976
- raise ValueError("Either text or files must be provided.")
889
+ def update_memory_from_file_chunk_prompt(self, file_name, file_chunk_id, global_chunk_id, chunk, memory, memory_template, query, task_prompt):
890
+ return f"""{self.system_full_header}
891
+ You are a search assistant that processes documents chunk by chunk to find information related to a query, updating a markdown memory of findings at each step.
1977
892
 
1978
- # Initialize memory and chunk counter
1979
- memory = ""
1980
- chunk_id = 0
893
+ Your goal is to extract relevant information from each text chunk and update the provided markdown memory structure, ensuring no key details are omitted or invented. Maintain the structure of the markdown template.
1981
894
 
1982
- # Define search prompt template using f-string and the provided search_prompt
1983
- search_prompt_template = f"""{self.system_full_header}
1984
- You are a search assistant that processes documents chunk by chunk to find information related to a query, updating a memory of findings at each step.
895
+ ----
896
+ # Current file: {file_name}
897
+ # Chunk number in this file: {file_chunk_id}
898
+ # Global chunk number: {global_chunk_id}
899
+ # Text chunk:
900
+ ```markdown
901
+ {chunk}
902
+ ```
903
+ {'Current findings memory (cumulative across all files):' if memory!="" else 'Memory template:'}
904
+ ```markdown
905
+ {memory if memory!="" else memory_template}
906
+ ```
907
+ {self.user_full_header}
908
+ Query: '{query}'
909
+ Task: {task_prompt}
910
+ Update the markdown memory by adding new information from this chunk relevant to the query. Retain all prior findings unless contradicted or updated. Only include explicitly relevant details.
911
+ Ensure the output is valid markdown matching the structure of the provided template.
912
+ Make sure to extract only information relevant to answering the user's query or providing important contextual information.
913
+ Return the updated markdown memory inside a markdown code block.
914
+ {self.ai_full_header}
915
+ """
1985
916
 
1986
- Your goal is to extract and combine relevant information from each text chunk with the existing memory, ensuring no key details are omitted or invented.
917
+ def update_memory_from_file_chunk_prompt_markdown(self, file_name, file_chunk_id, global_chunk_id, chunk, memory, query):
918
+ return f"""{self.system_full_header}
919
+ You are a search assistant that processes documents chunk by chunk to find information related to a query, updating a markdown memory of findings at each step.
1987
920
 
921
+ Your goal is to extract relevant information from each text chunk and update the provided markdown memory structure, ensuring no key details are omitted or invented. Maintain the structure of the markdown template.
1988
922
 
1989
923
  ----
1990
- # Chunk number: {{chunk_id}}
924
+ # Current file: {file_name}
925
+ # Chunk number in this file: {file_chunk_id}
926
+ # Global chunk number: {global_chunk_id}
1991
927
  # Text chunk:
1992
928
  ```markdown
1993
- {{chunk}}
929
+ {chunk}
1994
930
  ```
1995
-
1996
- Current findings memory:
931
+ Current findings memory (cumulative across all files):
1997
932
  ```markdown
1998
- {{memory}}
933
+ {memory}
1999
934
  ```
2000
935
  {self.user_full_header}
2001
936
  Query: '{query}'
2002
- Task: {search_prompt}
2003
-
2004
- Update the memory by adding new relevant information from this chunk. Retain all prior findings unless contradicted or updated. Only include explicitly relevant details.
2005
- Make sure to extrafct only information relevant to be able to answer the query of the user or at least gives important contextual information that can be completed to answer the user query.
937
+ {'Start creating a memory from the text chunk in a format adapted to answer the user query' if memory=="" else 'Update the markdown memory by adding new information from this chunk relevant to the query.'} Retain all prior findings unless contradicted or updated. Only include explicitly relevant details.
938
+ {'Ensure the output is valid markdown matching the structure of the current memory' if memory!='' else 'Ensure the output is valid markdown matching the structure of the provided template.'}
939
+ Make sure to extract only information relevant to answering the user's query or providing important contextual information.
940
+ Return the updated markdown memory inside a markdown code block.
2006
941
  {self.ai_full_header}
2007
942
  """
2008
943
 
2009
- # Calculate static prompt tokens
2010
- example_prompt = search_prompt_template.replace("{{chunk_id}}", "0")\
2011
- .replace("{{memory}}", "")\
2012
- .replace("{{chunk}}", "")
2013
- static_tokens = len(self.tokenize(example_prompt))
2014
-
2015
- # Process each text (file or input)
2016
- for file_name, file_text in all_texts:
2017
- file_tokens = self.tokenize(file_text)
2018
- start_token_idx = 0
2019
-
2020
- while start_token_idx < len(file_tokens):
2021
- # Calculate available tokens
2022
- current_memory_tokens = len(self.tokenize(memory))
2023
- available_tokens = ctx_size - static_tokens - current_memory_tokens
2024
- if available_tokens <= 0:
2025
- raise ValueError("Memory too large - consider reducing chunk size or increasing context window")
2026
-
2027
- # Adjust chunk size
2028
- actual_chunk_size = (
2029
- min(bootstrap_chunk_size, available_tokens)
2030
- if bootstrap_chunk_size is not None and bootstrap_steps is not None and chunk_id < bootstrap_steps
2031
- else min(chunk_size, available_tokens)
2032
- )
2033
-
2034
- end_token_idx = min(start_token_idx + actual_chunk_size, len(file_tokens))
2035
- chunk_tokens = file_tokens[start_token_idx:end_token_idx]
2036
- chunk = self.detokenize(chunk_tokens)
2037
-
2038
- # Generate updated memory
2039
- prompt = search_prompt_template.replace("{chunk_id}", str(chunk_id))\
2040
- .replace("{memory}", memory)\
2041
- .replace("{chunk}", chunk)
2042
- if debug:
2043
- print(f"----- Chunk {chunk_id} from {file_name} ------")
2044
- print(prompt)
2045
-
2046
- output = self.generate(prompt, n_predict=ctx_size // 4, streaming_callback=callback).strip()
2047
- code = self.extract_code_blocks(output)
2048
- memory = code[0]["content"] if code else output
2049
-
2050
- if debug:
2051
- print("----- Updated Memory ------")
2052
- print(memory)
2053
- print("---------------------------")
2054
-
2055
- start_token_idx = end_token_idx
2056
- chunk_id += 1
2057
-
2058
- # Aggregate findings if requested
2059
- if aggregation_prompt:
2060
- final_prompt = f"""{self.system_full_header}
2061
- You are a search results aggregator.
944
+ def deep_analyze(
945
+ self,
946
+ query: str,
947
+ text: str = None,
948
+ files: list = None,
949
+ aggregation_prompt: str = None,
950
+ output_format: str = "markdown",
951
+ ctx_size: int = None,
952
+ chunk_size: int = None,
953
+ bootstrap_chunk_size: int = None,
954
+ bootstrap_steps: int = None,
955
+ callback=None,
956
+ debug: bool = False
957
+ ):
958
+ """
959
+ Searches for specific information related to a query in a long text or a list of files.
960
+ Processes each file separately in chunks, updates a shared markdown memory with relevant findings, and optionally aggregates them.
961
+
962
+ Parameters:
963
+ - query (str): The query to search for.
964
+ - text (str, optional): The input text to search in. Defaults to None.
965
+ - files (list, optional): List of file paths to search in. Defaults to None.
966
967
+ - aggregation_prompt (str, optional): Prompt for aggregating findings. Defaults to None.
968
+ - output_format (str, optional): Output format. Defaults to "markdown".
969
+ - ctx_size (int, optional): Context size for the model. Defaults to None (uses the client's default context size).
970
+ - chunk_size (int, optional): Size of each chunk. Defaults to None (ctx_size // 4). Smaller chunk sizes yield better results but are slower.
971
+ - bootstrap_chunk_size (int, optional): Size for initial chunks. Defaults to None.
972
+ - bootstrap_steps (int, optional): Number of initial chunks using bootstrap size. Defaults to None.
973
+ - callback (callable, optional): Function called after each chunk. Defaults to None.
974
+ - debug (bool, optional): Enable debug output. Defaults to False.
975
+
976
+ Returns:
977
+ - str: The search findings or aggregated output in the specified format.
978
+ """
979
+ # Set defaults
980
+ if ctx_size is None:
981
+ ctx_size = self.default_ctx_size
982
+ if chunk_size is None:
983
+ chunk_size = ctx_size // 4
984
+
985
+ # Prepare input
986
+ if files:
987
+ all_texts = [(file, open(file, 'r', encoding='utf-8').read()) for file in files]
988
+ elif text:
989
+ all_texts = [("input_text", text)]
990
+ else:
991
+ raise ValueError("Either text or files must be provided.")
992
+
993
+ # Initialize the shared findings memory (a single markdown memory accumulated across all files)
994
+ memory = ""
2062
995
 
996
+ # Initialize global chunk counter
997
+ global_chunk_id = 0
998
+
999
+ # Calculate static prompt tokens
1000
+ example_prompt = self.update_memory_from_file_chunk_prompt_markdown("example.txt","0", "0", "", "", query)
1001
+ static_tokens = len(self.tokenize(example_prompt))
1002
+
1003
+ # Process each file separately
1004
+ for file_name, file_text in all_texts:
1005
+ file_tokens = self.tokenize(file_text)
1006
+ start_token_idx = 0
1007
+ file_chunk_id = 0 # Reset chunk counter for each file
1008
+
1009
+ while start_token_idx < len(file_tokens):
1010
+ # Calculate available tokens
1011
+ current_memory_tokens = len(self.tokenize(memory))
1012
+ available_tokens = ctx_size - static_tokens - current_memory_tokens
1013
+ if available_tokens <= 0:
1014
+ raise ValueError("Memory too large - consider reducing chunk size or increasing context window")
1015
+
1016
+ # Adjust chunk size
1017
+ actual_chunk_size = (
1018
+ min(bootstrap_chunk_size, available_tokens)
1019
+ if bootstrap_chunk_size is not None and bootstrap_steps is not None and global_chunk_id < bootstrap_steps
1020
+ else min(chunk_size, available_tokens)
1021
+ )
1022
+
1023
+ end_token_idx = min(start_token_idx + actual_chunk_size, len(file_tokens))
1024
+ chunk_tokens = file_tokens[start_token_idx:end_token_idx]
1025
+ chunk = self.detokenize(chunk_tokens)
1026
+
1027
+ # Generate updated memory
1028
+ prompt = self.update_memory_from_file_chunk_prompt_markdown(
1029
+ file_name=file_name,
1030
+ file_chunk_id=file_chunk_id,
1031
+ global_chunk_id=global_chunk_id,
1032
+ chunk=chunk,
1033
+ memory=memory,
1034
+ query=query)
1035
+ if debug:
1036
+ print(f"----- Chunk {file_chunk_id} (Global {global_chunk_id}) from {file_name} ------")
1037
+ print(prompt)
1038
+
1039
+ output = self.generate_text(prompt, n_predict=ctx_size // 4, streaming_callback=callback).strip()
1040
+ code = self.extract_code_blocks(output)
1041
+ if code:
1042
+ memory = code[0]["content"]
1043
+ else:
1044
+ memory = output
1045
+
1046
+ if debug:
1047
+ ASCIIColors.red("----- Updated Memory ------")
1048
+ ASCIIColors.white(memory)
1049
+ ASCIIColors.red("---------------------------")
1050
+
1051
+ start_token_idx = end_token_idx
1052
+ file_chunk_id += 1
1053
+ global_chunk_id += 1
1054
+
1055
+ # Aggregate findings if requested
1056
+ if aggregation_prompt:
1057
+ final_prompt = f"""{self.system_full_header}
1058
+ You are a search results aggregator.
2063
1059
  {self.user_full_header}
2064
1060
  {aggregation_prompt}
2065
-
2066
- Collected findings:
1061
+ Collected findings (across all files):
2067
1062
  ```markdown
2068
1063
  {memory}
2069
1064
  ```
2070
-
2071
1065
  Provide the final output in {output_format} format.
2072
1066
  {self.ai_full_header}
2073
1067
  """
2074
- final_output = self.generate(final_prompt, streaming_callback=callback)
2075
- code = self.extract_code_blocks(final_output)
2076
- return code[0]["content"] if code else final_output
2077
- return memory
1068
+ final_output = self.generate_text(final_prompt, streaming_callback=callback)
1069
+ code = self.extract_code_blocks(final_output)
1070
+ return code[0]["content"] if code else final_output
1071
+ return memory
1072
+
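deepsearch is replaced by deep_analyze, which walks each file chunk by chunk, carries one markdown memory across all files, and optionally aggregates it at the end. A minimal usage sketch, assuming lc is a constructed LollmsClient, the file paths exist, and the streaming callback keeps the return-True-to-continue contract used elsewhere in this module:

```python
def on_chunk(chunk, msg_type):
    print(chunk, end="", flush=True)
    return True  # returning False stops the current generation

report = lc.deep_analyze(
    query="Which evaluation datasets are mentioned and what scores are reported?",
    files=["papers/paper1.md", "papers/paper2.md"],   # illustrative paths
    chunk_size=2048,             # smaller chunks are more thorough but slower
    bootstrap_chunk_size=512,    # tighter chunks for the first steps
    bootstrap_steps=2,
    aggregation_prompt="Write a short report answering the query from the collected findings.",
    output_format="markdown",
    callback=on_chunk,
    debug=False,
)
print(report)
```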
2078
1073
  def error(self, content, duration:int=4, client_id=None, verbose:bool=True):
2079
1074
  ASCIIColors.error(content)
2080
1075
 
2081
1076
 
2082
1077
 
2083
1078
  if __name__=="__main__":
2084
- #lc = LollmsClient("http://localhost:9600")
1079
+ lc = LollmsClient("ollama", model_name="mistral-nemo:latest")
2085
1080
  #lc = LollmsClient("http://localhost:11434", model_name="mistral-nemo:latest", default_generation_mode=ELF_GENERATION_FORMAT.OLLAMA)
2086
- lc = LollmsClient(model_name="gpt-3.5-turbo-0125", default_generation_mode=ELF_GENERATION_FORMAT.OPENAI)
1081
+ #lc = LollmsClient(model_name="gpt-3.5-turbo-0125", default_generation_mode=ELF_GENERATION_FORMAT.OPENAI)
2087
1082
  print(lc.listModels())
2088
1083
  code = lc.generate_code("Build a simple json that contains name and age. Put the output inside a json markdown tag")
2089
1084
  print(code)
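A slightly fuller version of the smoke test above, hedged: the binding, host, and model names are examples only, the import path is assumed, and generate_text is assumed to keep the streaming_callback keyword used throughout this file:

```python
from lollms_client import LollmsClient  # import path assumed


def on_chunk(chunk, msg_type):
    print(chunk, end="", flush=True)
    return True  # False would stop the stream


if __name__ == "__main__":
    lc = LollmsClient(
        "ollama",                               # binding name, as in the example above
        host_address="http://localhost:11434",  # same host as the commented-out Ollama example
        model_name="mistral-nemo:latest",
    )
    print(lc.listModels())

    text = lc.generate_text(
        "List three things the lollms-client library can do.",
        streaming_callback=on_chunk,
    )
    print("\n---\n" + text)
```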