lollms-client 0.9.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lollms-client might be problematic.
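
For orientation, the sketch below shows how the reworked client introduced in 0.10.0 (binding selected by name, generation defaults stored on the instance) might be used. It is illustrative only and not part of the diff: the import path for LollmsClient, the host address, and the callback are assumptions, while the parameter and method names (binding_name, stream, streaming_callback, generate_text, get_available_bindings) are taken from the new code shown below.

from lollms_client.lollms_types import MSG_TYPE     # import path taken from the diff
from lollms_client.lollms_core import LollmsClient  # assumed import path, adjust to the package layout

def on_chunk(chunk: str, msg_type: MSG_TYPE) -> None:
    # Print streamed chunks as they arrive.
    print(chunk, end="", flush=True)

client = LollmsClient(
    binding_name="lollms",                 # binding is now chosen by name
    host_address="http://localhost:9600",  # assumed local LOLLMS service
    stream=True,
    streaming_callback=on_chunk,
)

print(client.get_available_bindings())     # bindings discovered by LollmsLLMBindingManager
answer = client.generate_text("Explain what a tokenizer does.", n_predict=128)

Tokenization, detokenization, and model switching now delegate to the active binding (client.tokenize, client.detokenize, client.switch_model), replacing the tiktoken-based tokenizer and the per-backend generate_* methods removed in this release.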

@@ -1,138 +1,110 @@
1
1
  import requests
2
2
  from ascii_colors import ASCIIColors, trace_exception
3
- from lollms_client.lollms_types import MSG_TYPE
3
+ from lollms_client.lollms_types import MSG_TYPE, ELF_COMPLETION_FORMAT
4
4
  from lollms_client.lollms_utilities import encode_image
5
+ from lollms_client.lollms_llm_binding import LollmsLLMBindingManager
5
6
  import json
6
7
  from enum import Enum
7
- import tiktoken
8
8
  import base64
9
9
  import requests
10
10
  import pipmaster as pm
11
- from typing import List, Optional, Callable, Union
11
+ from typing import List, Optional, Callable, Union, Dict
12
12
  import numpy as np
13
13
  import pipmaster as pm
14
+ from pathlib import Path
14
15
  import os
15
16
 
16
- class ELF_GENERATION_FORMAT(Enum):
17
- LOLLMS = 0
18
- OPENAI = 1
19
- OLLAMA = 2
20
- LITELLM = 3
21
- TRANSFORMERS = 4
22
- VLLM = 5
23
-
24
- @classmethod
25
- def from_string(cls, format_string: str) -> 'ELF_GENERATION_FORMAT':
26
- format_mapping = {
27
- "LOLLMS": cls.LOLLMS,
28
- "OPENAI": cls.OPENAI,
29
- "OLLAMA": cls.OLLAMA,
30
- "LITELLM": cls.LITELLM,
31
- "TRANSFORMERS": cls.TRANSFORMERS,
32
- "VLLM": cls.VLLM
33
- }
34
-
35
- try:
36
- return format_mapping[format_string.upper()]
37
- except KeyError:
38
- raise ValueError(f"Invalid format string: {format_string}. Must be one of {list(format_mapping.keys())}.")
39
17
 
40
- class ELF_COMPLETION_FORMAT(Enum):
41
- Instruct = 0
42
- Chat = 1
43
- @classmethod
44
- def from_string(cls, format_string: str) -> 'ELF_COMPLETION_FORMAT':
45
- format_mapping = {
46
- "Instruct": cls.Instruct,
47
- "Chat": cls.Chat,
48
- }
18
+ class LollmsClient():
19
+ """Core class for interacting with LOLLMS bindings"""
20
+ def __init__(self,
21
+ binding_name: str = "lollms",
22
+ host_address: Optional[str] = None,
23
+ model_name: str = "",
24
+ service_key: Optional[str] = None,
25
+ verify_ssl_certificate: bool = True,
26
+ personality: Optional[int] = None,
27
+ llm_bindings_dir: Path = Path(__file__).parent / "llm_bindings",
28
+ binding_config: Optional[Dict[str, any]] = None,
29
+ ctx_size: Optional[int] = 8192,
30
+ n_predict: Optional[int] = 4096,
31
+ stream: bool = False,
32
+ temperature: float = 0.1,
33
+ top_k: int = 50,
34
+ top_p: float = 0.95,
35
+ repeat_penalty: float = 0.8,
36
+ repeat_last_n: int = 40,
37
+ seed: Optional[int] = None,
38
+ n_threads: int = 8,
39
+ streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
40
+ user_name ="user",
41
+ ai_name = "assistant"):
42
+ """
43
+ Initialize the LollmsCore with a binding and generation parameters.
49
44
 
50
- try:
51
- return format_mapping[format_string.upper()]
52
- except KeyError:
53
- raise ValueError(f"Invalid format string: {format_string}. Must be one of {list(format_mapping.keys())}.")
45
+ Args:
46
+ binding_name (str): Name of the binding to use (e.g., "lollms", "ollama").
47
+ host_address (Optional[str]): Host address for the service. Overrides binding default if provided.
48
+ model_name (str): Name of the model to use. Defaults to empty string.
49
+ service_key (Optional[str]): Authentication key for the service.
50
+ verify_ssl_certificate (bool): Whether to verify SSL certificates. Defaults to True.
51
+ personality (Optional[int]): Personality ID (used only by LOLLMS binding).
52
+ llm_bindings_dir (Path): Directory containing binding implementations.
53
+ Defaults to the "bindings" subdirectory relative to this file's location.
54
+ binding_config (Optional[Dict[str, any]]): Additional configuration for the binding.
55
+ n_predict (Optional[int]): Maximum number of tokens to generate. Default for generate_text.
56
+ stream (bool): Whether to stream the output. Defaults to False for generate_text.
57
+ temperature (float): Sampling temperature. Defaults to 0.1 for generate_text.
58
+ top_k (int): Top-k sampling parameter. Defaults to 50 for generate_text.
59
+ top_p (float): Top-p sampling parameter. Defaults to 0.95 for generate_text.
60
+ repeat_penalty (float): Penalty for repeated tokens. Defaults to 0.8 for generate_text.
61
+ repeat_last_n (int): Number of previous tokens to consider for repeat penalty. Defaults to 40.
62
+ seed (Optional[int]): Random seed for generation. Default for generate_text.
63
+ n_threads (int): Number of threads to use. Defaults to 8 for generate_text.
64
+ streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
65
+ Default for generate_text. Takes a string chunk and an MSG_TYPE enum value.
66
+
67
+ Raises:
68
+ ValueError: If the specified binding cannot be created.
69
+ """
70
+ self.binding_manager = LollmsLLMBindingManager(llm_bindings_dir)
71
+ self.binding_config = binding_config or {}
54
72
 
55
- class LollmsClient():
56
- def __init__(
57
- self,
58
- host_address=None,
59
- model_name=None,
60
- ctx_size=32000,
61
- personality=-1,
62
- n_predict=4096,
63
- min_n_predict=512,
64
- temperature=0.1,
65
- top_k=50,
66
- top_p=0.95,
67
- repeat_penalty=0.8,
68
- repeat_last_n=40,
69
- seed=None,
70
- n_threads=8,
71
- service_key:str="",
72
- tokenizer=None,
73
- default_generation_mode=ELF_GENERATION_FORMAT.LOLLMS,
74
- verify_ssl_certificate = True,
75
- user_name = "user",
76
- ai_name = "assistant"
77
- ) -> None:
78
- import tiktoken
73
+ # Store generation parameters as instance variables
74
+ self.default_ctx_size = ctx_size
75
+ self.default_n_predict = n_predict
76
+ self.default_stream = stream
77
+ self.default_temperature = temperature
78
+ self.default_top_k = top_k
79
+ self.default_top_p = top_p
80
+ self.default_repeat_penalty = repeat_penalty
81
+ self.default_repeat_last_n = repeat_last_n
82
+ self.default_seed = seed
83
+ self.default_n_threads = n_threads
84
+ self.default_streaming_callback = streaming_callback
85
+
86
+ # Create the binding instance
87
+ self.binding = self.binding_manager.create_binding(
88
+ binding_name=binding_name,
89
+ host_address=host_address,
90
+ model_name=model_name,
91
+ service_key=service_key,
92
+ verify_ssl_certificate=verify_ssl_certificate,
93
+ personality=personality
94
+ )
95
+
96
+ if self.binding is None:
97
+ raise ValueError(f"Failed to create binding: {binding_name}. Available bindings: {self.binding_manager.get_available_bindings()}")
98
+
99
+ # Apply additional configuration if provided
100
+ if binding_config:
101
+ for key, value in binding_config.items():
102
+ setattr(self.binding, key, value)
79
103
  self.user_name = user_name
80
104
  self.ai_name = ai_name
81
- self.host_address=host_address
82
- if not self.host_address:
83
- if default_generation_mode==ELF_GENERATION_FORMAT.LOLLMS:
84
- self.host_address = "http://localhost:9600"
85
- elif default_generation_mode==ELF_GENERATION_FORMAT.OPENAI:
86
- self.host_address = "https://api.openai.com"
87
- elif default_generation_mode==ELF_GENERATION_FORMAT.OLLAMA:
88
- self.host_address = "http://localhost:11434"
89
- else:
90
- self.host_address = "http://localhost:9600"
91
-
92
- self.model_name = model_name
93
- self.ctx_size = ctx_size
94
- self.n_predict = n_predict
95
- self.min_n_predict = min_n_predict
96
- self.personality = personality
97
- self.temperature = temperature
98
- self.top_k = top_k
99
- self.top_p = top_p
100
- self.repeat_penalty = repeat_penalty
101
- self.repeat_last_n = repeat_last_n
102
- self.seed = seed
103
- self.n_threads = n_threads
104
105
  self.service_key = service_key
105
- if not self.service_key and default_generation_mode == ELF_GENERATION_FORMAT.OPENAI:
106
- self.service_key = os.getenv("OPENAI_API_KEY","")
107
- self.default_generation_mode = default_generation_mode
108
- self.verify_ssl_certificate = verify_ssl_certificate
109
- self.tokenizer = tiktoken.model.encoding_for_model("gpt-3.5-turbo-1106") if tokenizer is None else tokenizer
110
- if default_generation_mode == ELF_GENERATION_FORMAT.TRANSFORMERS:
111
- if not pm.is_installed("torch"):
112
- ASCIIColors.yellow("Diffusers: Torch not found. Installing it")
113
- pm.install_multiple(["torch","torchvision","torchaudio"], "https://download.pytorch.org/whl/cu121", force_reinstall=True)
114
-
115
- import torch
116
- if not torch.cuda.is_available():
117
- ASCIIColors.yellow("Diffusers: Torch not using cuda. Reinstalling it")
118
- pm.install_multiple(["torch","torchvision","torchaudio"], "https://download.pytorch.org/whl/cu121", force_reinstall=True)
119
- import torch
120
-
121
- if not pm.is_installed("transformers"):
122
- pm.install_or_update("transformers")
123
- from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
124
- self.tokenizer = AutoTokenizer.from_pretrained(
125
- str(model_name), trust_remote_code=False
126
- )
127
-
128
- self.model = AutoModelForCausalLM.from_pretrained(
129
- str(model_name),
130
- device_map="auto",
131
- load_in_4bit=True,
132
- torch_dtype=torch.bfloat16 # Load in float16 for quantization
133
- )
134
- self.generation_config = GenerationConfig.from_pretrained(str(model_name))
135
106
 
107
+ self.verify_ssl_certificate = verify_ssl_certificate
136
108
  self.start_header_id_template ="!@>"
137
109
  self.end_header_id_template =": "
138
110
  self.system_message_template ="system"
@@ -144,1178 +116,149 @@ class LollmsClient():
144
116
  self.end_ai_header_id_template =": "
145
117
  self.end_ai_message_id_template =""
146
118
 
147
- if default_generation_mode==ELF_GENERATION_FORMAT.OPENAI:
148
- if not pm.is_installed("openai"):
149
- pm.install("openai")
150
- import openai
151
- self.client = openai.OpenAI(base_url=host_address)
152
-
153
119
 
154
120
  @property
155
121
  def system_full_header(self) -> str:
156
122
  """Get the start_header_id_template."""
157
123
  return f"{self.start_header_id_template}{self.system_message_template}{self.end_header_id_template}"
124
+
125
+ def system_custom_header(self, ai_name) -> str:
126
+ """Get the start_header_id_template."""
127
+ return f"{self.start_header_id_template}{ai_name}{self.end_header_id_template}"
128
+
158
129
  @property
159
130
  def user_full_header(self) -> str:
160
131
  """Get the start_header_id_template."""
161
132
  return f"{self.start_user_header_id_template}{self.user_name}{self.end_user_header_id_template}"
133
+
134
+ def user_custom_header(self, user_name="user") -> str:
135
+ """Get the start_header_id_template."""
136
+ return f"{self.start_user_header_id_template}{user_name}{self.end_user_header_id_template}"
137
+
162
138
  @property
163
139
  def ai_full_header(self) -> str:
164
140
  """Get the start_header_id_template."""
165
141
  return f"{self.start_ai_header_id_template}{self.ai_name}{self.end_ai_header_id_template}"
166
142
 
167
- def system_custom_header(self, ai_name) -> str:
168
- """Get the start_header_id_template."""
169
- return f"{self.start_header_id_template}{ai_name}{self.end_header_id_template}"
170
-
171
143
  def ai_custom_header(self, ai_name) -> str:
172
144
  """Get the start_header_id_template."""
173
145
  return f"{self.start_ai_header_id_template}{ai_name}{self.end_ai_header_id_template}"
174
146
 
175
-
176
- def tokenize(self, prompt:str):
147
+ def sink(self, s=None,i=None,d=None):
148
+ pass
149
+ def tokenize(self, text: str) -> list:
177
150
  """
178
- Tokenizes the given prompt using the model's tokenizer.
151
+ Tokenize text using the active binding.
179
152
 
180
153
  Args:
181
- prompt (str): The input prompt to be tokenized.
154
+ text (str): The text to tokenize.
182
155
 
183
156
  Returns:
184
- list: A list of tokens representing the tokenized prompt.
157
+ list: List of tokens.
185
158
  """
186
- tokens_list = self.tokenizer.encode(prompt)
187
-
188
- return tokens_list
189
-
190
- def detokenize(self, tokens_list:list):
159
+ return self.binding.tokenize(text)
160
+
161
+ def detokenize(self, tokens: list) -> str:
191
162
  """
192
- Detokenizes the given list of tokens using the model's tokenizer.
163
+ Detokenize tokens using the active binding.
193
164
 
194
165
  Args:
195
- tokens_list (list): A list of tokens to be detokenized.
166
+ tokens (list): List of tokens to detokenize.
196
167
 
197
168
  Returns:
198
- str: The detokenized text as a string.
169
+ str: Detokenized text.
199
170
  """
200
- text = self.tokenizer.decode(tokens_list)
201
-
202
- return text
171
+ return self.binding.detokenize(tokens)
203
172
 
204
- def embed(self, text):
205
- if self.default_generation_mode == ELF_GENERATION_FORMAT.LOLLMS:
206
- return self.lollms_embed(text)
207
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.OLLAMA:
208
- return self.ollama_embed(text)
209
- else:
210
- return #not implemented
211
-
212
- def ollama_embed(self, text, **kwargs):
173
+ def get_model_details(self) -> dict:
213
174
  """
214
- Get embeddings for the input text using Ollama API
215
-
216
- Args:
217
- text (str or List[str]): Input text to embed
218
- **kwargs: Additional arguments like model, truncate, options, keep_alive
219
-
175
+ Get model information from the active binding.
176
+
220
177
  Returns:
221
- dict: Response containing embeddings
178
+ dict: Model information dictionary.
222
179
  """
223
- import requests
224
-
225
- url = f"{self.base_url}/api/embed"
226
-
227
- # Prepare the request payload
228
- payload = {
229
- "input": text,
230
- "model": kwargs.get("model", "llama2") # default model
231
- }
232
-
233
- # Add optional parameters if provided
234
- if "truncate" in kwargs:
235
- payload["truncate"] = kwargs["truncate"]
236
- if "options" in kwargs:
237
- payload["options"] = kwargs["options"]
238
- if "keep_alive" in kwargs:
239
- payload["keep_alive"] = kwargs["keep_alive"]
240
-
241
- try:
242
- response = requests.post(url, json=payload)
243
- response.raise_for_status() # Raise exception for bad status codes
244
- return response.json()
245
- except requests.exceptions.RequestException as e:
246
- raise Exception(f"Embedding request failed: {str(e)}")
247
-
248
-
249
- def lollms_embed(self, texts, **kwargs):
250
- api_key = kwargs.pop("api_key", None)
251
- headers = (
252
- {"Content-Type": "application/json", "Authorization": api_key}
253
- if api_key
254
- else {"Content-Type": "application/json"}
255
- )
256
- embeddings = []
257
- for text in texts:
258
- request_data = {"text": text}
259
- response = requests.post(f"{self.host_address}/lollms_embed", json=request_data, headers=headers)
260
- response.raise_for_status()
261
- result = response.json()
262
- embeddings.append(result["vector"])
263
- return np.array(embeddings)
180
+ return self.binding.get_model_info()
264
181
 
265
- def generate_with_images(self, prompt, images, n_predict=None, stream=False, temperature=0.1, top_k=50, top_p=0.95, repeat_penalty=0.8, repeat_last_n=40, seed=None, n_threads=8, service_key:str="", streaming_callback=None):
266
- if self.default_generation_mode == ELF_GENERATION_FORMAT.LOLLMS:
267
- return self.lollms_generate_with_images(prompt, images, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, service_key, streaming_callback)
268
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.OPENAI:
269
- return self.openai_generate_with_images(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, ELF_COMPLETION_FORMAT.Instruct, service_key, streaming_callback)
270
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.OLLAMA:
271
- return self.ollama_generate_with_images(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, ELF_COMPLETION_FORMAT.Instruct, service_key, streaming_callback)
272
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.LITELLM:
273
- return # To be implemented #self.litellm_generate_with_images(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, ELF_COMPLETION_FORMAT.Instruct, service_key, streaming_callback)
274
-
275
-
276
- def generate(self, prompt, n_predict=None, stream=False, temperature=0.1, top_k=50, top_p=0.95, repeat_penalty=0.8, repeat_last_n=40, seed=None, n_threads=8, service_key:str="", streaming_callback=None, completion_format = ELF_COMPLETION_FORMAT.Chat):
277
- if self.default_generation_mode == ELF_GENERATION_FORMAT.LOLLMS:
278
- return self.lollms_generate(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, service_key, streaming_callback)
279
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.OPENAI:
280
- return self.openai_generate(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, completion_format, service_key, streaming_callback)
281
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.OLLAMA:
282
- return self.ollama_generate(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, completion_format, service_key, streaming_callback)
283
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.LITELLM:
284
- return self.litellm_generate(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, completion_format, service_key, streaming_callback)
285
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.VLLM:
286
- return self.vllm_generate(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, completion_format, service_key, streaming_callback)
287
-
288
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.TRANSFORMERS:
289
- return self.transformers_generate(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, service_key, streaming_callback)
290
-
291
-
292
- def generate_text(self, prompt, host_address=None, model_name=None, personality=None, n_predict=None, stream=False, temperature=0.1, top_k=50, top_p=0.95, repeat_penalty=0.8, repeat_last_n=40, seed=None, n_threads=8, service_key:str="", streaming_callback=None):
293
- return self.lollms_generate(prompt, host_address, model_name, personality, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, service_key, streaming_callback)
294
-
295
- def lollms_generate(self, prompt, host_address=None, model_name=None, personality=None, n_predict=None, stream=False, temperature=0.1, top_k=50, top_p=0.95, repeat_penalty=0.8, repeat_last_n=40, seed=None, n_threads=8, service_key:str="", streaming_callback=None):
296
- # Set default values to instance variables if optional arguments are None
297
- host_address = host_address if host_address else self.host_address
298
- model_name = model_name if model_name else self.model_name
299
- n_predict = n_predict if n_predict else self.n_predict
300
- personality = personality if personality is not None else self.personality
301
- # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
302
- temperature = temperature if temperature is not None else self.temperature
303
- top_k = top_k if top_k is not None else self.top_k
304
- top_p = top_p if top_p is not None else self.top_p
305
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
306
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
307
- seed = seed or self.seed # Use the instance seed if not provided
308
- n_threads = n_threads if n_threads else self.n_threads
309
-
310
-
311
- url = f"{host_address}/lollms_generate"
312
- if service_key!="":
313
- headers = {
314
- 'Content-Type': 'application/json;',
315
- 'Authorization': f'Bearer {service_key}',
316
- }
317
- else:
318
- headers = {
319
- 'Content-Type': 'application/json',
320
- }
321
- data = {
322
- "prompt": prompt,
323
- "model_name": self.model_name,
324
- "personality": self.personality,
325
- "n_predict": n_predict,
326
- "stream": stream,
327
- "temperature": self.temperature,
328
- "top_k": self.top_k,
329
- "top_p": self.top_p,
330
- "repeat_penalty": repeat_penalty,
331
- "repeat_last_n": repeat_last_n,
332
- "seed": seed,
333
- "n_threads": n_threads
334
- }
335
-
336
- response = requests.post(url, json=data, headers=headers, stream=stream)
337
- if not stream:
338
- if response.status_code == 200:
339
- try:
340
- text = response.text.strip().rstrip('!')
341
- return text
342
- except Exception as ex:
343
- return {"status": False, "error": str(ex)}
344
- else:
345
- return {"status": False, "error": response.text}
346
- else:
347
- text = ""
348
- if response.status_code==200:
349
- try:
350
- for line in response.iter_lines():
351
- chunk = line.decode("utf-8")
352
- text += chunk
353
- if streaming_callback:
354
- streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK)
355
- return text.rstrip('!')
356
- except Exception as ex:
357
- return {"status": False, "error": str(ex)}
358
- else:
359
- return {"status": False, "error": response.text}
360
-
361
-
362
- def lollms_generate_with_images(
363
- self,
364
- prompt: str,
365
- images: List[str],
366
- host_address: Optional[str] = None,
367
- model_name: Optional[str] = None,
368
- personality: Optional[str] = None,
369
- n_predict: Optional[int] = None,
370
- stream: bool = False,
371
- temperature: float = 0.1,
372
- top_k: int = 50,
373
- top_p: float = 0.95,
374
- repeat_penalty: float = 0.8,
375
- repeat_last_n: int = 40,
376
- seed: Optional[int] = None,
377
- n_threads: int = 8,
378
- service_key: str = "",
379
- streaming_callback: Optional[Callable[[str, int], None]] = None
380
- ) -> Union[str, dict]:
182
+ def switch_model(self, model_name: str) -> bool:
381
183
  """
382
- Generates text based on a prompt and a list of images using a specified model.
184
+ Load a new model in the active binding.
383
185
 
384
186
  Args:
385
- prompt (str): The text prompt to generate responses for.
386
- images (List[str]): A list of file paths to images to be included in the generation.
387
- host_address (Optional[str]): The host address for the service. Defaults to instance variable.
388
- model_name (Optional[str]): The model name to use. Defaults to instance variable.
389
- personality (Optional[str]): The personality setting for the generation. Defaults to instance variable.
390
- n_predict (Optional[int]): The number of tokens to predict. Defaults to instance variable.
391
- stream (bool): Whether to stream the response. Defaults to False.
392
- temperature (float): Sampling temperature. Defaults to 0.1.
393
- top_k (int): Top-k sampling parameter. Defaults to 50.
394
- top_p (float): Top-p (nucleus) sampling parameter. Defaults to 0.95.
395
- repeat_penalty (float): Penalty for repeating tokens. Defaults to 0.8.
396
- repeat_last_n (int): Number of last tokens to consider for repeat penalty. Defaults to 40.
397
- seed (Optional[int]): Random seed for generation. Defaults to instance variable.
398
- n_threads (int): Number of threads to use. Defaults to 8.
399
- service_key (str): Optional service key for authorization.
400
- streaming_callback (Optional[Callable[[str, int], None]]): Callback for streaming responses.
187
+ model_name (str): Name of the model to load.
401
188
 
402
189
  Returns:
403
- Union[str, dict]: The generated text if not streaming, or a dictionary with status and error if applicable.
190
+ bool: True if model loaded successfully, False otherwise.
404
191
  """
405
-
406
- # Set default values to instance variables if optional arguments are None
407
- host_address = host_address if host_address else self.host_address
408
- model_name = model_name if model_name else self.model_name
409
- n_predict = n_predict if n_predict else self.n_predict
410
- personality = personality if personality is not None else self.personality
411
-
412
- # Set parameters to instance variables if they are not provided or None
413
- temperature = temperature if temperature is not None else self.temperature
414
- top_k = top_k if top_k is not None else self.top_k
415
- top_p = top_p if top_p is not None else self.top_p
416
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
417
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
418
- seed = seed or self.seed # Use the instance seed if not provided
419
- n_threads = n_threads if n_threads else self.n_threads
420
-
421
- def encode_image_to_base64(image_path: str) -> str:
422
- """Encodes an image file to a base64 string."""
423
- with open(image_path, "rb") as image_file:
424
- encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
425
- return encoded_string
426
-
427
- # Encode images in base64
428
- encoded_images = [encode_image_to_base64(image) for image in images]
429
-
430
- url = f"{host_address}/lollms_generate_with_images"
431
- headers = {
432
- 'Content-Type': 'application/json',
433
- 'Authorization': f'Bearer {service_key}' if service_key else '',
434
- }
435
-
436
- data = {
437
- "prompt": prompt,
438
- "model_name": model_name,
439
- "personality": personality,
440
- "n_predict": n_predict,
441
- "stream": stream,
442
- "temperature": temperature,
443
- "top_k": top_k,
444
- "top_p": top_p,
445
- "repeat_penalty": repeat_penalty,
446
- "repeat_last_n": repeat_last_n,
447
- "seed": seed,
448
- "n_threads": n_threads,
449
- "images": encoded_images # Add encoded images to the request payload
450
- }
451
-
452
- response = requests.post(url, json=data, headers=headers, stream=stream)
453
- if not stream:
454
- if response.status_code == 200:
455
- try:
456
- text = response.text.rstrip('!')
457
- return text
458
- except Exception as ex:
459
- return {"status": False, "error": str(ex)}
460
- else:
461
- return {"status": False, "error": response.text}
462
- else:
463
- text = ""
464
- if response.status_code == 200:
465
- try:
466
- for line in response.iter_lines():
467
- chunk = line.decode("utf-8")
468
- text += chunk
469
- if streaming_callback:
470
- streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK)
471
- if text[0] == '"':
472
- text = text[1:]
473
- if text[-1] == '"':
474
- text = text[:-1]
475
- return text
476
- except Exception as ex:
477
- return {"status": False, "error": str(ex)}
478
- else:
479
- return {"status": False, "error": response.text}
480
-
192
+ return self.binding.load_model(model_name)
481
193
 
482
- def transformers_generate(self, prompt, host_address=None, model_name=None, personality=None, n_predict=None, stream=False, temperature=0.1, top_k=50, top_p=0.95, repeat_penalty=0.8, repeat_last_n=40, seed=None, n_threads=8, service_key:str="", streaming_callback=None):
483
- # Set default values to instance variables if optional arguments are None
484
- model_name = model_name if model_name else self.model_name
485
- n_predict = n_predict if n_predict else self.n_predict
486
- personality = personality if personality is not None else self.personality
487
- # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
488
- temperature = temperature if temperature is not None else self.temperature
489
- top_k = top_k if top_k is not None else self.top_k
490
- top_p = top_p if top_p is not None else self.top_p
491
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
492
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
493
- seed = seed or self.seed # Use the instance seed if not provided
494
- n_threads = n_threads if n_threads else self.n_threads
495
-
496
- self.generation_config.max_new_tokens = int(n_predict)
497
- self.generation_config.temperature = float(temperature)
498
- self.generation_config.top_k = int(top_k)
499
- self.generation_config.top_p = float(top_p)
500
- self.generation_config.repetition_penalty = float(repeat_penalty)
501
- self.generation_config.do_sample = True if float(temperature)>0 else False
502
- self.generation_config.pad_token_id = self.tokenizer.pad_token_id
503
- self.generation_config.eos_token_id = self.tokenizer.eos_token_id
504
- self.generation_config.output_attentions = False
505
-
506
- try:
507
- input_ids = self.tokenizer(prompt, add_special_tokens=False, return_tensors='pt').input_ids
508
- class StreamerClass:
509
- def __init__(self, tokenizer, callback):
510
- self.output = ""
511
- self.skip_prompt = True
512
- self.decode_kwargs = {}
513
- self.tokenizer = tokenizer
514
-
515
- # variables used in the streaming process
516
- self.token_cache = []
517
- self.print_len = 0
518
- self.next_tokens_are_prompt = True
519
- self.callback = callback
520
- def put(self, value):
521
- """
522
- Recives tokens, decodes them, and prints them to stdout as soon as they form entire words.
523
- """
524
- if len(value.shape)==1 and (value[0] == self.tokenizer.eos_token_id or value[0] == self.tokenizer.bos_token_id):
525
- print("eos detected")
526
- return
527
- if len(value.shape) > 1 and value.shape[0] > 1:
528
- raise ValueError("TextStreamer only supports batch size 1")
529
- elif len(value.shape) > 1:
530
- value = value[0]
531
-
532
- if self.skip_prompt and self.next_tokens_are_prompt:
533
- self.next_tokens_are_prompt = False
534
- return
535
-
536
- # Add the new token to the cache and decodes the entire thing.
537
- self.token_cache.extend(value.tolist())
538
- text = self.tokenizer.decode(self.token_cache, **self.decode_kwargs)
539
-
540
- # After the symbol for a new line, we flush the cache.
541
- if text.endswith("\n"):
542
- printable_text = text[self.print_len :]
543
- self.token_cache = []
544
- self.print_len = 0
545
- # If the last token is a CJK character, we print the characters.
546
- elif len(text) > 0 and self._is_chinese_char(ord(text[-1])):
547
- printable_text = text[self.print_len :]
548
- self.print_len += len(printable_text)
549
- # Otherwise, prints until the last space char (simple heuristic to avoid printing incomplete words,
550
- # which may change with the subsequent token -- there are probably smarter ways to do this!)
551
- else:
552
- printable_text = text[self.print_len : text.rfind(" ") + 1]
553
- self.print_len += len(printable_text)
554
-
555
- self.output += printable_text
556
- if self.callback:
557
- if not self.callback(printable_text, 0):
558
- raise Exception("canceled")
559
-
560
- def _is_chinese_char(self, cp):
561
- """Checks whether CP is the codepoint of a CJK character."""
562
- # This defines a "chinese character" as anything in the CJK Unicode block:
563
- # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
564
- #
565
- # Note that the CJK Unicode block is NOT all Japanese and Korean characters,
566
- # despite its name. The modern Korean Hangul alphabet is a different block,
567
- # as is Japanese Hiragana and Katakana. Those alphabets are used to write
568
- # space-separated words, so they are not treated specially and handled
569
- # like the all of the other languages.
570
- if (
571
- (cp >= 0x4E00 and cp <= 0x9FFF)
572
- or (cp >= 0x3400 and cp <= 0x4DBF) #
573
- or (cp >= 0x20000 and cp <= 0x2A6DF) #
574
- or (cp >= 0x2A700 and cp <= 0x2B73F) #
575
- or (cp >= 0x2B740 and cp <= 0x2B81F) #
576
- or (cp >= 0x2B820 and cp <= 0x2CEAF) #
577
- or (cp >= 0xF900 and cp <= 0xFAFF)
578
- or (cp >= 0x2F800 and cp <= 0x2FA1F) #
579
- ): #
580
- return True
581
-
582
- return False
583
- def end(self):
584
- """Flushes any remaining cache and prints a newline to stdout."""
585
- # Flush the cache, if it exists
586
- if len(self.token_cache) > 0:
587
- text = self.tokenizer.decode(self.token_cache, **self.decode_kwargs)
588
- printable_text = text[self.print_len :]
589
- self.token_cache = []
590
- self.print_len = 0
591
- else:
592
- printable_text = ""
593
-
594
- self.next_tokens_are_prompt = True
595
- if self.callback:
596
- if self.callback(printable_text, 0):
597
- raise Exception("canceled")
598
- streamer = StreamerClass(self.tokenizer, streaming_callback)
599
- self.generate(
600
- inputs=input_ids,
601
- generation_config=self.generation_config,
602
- streamer = streamer,
603
- )
604
- return streamer.output.rstrip('!')
605
- except Exception as ex:
606
- return {"status": False, "error": str(ex)}
607
-
608
- def openai_generate(self,
609
- prompt,
610
- host_address=None,
611
- model_name=None,
612
- personality=None,
613
- n_predict=None,
614
- stream=False,
615
- temperature=0.1,
616
- top_k=50,
617
- top_p=0.95,
618
- repeat_penalty=0.8,
619
- repeat_last_n=40,
620
- seed=None,
621
- n_threads=8,
622
- completion_format: ELF_COMPLETION_FORMAT = ELF_COMPLETION_FORMAT.Chat,
623
- service_key: str = "",
624
- streaming_callback=None):
194
+ def get_available_bindings(self) -> List[str]:
625
195
  """
626
- Generates text using the OpenAI API based on the provided prompt and parameters.
627
-
628
- Parameters:
629
- prompt (str): The input text prompt to generate completions for.
630
- host_address (str, optional): The API host address. Defaults to instance variable.
631
- model_name (str, optional): The model to use for generation. Defaults to instance variable.
632
- personality (str, optional): The personality setting for the model. Defaults to instance variable.
633
- n_predict (int, optional): The number of tokens to predict. Defaults to instance variable.
634
- stream (bool, optional): Whether to stream the response. Defaults to False.
635
- temperature (float, optional): Sampling temperature. Higher values mean more randomness. Defaults to 0.1.
636
- top_k (int, optional): The number of highest probability vocabulary tokens to keep for top-k filtering. Defaults to 50.
637
- top_p (float, optional): The cumulative probability of parameter options to keep for nucleus sampling. Defaults to 0.95.
638
- repeat_penalty (float, optional): The penalty for repeating tokens. Defaults to 0.8.
639
- repeat_last_n (int, optional): The number of last tokens to consider for repeat penalty. Defaults to 40.
640
- seed (int, optional): Random seed for reproducibility. Defaults to instance variable.
641
- n_threads (int, optional): The number of threads to use for generation. Defaults to 8.
642
- completion_format (ELF_COMPLETION_FORMAT, optional): The format of the completion request (Instruct or Chat). Defaults to ELF_COMPLETION_FORMAT.Instruct.
643
- service_key (str, optional): The API service key for authorization. Defaults to an empty string.
644
- streaming_callback (callable, optional): A callback function to handle streaming responses.
196
+ Get list of available bindings.
645
197
 
646
198
  Returns:
647
- str: The generated text response from the OpenAI API.
199
+ List[str]: List of binding names that can be used.
648
200
  """
649
- # Set default values to instance variables if optional arguments are None
650
- host_address = host_address if host_address else self.host_address
651
- model_name = model_name if model_name else self.model_name
652
- n_predict = n_predict if n_predict else self.n_predict
653
- personality = personality if personality is not None else self.personality
654
- # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
655
- temperature = temperature if temperature is not None else self.temperature
656
- top_k = top_k if top_k is not None else self.top_k
657
- top_p = top_p if top_p is not None else self.top_p
658
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
659
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
660
- seed = seed or self.seed # Use the instance seed if not provided
661
- n_threads = n_threads if n_threads else self.n_threads
662
- service_key = service_key if service_key else self.service_key
663
- self.client.api_key = service_key
664
- count = 0
665
- output= ""
666
-
667
-
668
- if "vision" in self.model_name:
669
- messages = [
670
- {
671
- "role": "user",
672
- "content": [
673
- {
674
- "type":"text",
675
- "text":prompt
676
- }
677
- ]
678
- }
679
- ]
680
- else:
681
- messages = [{"role": "user", "content": prompt}]
682
-
683
-
684
- if completion_format == ELF_COMPLETION_FORMAT.Chat:
685
- if "o1" in self.model_name:
686
- chat_completion = self.client.chat.completions.create(
687
- model=self.model_name, # Choose the engine according to your OpenAI plan
688
- messages=messages,
689
- n=1, # Specify the number of responses you want
690
- )
691
- output = chat_completion.choices[0].message.content
692
- else:
693
- chat_completion = self.client.chat.completions.create(
694
- model=self.model_name, # Choose the engine according to your OpenAI plan
695
- messages=messages,
696
- max_tokens=n_predict-7 if n_predict>512 else n_predict, # Adjust the desired length of the generated response
697
- n=1, # Specify the number of responses you want
698
- temperature=float(self.temperature), # Adjust the temperature for more or less randomness in the output
699
- stream=True)
700
-
701
- for resp in chat_completion:
702
- if count >= n_predict:
703
- break
704
- try:
705
- word = resp.choices[0].delta.content
706
- except Exception as ex:
707
- word = ""
708
- if streaming_callback is not None:
709
- if not streaming_callback(word):
710
- break
711
- if word:
712
- output += word
713
- count += 1
714
- else:
715
- completion = self.client.completions.create(
716
- model=self.model_name, # Choose the engine according to your OpenAI plan
717
- prompt=prompt,
718
- max_tokens=n_predict-7 if n_predict>512 else n_predict, # Adjust the desired length of the generated response
719
- n=1, # Specify the number of responses you want
720
- temperature=float(self.temperature), # Adjust the temperature for more or less randomness in the output
721
- stream=True)
722
-
723
- for resp in completion:
724
- if count >= n_predict:
725
- break
726
- try:
727
- word = resp.choices[0].text
728
- except Exception as ex:
729
- word = ""
730
- if streaming_callback is not None:
731
- if not streaming_callback(word):
732
- break
733
- if word:
734
- output += word
735
- count += 1
736
-
737
- return output
738
-
739
-
740
- def vllm_generate(self,
741
- prompt,
742
- host_address=None,
743
- model_name=None,
744
- personality=None,
745
- n_predict=None,
746
- stream=False,
747
- temperature=0.1,
748
- top_k=50,
749
- top_p=0.95,
750
- repeat_penalty=0.8,
751
- repeat_last_n=40,
752
- seed=None,
753
- n_threads=8,
754
- completion_format: ELF_COMPLETION_FORMAT = ELF_COMPLETION_FORMAT.Instruct,
755
- service_key: str = "",
756
- streaming_callback=None):
201
+ return self.binding_manager.get_available_bindings()
202
+
203
+ def generate_text(self,
204
+ prompt: str,
205
+ images: Optional[List[str]] = None,
206
+ n_predict: Optional[int] = None,
207
+ stream: Optional[bool] = None,
208
+ temperature: Optional[float] = None,
209
+ top_k: Optional[int] = None,
210
+ top_p: Optional[float] = None,
211
+ repeat_penalty: Optional[float] = None,
212
+ repeat_last_n: Optional[int] = None,
213
+ seed: Optional[int] = None,
214
+ n_threads: Optional[int] = None,
215
+ streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None) -> str:
757
216
  """
758
- Generates text using the OpenAI API based on the provided prompt and parameters.
217
+ Generate text using the active binding, using instance defaults if parameters are not provided.
759
218
 
760
- Parameters:
761
- prompt (str): The input text prompt to generate completions for.
762
- host_address (str, optional): The API host address. Defaults to instance variable.
763
- model_name (str, optional): The model to use for generation. Defaults to instance variable.
764
- personality (str, optional): The personality setting for the model. Defaults to instance variable.
765
- n_predict (int, optional): The number of tokens to predict. Defaults to instance variable.
766
- stream (bool, optional): Whether to stream the response. Defaults to False.
767
- temperature (float, optional): Sampling temperature. Higher values mean more randomness. Defaults to 0.1.
768
- top_k (int, optional): The number of highest probability vocabulary tokens to keep for top-k filtering. Defaults to 50.
769
- top_p (float, optional): The cumulative probability of parameter options to keep for nucleus sampling. Defaults to 0.95.
770
- repeat_penalty (float, optional): The penalty for repeating tokens. Defaults to 0.8.
771
- repeat_last_n (int, optional): The number of last tokens to consider for repeat penalty. Defaults to 40.
772
- seed (int, optional): Random seed for reproducibility. Defaults to instance variable.
773
- n_threads (int, optional): The number of threads to use for generation. Defaults to 8.
774
- completion_format (ELF_COMPLETION_FORMAT, optional): The format of the completion request (Instruct or Chat). Defaults to ELF_COMPLETION_FORMAT.Instruct.
775
- service_key (str, optional): The API service key for authorization. Defaults to an empty string.
776
- streaming_callback (callable, optional): A callback function to handle streaming responses.
219
+ Args:
220
+ prompt (str): The input prompt for text generation.
221
+ images (Optional[List[str]]): List of image file paths for multimodal generation.
222
+ n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
223
+ stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
224
+ temperature (Optional[float]): Sampling temperature. Uses instance default if None.
225
+ top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
226
+ top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
227
+ repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
228
+ repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
229
+ seed (Optional[int]): Random seed for generation. Uses instance default if None.
230
+ n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
231
+ streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
232
+ Uses instance default if None.
233
+ - First parameter (str): The chunk of text received from the stream.
234
+ - Second parameter (MSG_TYPE): The message type enum (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
777
235
 
778
236
  Returns:
779
- str: The generated text response from the OpenAI API.
237
+ Union[str, dict]: Generated text or error dictionary if failed.
780
238
  """
781
- # Set default values to instance variables if optional arguments are None
782
- host_address = host_address if host_address else self.host_address
783
- model_name = model_name if model_name else self.model_name
784
- n_predict = n_predict if n_predict else self.n_predict
785
- personality = personality if personality is not None else self.personality
786
- # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
787
- temperature = temperature if temperature is not None else self.temperature
788
- top_k = top_k if top_k is not None else self.top_k
789
- top_p = top_p if top_p is not None else self.top_p
790
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
791
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
792
- seed = seed or self.seed # Use the instance seed if not provided
793
- n_threads = n_threads if n_threads else self.n_threads
794
-
795
- if service_key != "":
796
- headers = {
797
- 'Content-Type': 'application/json',
798
- 'Authorization': f'Bearer {service_key}',
799
- }
800
- else:
801
- headers = {
802
- 'Content-Type': 'application/json',
803
- }
804
-
805
- if completion_format == ELF_COMPLETION_FORMAT.Instruct:
806
- data = {
807
- 'model': model_name,
808
- 'prompt': prompt,
809
- "stream": True,
810
- "temperature": float(temperature),
811
- "max_tokens": n_predict
812
- }
813
- completion_format_path = "/v1/completions"
814
- elif completion_format == ELF_COMPLETION_FORMAT.Chat:
815
- data = {
816
- 'model': model_name,
817
- 'messages': [{
818
- 'role': "user",
819
- 'content': prompt
820
- }],
821
- "stream": True,
822
- "temperature": float(temperature),
823
- "max_tokens": n_predict
824
- }
825
- completion_format_path = "/v1/chat/completions"
826
-
827
- if host_address.endswith("/"):
828
- host_address = host_address[:-1]
829
-
830
- url = f'{host_address}{completion_format_path}'
831
-
832
- response = requests.post(url, headers=headers, data=json.dumps(data), stream=True, verify=self.verify_ssl_certificate)
833
-
834
- if response.status_code == 400:
835
- try:
836
- content = response.content.decode("utf8")
837
- content = json.loads(content)
838
- self.error(content["error"]["message"])
839
- return
840
- except:
841
- content = response.content.decode("utf8")
842
- content = json.loads(content)
843
- self.error(content["message"])
844
- return
845
- elif response.status_code == 404:
846
- ASCIIColors.error(response.content.decode("utf-8", errors='ignore'))
847
-
848
- text = ""
849
- for line in response.iter_lines():
850
- decoded = line.decode("utf-8")
851
- if decoded.startswith("data: "):
852
- try:
853
- json_data = json.loads(decoded[5:].strip())
854
- if completion_format == ELF_COMPLETION_FORMAT.Chat:
855
- try:
856
- chunk = json_data["choices"][0]["delta"]["content"]
857
- except:
858
- chunk = ""
859
- else:
860
- chunk = json_data["choices"][0]["text"]
861
- # Process the JSON data here
862
- text += chunk
863
- if streaming_callback:
864
- if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
865
- break
866
- except:
867
- break
868
- else:
869
- if decoded.startswith("{"):
870
- for line_ in response.iter_lines():
871
- decoded += line_.decode("utf-8")
872
- try:
873
- json_data = json.loads(decoded)
874
- if json_data["object"] == "error":
875
- self.error(json_data["message"])
876
- break
877
- except:
878
- self.error("Couldn't generate text, verify your key or model name")
879
- else:
880
- text += decoded
881
- if streaming_callback:
882
- if not streaming_callback(decoded, MSG_TYPE.MSG_TYPE_CHUNK):
883
- break
884
- return text
885
-
886
- def openai_generate_with_images(self,
887
- prompt,
888
- images,
889
- host_address=None,
890
- model_name=None,
891
- personality=None,
892
- n_predict=None,
893
- stream=False,
894
- temperature=0.1,
895
- top_k=50,
896
- top_p=0.95,
897
- repeat_penalty=0.8,
898
- repeat_last_n=40,
899
- seed=None,
900
- n_threads=8,
901
- max_image_width=-1,
902
- service_key: str = "",
903
- streaming_callback=None,):
904
- """Generates text out of a prompt
239
+ return self.binding.generate_text(
240
+ prompt=prompt,
241
+ images=images,
242
+ n_predict=n_predict if n_predict is not None else self.default_n_predict,
243
+ stream=stream if stream is not None else self.default_stream,
244
+ temperature=temperature if temperature is not None else self.default_temperature,
245
+ top_k=top_k if top_k is not None else self.default_top_k,
246
+ top_p=top_p if top_p is not None else self.default_top_p,
247
+ repeat_penalty=repeat_penalty if repeat_penalty is not None else self.default_repeat_penalty,
248
+ repeat_last_n=repeat_last_n if repeat_last_n is not None else self.default_repeat_last_n,
249
+ seed=seed if seed is not None else self.default_seed,
250
+ n_threads=n_threads if n_threads is not None else self.default_n_threads,
251
+ streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback
252
+ )
905
253
 
906
- Args:
907
- prompt (str): The prompt to use for generation
908
- n_predict (int, optional): Number of tokens to prodict. Defaults to 128.
909
- callback (Callable[[str], None], optional): A callback function that is called everytime a new text element is generated. Defaults to None.
910
- verbose (bool, optional): If true, the code will spit many informations about the generation process. Defaults to False.
911
- """
912
- # Set default values to instance variables if optional arguments are None
913
- host_address = host_address if host_address else self.host_address
914
- model_name = model_name if model_name else self.model_name
915
- n_predict = n_predict if n_predict else self.n_predict
916
- personality = personality if personality is not None else self.personality
917
- # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
918
- temperature = temperature if temperature is not None else self.temperature
919
- top_k = top_k if top_k is not None else self.top_k
920
- top_p = top_p if top_p is not None else self.top_p
921
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
922
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
923
- seed = seed or self.seed # Use the instance seed if not provided
924
- n_threads = n_threads if n_threads else self.n_threads
925
-
926
- count = 0
927
- output = ""
928
-
929
- messages = [
930
- {
931
- "role": "user",
932
- "content": [
933
- {
934
- "type":"text",
935
- "text":prompt
936
- }
937
- ]+[
938
- {
939
- "type": "image_url",
940
- "image_url": {
941
- "url": f"data:image/jpeg;base64,{encode_image(image_path, max_image_width)}"
942
- }
943
- }
944
- for image_path in images
945
- ]
946
- }
947
- ]
948
- chat_completion = self.client.chat.completions.create(
949
- model=self.model_name, # Choose the engine according to your OpenAI plan
950
- messages=messages,
951
- max_tokens=n_predict, # Adjust the desired length of the generated response
952
- n=1, # Specify the number of responses you want
953
- temperature=temperature, # Adjust the temperature for more or less randomness in the output
954
- stream=True
955
- )
956
-
957
- for resp in chat_completion:
958
- if count >= n_predict:
959
- break
960
- try:
961
- word = resp.choices[0].delta.content
962
- except Exception as ex:
963
- word = ""
964
- if streaming_callback is not None:
965
- if not streaming_callback(word):
966
- break
967
- if word:
968
- output += word
969
- count += 1
970
- return output
971
-
972
254
 
973
- def ollama_generate(self, prompt, host_address=None, model_name=None, personality=None, n_predict=None, stream=False, temperature=0.1, top_k=50, top_p=0.95, repeat_penalty=0.8, repeat_last_n=40, seed=None, n_threads=8, completion_format:ELF_COMPLETION_FORMAT=ELF_COMPLETION_FORMAT.Instruct, service_key:str="", streaming_callback=None):
974
- # Set default values to instance variables if optional arguments are None
975
- host_address = host_address if host_address else self.host_address
976
- model_name = model_name if model_name else self.model_name
977
- n_predict = n_predict if n_predict else self.n_predict
978
- personality = personality if personality is not None else self.personality
979
- # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
980
- temperature = temperature if temperature is not None else self.temperature
981
- top_k = top_k if top_k is not None else self.top_k
982
- top_p = top_p if top_p is not None else self.top_p
983
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
984
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
985
- seed = seed or self.seed # Use the instance seed if not provided
986
- n_threads = n_threads if n_threads else self.n_threads
987
-
988
- if service_key!="":
989
- headers = {
990
- 'Content-Type': 'application/json',
991
- 'Authorization': f'Bearer {service_key}',
992
- }
993
- else:
994
- headers = {
995
- 'Content-Type': 'application/json',
996
- }
997
-
998
- data = {
999
- 'model':model_name,
1000
- 'prompt': prompt,
1001
- "stream":stream,
1002
- "temperature": float(temperature),
1003
- "max_tokens": n_predict
1004
- }
1005
- completion_format_path = "/api/generate"
1006
- if host_address.endswith("/"):
1007
- host_address = host_address[:-1]
1008
- url = f'{host_address}{completion_format_path}'
1009
-
1010
- response = requests.post(url, json=data, headers=headers)
1011
-
1012
- if response.status_code==404:
1013
- ASCIIColors.error(response.content.decode("utf-8", errors='ignore'))
1014
- text = ""
1015
- if stream:
1016
- for line in response.iter_lines():
1017
- decoded = line.decode("utf-8")
1018
- json_data = json.loads(decoded)
1019
- chunk = json_data["response"]
1020
- ## Process the JSON data here
1021
- text +=chunk
1022
- if streaming_callback:
1023
- if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
1024
- break
1025
- return text
1026
- else:
1027
- return response.json()["response"]
1028
-
1029
- def ollama_generate_with_images(self,
1030
- prompt,
1031
- images,
1032
- host_address=None,
1033
- model_name=None,
1034
- personality=None,
1035
- n_predict=None,
1036
- stream=False,
1037
- temperature=0.1,
1038
- top_k=50,
1039
- top_p=0.95,
1040
- repeat_penalty=0.8,
1041
- repeat_last_n=40,
1042
- seed=None,
1043
- n_threads=8,
1044
- max_image_width=-1,
1045
- service_key: str = "",
1046
- streaming_callback=None,):
1047
- """Generates text out of a prompt
1048
-
1049
- Args:
1050
- prompt (str): The prompt to use for generation
1051
- n_predict (int, optional): Number of tokens to prodict. Defaults to 128.
1052
- callback (Callable[[str], None], optional): A callback function that is called everytime a new text element is generated. Defaults to None.
1053
- verbose (bool, optional): If true, the code will spit many informations about the generation process. Defaults to False.
1054
- """
1055
- # Set default values to instance variables if optional arguments are None
1056
- host_address = host_address if host_address else self.host_address
1057
- model_name = model_name if model_name else self.model_name
1058
- n_predict = n_predict if n_predict else self.n_predict
1059
- personality = personality if personality is not None else self.personality
1060
- # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
1061
- temperature = temperature if temperature is not None else self.temperature
1062
- top_k = top_k if top_k is not None else self.top_k
1063
- top_p = top_p if top_p is not None else self.top_p
1064
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
1065
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
1066
- seed = seed or self.seed # Use the instance seed if not provided
1067
- n_threads = n_threads if n_threads else self.n_threads
1068
- if service_key != "":
1069
- headers = {
1070
- 'Content-Type': 'application/json',
1071
- 'Authorization': f'Bearer {service_key}',
1072
- }
1073
- else:
1074
- headers = {
1075
- 'Content-Type': 'application/json',
1076
- }
1077
-
1078
- images_list = []
1079
- for image in images:
1080
- images_list.append(f"{encode_image(image, max_image_width)}")
1081
-
1082
- data = {
1083
- 'model': model_name,
1084
- 'prompt': prompt,
1085
- 'images': images_list,
1086
- "raw": True,
1087
- "stream":True,
1088
- "temperature": float(temperature),
1089
- "max_tokens": n_predict
1090
- }
1091
-
1092
-
1093
- data = {
1094
- 'model': model_name,
1095
- 'messages': [
1096
- {
1097
- "role": "user",
1098
- "content": [
1099
- {
1100
- "type":"text",
1101
- "text":prompt
1102
- }
1103
- ]+[
1104
- {
1105
- "type": "image_url",
1106
- "image_url": {
1107
- "url": f"data:image/jpeg;base64,{encode_image(image_path, max_image_width)}"
1108
- }
1109
- }
1110
- for image_path in images
1111
- ]
1112
- }
1113
- ],
1114
- "stream": True,
1115
- "temperature": float(temperature),
1116
- "max_tokens": n_predict
1117
- }
1118
-
1119
- completion_format_path = "/api"
1120
-
1121
- if host_address.endswith("/"):
1122
- host_address = host_address[:-1]
1123
- url = f'{host_address}{completion_format_path}'
1124
-
1125
- response = requests.post(url, json=data, headers=headers)
1126
-
1127
- if response.status_code == 400:
1128
- try:
1129
- content = response.content.decode("utf8")
1130
- content = json.loads(content)
1131
- self.error(content["error"]["message"])
1132
- return
1133
- except:
1134
- content = response.content.decode("utf8")
1135
- content = json.loads(content)
1136
- self.error(content["message"])
1137
- return
1138
- elif response.status_code == 404:
1139
- ASCIIColors.error(response.content.decode("utf-8", errors='ignore'))
1140
-
1141
- text = ""
1142
- for line in response.iter_lines():
1143
- decoded = line.decode("utf-8")
1144
- if decoded.startswith("data: "):
1145
- try:
1146
- json_data = json.loads(decoded[5:].strip())
1147
- try:
1148
- chunk = json_data["choices"][0]["delta"]["content"]
1149
- except:
1150
- chunk = ""
1151
- # Process the JSON data here
1152
- text += chunk
1153
- if streaming_callback:
1154
- if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
1155
- break
1156
- except:
1157
- break
1158
- else:
1159
- if decoded.startswith("{"):
1160
- for line_ in response.iter_lines():
1161
- decoded += line_.decode("utf-8")
1162
- try:
1163
- json_data = json.loads(decoded)
1164
- if json_data["object"] == "error":
1165
- self.error(json_data["message"])
1166
- break
1167
- except:
1168
- self.error("Couldn't generate text, verify your key or model name")
1169
- else:
1170
- text += decoded
1171
- if streaming_callback:
1172
- if not streaming_callback(decoded, MSG_TYPE.MSG_TYPE_CHUNK):
1173
- break
1174
- return text
1175
-
1176
- def litellm_generate(self, prompt, host_address=None, model_name=None, personality=None, n_predict=None, stream=False, temperature=0.1, top_k=50, top_p=0.95, repeat_penalty=0.8, repeat_last_n=40, seed=None, n_threads=8, completion_format:ELF_COMPLETION_FORMAT=ELF_COMPLETION_FORMAT.Instruct, service_key:str="", streaming_callback=None):
1177
- # Set default values to instance variables if optional arguments are None
1178
- host_address = host_address if host_address else self.host_address
1179
- model_name = model_name if model_name else self.model_name
1180
- n_predict = n_predict if n_predict else self.n_predict
1181
- personality = personality if personality is not None else self.personality
1182
- # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
1183
- temperature = temperature if temperature is not None else self.temperature
1184
- top_k = top_k if top_k is not None else self.top_k
1185
- top_p = top_p if top_p is not None else self.top_p
1186
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
1187
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
1188
- seed = seed or self.seed # Use the instance seed if not provided
1189
- n_threads = n_threads if n_threads else self.n_threads
1190
-
1191
- if service_key!="":
1192
- headers = {
1193
- 'Content-Type': 'application/json',
1194
- 'Authorization': f'Bearer {service_key}',
1195
- }
1196
- else:
1197
- headers = {
1198
- 'Content-Type': 'application/json',
1199
- }
1200
-
1201
- data = {
1202
- 'model':model_name,
1203
- 'prompt': prompt,
1204
- "stream":True,
1205
- "temperature": float(temperature),
1206
- "max_tokens": n_predict
1207
- }
1208
- completion_format_path = "/api/generate"
1209
- if host_address.endswith("/"):
1210
- host_address = host_address[:-1]
1211
- url = f'{host_address}{completion_format_path}'
1212
-
1213
- response = requests.post(url, json=data, headers=headers)
1214
-
1215
- if response.status_code==404:
1216
- ASCIIColors.error(response.content.decode("utf-8", errors='ignore'))
1217
- text = ""
1218
- for line in response.iter_lines():
1219
- decoded = line.decode("utf-8")
1220
- if decoded.startswith("{"):
1221
- json_data = json.loads(decoded)
1222
- if "error" in json_data:
1223
- self.error(json_data["error"]["message"])
1224
- break
1225
- else:
1226
- text +=decoded
1227
- if streaming_callback:
1228
- if not streaming_callback(decoded, MSG_TYPE.MSG_TYPE_CHUNK):
1229
- break
1230
-
1231
- return text
1232
-
1233
-
1234
- def lollms_listMountedPersonalities(self, host_address:str=None):
1235
- host_address = host_address if host_address else self.host_address
1236
- url = f"{host_address}/list_mounted_personalities"
1237
-
1238
- response = requests.get(url)
255
+ def embed(self, text):
256
+ return self.binding.embed(text)
1239
257
 
1240
- if response.status_code == 200:
1241
- try:
1242
- text = json.loads(response.content.decode("utf-8"))
1243
- return text
1244
- except Exception as ex:
1245
- return {"status": False, "error": str(ex)}
1246
- else:
1247
- return {"status": False, "error": response.text}
1248
258
 
1249
- def listModels(self, host_address:str=None):
1250
- if self.default_generation_mode == ELF_GENERATION_FORMAT.LOLLMS:
1251
- return self.lollms_listModels(host_address)
1252
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.OLLAMA:
1253
- return self.ollama_listModels(host_address)
1254
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.OPENAI:
1255
- return self.openai_listModels(host_address)
259
+ def listModels(self):
260
+ return self.binding.listModels()
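A brief usage sketch (illustrative, not part of the package): assuming a LollmsClient instance named lc, as built in the __main__ block at the end of this file, and assuming both helpers pass the active binding's return values through.

    models = lc.listModels()        # whatever the active binding reports
    embedding = lc.embed("hello")   # embedding vector; exact type/shape depends on the binding
    print(models)
    print(embedding)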
1256
261
 
1257
- def lollms_listModels(self, host_address:str=None):
1258
- host_address = host_address if host_address else self.host_address
1259
- url = f"{host_address}/list_models"
1260
-
1261
- response = requests.get(url)
1262
-
1263
- if response.status_code == 200:
1264
- try:
1265
- text = json.loads(response.content.decode("utf-8"))
1266
- return text
1267
- except Exception as ex:
1268
- return {"status": False, "error": str(ex)}
1269
- else:
1270
- return {"status": False, "error": response.text}
1271
-
1272
- def ollama_listModels(self, host_address:str=None):
1273
- if host_address is None:
1274
- host_address = self.host_address
1275
- url = f'{host_address}/api/tags'
1276
- headers = {
1277
- 'accept': 'application/json',
1278
- 'Authorization': f'Bearer {self.service_key}'
1279
- }
1280
- response = requests.get(url, headers=headers, verify= self.verify_ssl_certificate)
1281
- try:
1282
- data = response.json()
1283
- model_info = []
1284
-
1285
- for model in data['models']:
1286
- model_name = model['name']
1287
- owned_by = ""
1288
- created_datetime = model["modified_at"]
1289
- model_info.append({'model_name': model_name, 'owned_by': owned_by, 'created_datetime': created_datetime})
1290
-
1291
- return model_info
1292
- except Exception as ex:
1293
- trace_exception(ex)
1294
- return []
1295
-
1296
- def openai_listModels(self, host_address:str=None):
1297
- if host_address is None:
1298
- host_address = self.host_address
1299
- url = f'{host_address}/v1/models'
1300
- headers = {
1301
- 'accept': 'application/json',
1302
- 'Authorization': f'Bearer {self.service_key}'
1303
- }
1304
- response = requests.get(url, headers=headers, verify= self.verify_ssl_certificate)
1305
- try:
1306
- data = response.json()
1307
- model_info = []
1308
-
1309
- for model in data["data"]:
1310
- model_name = model['id']
1311
- owned_by = model['owned_by']
1312
- created_datetime = model["created"]
1313
- model_info.append({'model_name': model_name, 'owned_by': owned_by, 'created_datetime': created_datetime})
1314
-
1315
- return model_info
1316
- except Exception as ex:
1317
- trace_exception(ex)
1318
- return []
1319
262
 
1320
263
 
1321
264
  def generate_codes(
@@ -1363,11 +306,11 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
1363
306
  {self.ai_full_header}"""
1364
307
 
1365
308
  if len(self.image_files)>0:
1366
- response = self.generate_with_images(full_prompt, self.image_files, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
309
+ response = self.generate_text_with_images(full_prompt, self.image_files, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
1367
310
  elif len(images)>0:
1368
- response = self.generate_with_images(full_prompt, images, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
311
+ response = self.generate_text_with_images(full_prompt, images, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
1369
312
  else:
1370
- response = self.generate(full_prompt, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
313
+ response = self.generate_text(full_prompt, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
1371
314
  response_full += response
1372
315
  codes = self.extract_code_blocks(response)
1373
316
  return codes
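The hunk above only renames the internal calls to generate_text / generate_text_with_images; the generate_codes interface itself is unchanged. A hedged sketch of how it might be called (illustrative; the positional prompt argument is assumed from the body above, and the prompt text is a placeholder):

    codes = lc.generate_codes("Write a hello world in Python and in C, each in its own code tag.")
    for block in codes:
        print(block["content"])      # extracted code block text
        print(block["is_complete"])  # False if the block was truncated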
@@ -1415,16 +358,13 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
1415
358
  full_prompt += f"""You must return a single code tag.
1416
359
  Do not split the code in multiple tags.
1417
360
  {self.ai_full_header}"""
1418
- if len(images)>0:
1419
- response = self.generate_with_images(full_prompt, images, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, streaming_callback=callback)
1420
- else:
1421
- response = self.generate(full_prompt, max_size, False, temperature, top_k, top_p, repeat_penalty, repeat_last_n, streaming_callback=callback)
361
+ response = self.generate_text(full_prompt, images, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, streaming_callback=callback)
1422
362
  codes = self.extract_code_blocks(response)
1423
363
  if len(codes)>0:
1424
364
  if not codes[-1]["is_complete"]:
1425
365
  code = "\n".join(codes[-1]["content"].split("\n")[:-1])
1426
366
  while not codes[-1]["is_complete"]:
1427
- response = self.generate(prompt+code+self.user_full_header+"continue the code. Start from last line and continue the code. Put the code inside a markdown code tag."+self.separator_template+self.ai_full_header, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, streaming_callback=callback)
367
+ response = self.generate_text(prompt+code+self.user_full_header+"continue the code. Start from last line and continue the code. Put the code inside a markdown code tag."+self.separator_template+self.ai_full_header, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, streaming_callback=callback)
1428
368
  codes = self.extract_code_blocks(response)
1429
369
  if len(codes)==0:
1430
370
  break
@@ -1582,6 +522,182 @@ Do not split the code in multiple tags.
1582
522
 
1583
523
  return cleaned_text
1584
524
 
525
+ def yes_no(
526
+ self,
527
+ question: str,
528
+ context: str = "",
529
+ max_answer_length: int = None,
530
+ conditionning: str = "",
531
+ return_explanation: bool = False,
532
+ callback = None
533
+ ) -> bool | dict:
534
+ """
535
+ Answers a yes/no question.
536
+
537
+ Args:
538
+ question (str): The yes/no question to answer.
539
+ context (str, optional): Additional context to provide for the question.
540
+ max_answer_length (int, optional): Maximum string length allowed for the response. Defaults to None.
541
+ conditionning (str, optional): An optional system message to put at the beginning of the prompt.
542
+ return_explanation (bool, optional): If True, returns a dictionary with the answer and explanation. Defaults to False.
543
+
544
+ Returns:
545
+ bool or dict:
546
+ - If return_explanation is False, returns a boolean (True for 'yes', False for 'no').
547
+ - If return_explanation is True, returns a dictionary with the answer and explanation.
548
+ """
549
+ if not callback:
550
+ callback=self.sink
551
+
552
+ prompt = f"{conditionning}\nQuestion: {question}\nContext: {context}\n"
553
+
554
+ template = """
555
+ {
556
+ "answer": true | false,
557
+ "explanation": "Optional explanation if return_explanation is True"
558
+ }
559
+ """
560
+
561
+ response = self.generate_text_code(
562
+ prompt=prompt,
563
+ template=template,
564
+ language="json",
565
+ code_tag_format="markdown",
566
+ max_size=max_answer_length,
567
+ callback=callback
568
+ )
569
+
570
+ try:
571
+ parsed_response = json.loads(response)
572
+ answer = parsed_response.get("answer", False)
573
+ explanation = parsed_response.get("explanation", "")
574
+
575
+ if return_explanation:
576
+ return {"answer": answer, "explanation": explanation}
577
+ else:
578
+ return answer
579
+ except json.JSONDecodeError:
580
+ return False
581
+
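A hedged usage sketch for yes_no (illustrative; assumes an existing LollmsClient instance lc and a reachable model):

    answer = lc.yes_no("Is Mars a planet?")
    print(answer)  # True or False

    detailed = lc.yes_no(
        "Is the sky green?",
        context="We are talking about Earth's daytime sky.",
        return_explanation=True,
    )
    print(detailed)  # e.g. {"answer": False, "explanation": "..."}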
582
+ def multichoice_question(
583
+ self,
584
+ question: str,
585
+ possible_answers: list,
586
+ context: str = "",
587
+ max_answer_length: int = None,
588
+ conditionning: str = "",
589
+ return_explanation: bool = False,
590
+ callback = None
591
+ ) -> int | tuple:
592
+ """
593
+ Interprets a multi-choice question from a user's response. This function expects only one choice as true.
594
+ All other choices are considered false. If none are correct, returns -1.
595
+
596
+ Args:
597
+ question (str): The multi-choice question posed by the user.
598
+ possible_answers (List[Any]): A list containing all valid options for the chosen value.
599
+ context (str, optional): Additional context to provide for the question.
600
+ max_answer_length (int, optional): Maximum string length allowed while interpreting the user's responses. Defaults to None.
601
+ conditionning (str, optional): An optional system message to put at the beginning of the prompt.
602
+ return_explanation (bool, optional): If True, returns a dictionary with the choice and explanation. Defaults to False.
603
+
604
+ Returns:
605
+ int or tuple:
606
+ - If return_explanation is False, returns the selected answer index as an int.
607
+ - If return_explanation is True, returns a tuple (index, explanation).
608
+ - Returns -1 (or (-1, "failed to decide") when return_explanation is True) if the answer cannot be interpreted.
609
+ """
610
+ if not callback:
611
+ callback=self.sink
612
+
613
+ prompt = f"""
614
+ {conditionning}\n
615
+ QUESTION:\n{question}\n
616
+ POSSIBLE ANSWERS:\n"""
617
+ for i, answer in enumerate(possible_answers):
618
+ prompt += f"{i}. {answer}\n"
619
+
620
+ if context:
621
+ prompt += f"\nADDITIONAL CONTEXT:\n{context}\n"
622
+
623
+ prompt += "\nRespond with a JSON object containing:\n"
624
+ if return_explanation:
625
+ prompt += "{\"index\": (the selected answer index), \"explanation\": (reasoning for selection)}"
626
+ else:
627
+ prompt += "{\"index\": (the selected answer index)}"
628
+
629
+ response = self.generate_text_code(prompt, language="json", max_size=max_answer_length,
630
+ accept_all_if_no_code_tags_is_present=True, return_full_generated_code=False, callback=callback)
631
+
632
+ try:
633
+ result = json.loads(response)
634
+ if return_explanation:
635
+ if "index" in result and isinstance(result["index"], int):
636
+ return result["index"], result["index"]
637
+ else:
638
+ if "index" in result and isinstance(result["index"], int):
639
+ return result["index"]
640
+ except json.JSONDecodeError:
641
+ if return_explanation:
642
+ return -1, "failed to decide"
643
+ else:
644
+ return -1
645
+
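A hedged usage sketch for multichoice_question (illustrative; assumes the same lc instance). On success it returns the selected index, plus an explanation when return_explanation is True; -1 signals that the model's answer could not be interpreted:

    choice = lc.multichoice_question(
        "Which language is the lollms-client library written in?",
        ["Rust", "Python", "Go"],
    )
    print(choice)  # expected: 1, or -1 on failure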
646
+ def multichoice_ranking(
647
+ self,
648
+ question: str,
649
+ possible_answers: list,
650
+ context: str = "",
651
+ max_answer_length: int = 512,
652
+ conditionning: str = "",
653
+ return_explanation: bool = False,
654
+ callback = None
655
+ ) -> dict:
656
+ """
657
+ Ranks answers for a question from best to worst. Returns a JSON object containing the ranked order.
658
+
659
+ Args:
660
+ question (str): The question for which the answers are being ranked.
661
+ possible_answers (List[Any]): A list of possible answers to rank.
662
+ context (str, optional): Additional context to provide for the question.
663
+ max_answer_length (int, optional): Maximum string length allowed for the response. Defaults to 512.
664
+ conditionning (str, optional): An optional system message to put at the beginning of the prompt.
665
+ return_explanation (bool, optional): If True, returns a dictionary with the ranked order and explanations. Defaults to False.
666
+
667
+ Returns:
668
+ dict:
669
+ - If return_explanation is False, returns a JSON object with only the ranked order.
670
+ - If return_explanation is True, returns a JSON object with the ranked order and explanations.
671
+ """
672
+ if not callback:
673
+ callback=self.sink
674
+
675
+ prompt = f"""
676
+ {conditionning}\n
677
+ QUESTION:\n{question}\n
678
+ POSSIBLE ANSWERS:\n"""
679
+ for i, answer in enumerate(possible_answers):
680
+ prompt += f"{i}. {answer}\n"
681
+
682
+ if context:
683
+ prompt += f"\nADDITIONAL CONTEXT:\n{context}\n"
684
+
685
+ prompt += "\nRespond with a JSON object containing:\n"
686
+ if return_explanation:
687
+ prompt += "{\"ranking\": (list of indices ordered from best to worst), \"explanations\": (list of reasons for each ranking)}"
688
+ else:
689
+ prompt += "{\"ranking\": (list of indices ordered from best to worst)}"
690
+
691
+ response = self.generate_text_code(prompt, language="json", return_full_generated_code=False, callback=callback)
692
+
693
+ try:
694
+ result = json.loads(response)
695
+ if "ranking" in result and isinstance(result["ranking"], list):
696
+ return result
697
+ except json.JSONDecodeError:
698
+ return {"ranking": []}
699
+
700
+
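A hedged usage sketch for multichoice_ranking (illustrative; assumes the same lc instance):

    ranking = lc.multichoice_ranking(
        "Rank these answers to 'What is 2 + 2?' from best to worst.",
        ["4", "5", "four-ish"],
    )
    print(ranking)  # e.g. {"ranking": [0, 2, 1]}; {"ranking": []} if parsing failed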
1585
701
  def sequential_summarize(
1586
702
  self,
1587
703
  text:str,
@@ -1694,7 +810,7 @@ Do not discuss the information inside thememory, just put the relevant informati
1694
810
  ASCIIColors.yellow(f" ----- {chunk_id-1} ------")
1695
811
  ASCIIColors.red(prompt)
1696
812
 
1697
- memory = self.generate(prompt, n_predict=ctx_size//4, streaming_callback=callback).strip()
813
+ memory = self.generate_text(prompt, n_predict=ctx_size//4, streaming_callback=callback).strip()
1698
814
  code = self.extract_code_blocks(memory)
1699
815
  if code:
1700
816
  memory=code[0]["content"]
@@ -1730,21 +846,171 @@ The updated memory must be put in a {chunk_processing_output_format} markdown ta
1730
846
 
1731
847
  # Generate final summary
1732
848
  final_prompt = final_prompt_template
1733
- memory = self.generate(final_prompt, streaming_callback=callback)
849
+ memory = self.generate_text(final_prompt, streaming_callback=callback)
1734
850
  code = self.extract_code_blocks(memory)
1735
851
  if code:
1736
852
  memory=code[0]["content"]
1737
853
  return memory
1738
854
 
855
+ def deep_analyze(
856
+ self,
857
+ query: str,
858
+ text: str = None,
859
+ files: list = None,
860
+ search_prompt: str = "Extract information related to the query from the current text chunk and update the memory with new findings.",
861
+ aggregation_prompt: str = None,
862
+ output_format: str = "markdown",
863
+ ctx_size: int = None,
864
+ chunk_size: int = None,
865
+ bootstrap_chunk_size: int = None,
866
+ bootstrap_steps: int = None,
867
+ callback=None,
868
+ debug: bool = False
869
+ ):
870
+ """
871
+ Searches for specific information related to a query in a long text or a list of files.
872
+ Processes the input in chunks, updates a memory with relevant findings, and optionally aggregates them.
873
+
874
+ Parameters:
875
+ - query (str): The query to search for.
876
+ - text (str, optional): The input text to search in. Defaults to None.
877
+ - files (list, optional): List of file paths to search in. Defaults to None.
878
+ - search_prompt (str, optional): Prompt for processing each chunk. Defaults to a standard extraction prompt.
879
+ - aggregation_prompt (str, optional): Prompt for aggregating findings. Defaults to None.
880
+ - output_format (str, optional): Output format. Defaults to "markdown".
881
+ - ctx_size (int, optional): Context size for the model. Defaults to None (uses self.ctx_size).
882
+ - chunk_size (int, optional): Size of each chunk. Defaults to None (ctx_size // 4). Smaller chunk sizes yield better results but are slower.
883
+ - bootstrap_chunk_size (int, optional): Size for initial chunks. Defaults to None.
884
+ - bootstrap_steps (int, optional): Number of initial chunks using bootstrap size. Defaults to None.
885
+ - callback (callable, optional): Function called after each chunk. Defaults to None.
886
+ - debug (bool, optional): Enable debug output. Defaults to False.
887
+
888
+ Returns:
889
+ - str: The search findings or aggregated output in the specified format.
890
+ """
891
+ # Set defaults
892
+ if ctx_size is None:
893
+ ctx_size = self.ctx_size
894
+ if chunk_size is None:
895
+ chunk_size = ctx_size // 4
896
+
897
+ # Prepare input
898
+ if files:
899
+ all_texts = [(file, open(file, 'r', encoding='utf-8').read()) for file in files]
900
+ elif text:
901
+ all_texts = [("input_text", text)]
902
+ else:
903
+ raise ValueError("Either text or files must be provided.")
904
+
905
+ # Initialize memory and chunk counter
906
+ memory = ""
907
+ chunk_id = 0
908
+
909
+ # Define search prompt template using f-string and the provided search_prompt
910
+ search_prompt_template = f"""{self.system_full_header}
911
+ You are a search assistant that processes documents chunk by chunk to find information related to a query, updating a memory of findings at each step.
912
+
913
+ Your goal is to extract and combine relevant information from each text chunk with the existing memory, ensuring no key details are omitted or invented.
914
+
915
+
916
+ ----
917
+ # Chunk number: {{chunk_id}}
918
+ # Text chunk:
919
+ ```markdown
920
+ {{chunk}}
921
+ ```
922
+
923
+ Current findings memory:
924
+ ```markdown
925
+ {{memory}}
926
+ ```
927
+ {self.user_full_header}
928
+ Query: '{query}'
929
+ Task: {search_prompt}
930
+
931
+ Update the memory by adding new relevant information from this chunk. Retain all prior findings unless contradicted or updated. Only include explicitly relevant details.
932
+ Make sure to extract only information that is relevant to answering the user's query, or at least important contextual information that can help answer it.
933
+ {self.ai_full_header}
934
+ """
935
+
936
+ # Calculate static prompt tokens
937
+ example_prompt = search_prompt_template.replace("{chunk_id}", "0")\
938
+ .replace("{memory}", "")\
939
+ .replace("{chunk}", "")
940
+ static_tokens = len(self.tokenize(example_prompt))
941
+
942
+ # Process each text (file or input)
943
+ for file_name, file_text in all_texts:
944
+ file_tokens = self.tokenize(file_text)
945
+ start_token_idx = 0
946
+
947
+ while start_token_idx < len(file_tokens):
948
+ # Calculate available tokens
949
+ current_memory_tokens = len(self.tokenize(memory))
950
+ available_tokens = ctx_size - static_tokens - current_memory_tokens
951
+ if available_tokens <= 0:
952
+ raise ValueError("Memory too large - consider reducing chunk size or increasing context window")
953
+
954
+ # Adjust chunk size
955
+ actual_chunk_size = (
956
+ min(bootstrap_chunk_size, available_tokens)
957
+ if bootstrap_chunk_size is not None and bootstrap_steps is not None and chunk_id < bootstrap_steps
958
+ else min(chunk_size, available_tokens)
959
+ )
960
+
961
+ end_token_idx = min(start_token_idx + actual_chunk_size, len(file_tokens))
962
+ chunk_tokens = file_tokens[start_token_idx:end_token_idx]
963
+ chunk = self.detokenize(chunk_tokens)
964
+
965
+ # Generate updated memory
966
+ prompt = search_prompt_template.replace("{chunk_id}", str(chunk_id))\
967
+ .replace("{memory}", memory)\
968
+ .replace("{chunk}", chunk)
969
+ if debug:
970
+ print(f"----- Chunk {chunk_id} from {file_name} ------")
971
+ print(prompt)
972
+
973
+ output = self.generate_text(prompt, n_predict=ctx_size // 4, streaming_callback=callback).strip()
974
+ code = self.extract_code_blocks(output)
975
+ memory = code[0]["content"] if code else output
976
+
977
+ if debug:
978
+ print("----- Updated Memory ------")
979
+ print(memory)
980
+ print("---------------------------")
981
+
982
+ start_token_idx = end_token_idx
983
+ chunk_id += 1
984
+
985
+ # Aggregate findings if requested
986
+ if aggregation_prompt:
987
+ final_prompt = f"""{self.system_full_header}
988
+ You are a search results aggregator.
989
+
990
+ {self.user_full_header}
991
+ {aggregation_prompt}
992
+
993
+ Collected findings:
994
+ ```markdown
995
+ {memory}
996
+ ```
997
+
998
+ Provide the final output in {output_format} format.
999
+ {self.ai_full_header}
1000
+ """
1001
+ final_output = self.generate_text(final_prompt, streaming_callback=callback)
1002
+ code = self.extract_code_blocks(final_output)
1003
+ return code[0]["content"] if code else final_output
1004
+ return memory
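A hedged usage sketch for deep_analyze (illustrative; the file paths and prompts are placeholders, and result quality depends on the bound model):

    findings = lc.deep_analyze(
        query="What licensing terms are mentioned?",
        files=["README.md", "LICENSE"],  # hypothetical paths
        aggregation_prompt="Summarize all licensing-related findings.",
        chunk_size=2048,
        debug=False,
    )
    print(findings)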
1739
1005
  def error(self, content, duration:int=4, client_id=None, verbose:bool=True):
1740
1006
  ASCIIColors.error(content)
1741
1007
 
1742
1008
 
1743
1009
 
1744
1010
  if __name__=="__main__":
1745
- #lc = LollmsClient("http://localhost:9600")
1011
+ lc = LollmsClient("ollama", model_name="mistral-nemo:latest")
1746
1012
  #lc = LollmsClient("http://localhost:11434", model_name="mistral-nemo:latest", default_generation_mode=ELF_GENERATION_FORMAT.OLLAMA)
1747
- lc = LollmsClient(model_name="gpt-3.5-turbo-0125", default_generation_mode=ELF_GENERATION_FORMAT.OPENAI)
1013
+ #lc = LollmsClient(model_name="gpt-3.5-turbo-0125", default_generation_mode=ELF_GENERATION_FORMAT.OPENAI)
1748
1014
  print(lc.listModels())
1749
1015
  code = lc.generate_code("Build a simple json that contains name and age. put the output inside a json markdown tag")
1750
1016
  print(code)