lollms-client 0.9.2__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lollms-client might be problematic. Click here for more details.

@@ -1,143 +1,110 @@
1
1
  import requests
2
2
  from ascii_colors import ASCIIColors, trace_exception
3
- from lollms_client.lollms_types import MSG_TYPE
3
+ from lollms_client.lollms_types import MSG_TYPE, ELF_COMPLETION_FORMAT
4
4
  from lollms_client.lollms_utilities import encode_image
5
+ from lollms_client.lollms_llm_binding import LollmsLLMBindingManager
5
6
  import json
6
7
  from enum import Enum
7
- import tiktoken
8
8
  import base64
9
9
  import requests
10
10
  import pipmaster as pm
11
- from typing import List, Optional, Callable, Union
11
+ from typing import List, Optional, Callable, Union, Dict
12
12
  import numpy as np
13
13
  import pipmaster as pm
14
+ from pathlib import Path
14
15
  import os
15
16
 
16
- class ELF_GENERATION_FORMAT(Enum):
17
- LOLLMS = 0
18
- OPENAI = 1
19
- OLLAMA = 2
20
- LITELLM = 3
21
- TRANSFORMERS = 4
22
- VLLM = 5
23
-
24
- @classmethod
25
- def from_string(cls, format_string: str) -> 'ELF_GENERATION_FORMAT':
26
- format_mapping = {
27
- "LOLLMS": cls.LOLLMS,
28
- "OPENAI": cls.OPENAI,
29
- "OLLAMA": cls.OLLAMA,
30
- "LITELLM": cls.LITELLM,
31
- "TRANSFORMERS": cls.TRANSFORMERS,
32
- "VLLM": cls.VLLM
33
- }
34
-
35
- try:
36
- return format_mapping[format_string.upper()]
37
- except KeyError:
38
- raise ValueError(f"Invalid format string: {format_string}. Must be one of {list(format_mapping.keys())}.")
39
-
40
- def __str__(self):
41
- return self.name
42
- class ELF_COMPLETION_FORMAT(Enum):
43
- Instruct = 0
44
- Chat = 1
45
- @classmethod
46
- def from_string(cls, format_string: str) -> 'ELF_COMPLETION_FORMAT':
47
- format_mapping = {
48
- "Instruct": cls.Instruct,
49
- "Chat": cls.Chat,
50
- }
51
-
52
- try:
53
- return format_mapping[format_string.upper()]
54
- except KeyError:
55
- raise ValueError(f"Invalid format string: {format_string}. Must be one of {list(format_mapping.keys())}.")
56
-
57
- def __str__(self):
58
- return self.name
59
17
 
60
18
  class LollmsClient():
61
- def __init__(
62
- self,
63
- host_address=None,
64
- model_name=None,
65
- ctx_size=32000,
66
- personality=-1,
67
- n_predict=4096,
68
- min_n_predict=512,
69
- temperature=0.1,
70
- top_k=50,
71
- top_p=0.95,
72
- repeat_penalty=0.8,
73
- repeat_last_n=40,
74
- seed=None,
75
- n_threads=8,
76
- service_key:str="",
77
- tokenizer=None,
78
- default_generation_mode=ELF_GENERATION_FORMAT.LOLLMS,
79
- verify_ssl_certificate = True,
80
- user_name = "user",
81
- ai_name = "assistant"
82
- ) -> None:
83
- import tiktoken
19
+ """Core class for interacting with LOLLMS bindings"""
20
+ def __init__(self,
21
+ binding_name: str = "lollms",
22
+ host_address: Optional[str] = None,
23
+ model_name: str = "",
24
+ service_key: Optional[str] = None,
25
+ verify_ssl_certificate: bool = True,
26
+ personality: Optional[int] = None,
27
+ llm_bindings_dir: Path = Path(__file__).parent / "llm_bindings",
28
+ binding_config: Optional[Dict[str, any]] = None,
29
+ ctx_size: Optional[int] = 8192,
30
+ n_predict: Optional[int] = 4096,
31
+ stream: bool = False,
32
+ temperature: float = 0.1,
33
+ top_k: int = 50,
34
+ top_p: float = 0.95,
35
+ repeat_penalty: float = 0.8,
36
+ repeat_last_n: int = 40,
37
+ seed: Optional[int] = None,
38
+ n_threads: int = 8,
39
+ streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
40
+ user_name ="user",
41
+ ai_name = "assistant"):
42
+ """
43
+ Initialize the LollmsCore with a binding and generation parameters.
44
+
45
+ Args:
46
+ binding_name (str): Name of the binding to use (e.g., "lollms", "ollama").
47
+ host_address (Optional[str]): Host address for the service. Overrides binding default if provided.
48
+ model_name (str): Name of the model to use. Defaults to empty string.
49
+ service_key (Optional[str]): Authentication key for the service.
50
+ verify_ssl_certificate (bool): Whether to verify SSL certificates. Defaults to True.
51
+ personality (Optional[int]): Personality ID (used only by LOLLMS binding).
52
+ llm_bindings_dir (Path): Directory containing binding implementations.
53
+ Defaults to the "bindings" subdirectory relative to this file's location.
54
+ binding_config (Optional[Dict[str, any]]): Additional configuration for the binding.
55
+ n_predict (Optional[int]): Maximum number of tokens to generate. Default for generate_text.
56
+ stream (bool): Whether to stream the output. Defaults to False for generate_text.
57
+ temperature (float): Sampling temperature. Defaults to 0.1 for generate_text.
58
+ top_k (int): Top-k sampling parameter. Defaults to 50 for generate_text.
59
+ top_p (float): Top-p sampling parameter. Defaults to 0.95 for generate_text.
60
+ repeat_penalty (float): Penalty for repeated tokens. Defaults to 0.8 for generate_text.
61
+ repeat_last_n (int): Number of previous tokens to consider for repeat penalty. Defaults to 40.
62
+ seed (Optional[int]): Random seed for generation. Default for generate_text.
63
+ n_threads (int): Number of threads to use. Defaults to 8 for generate_text.
64
+ streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
65
+ Default for generate_text. Takes a string chunk and an MSG_TYPE enum value.
66
+
67
+ Raises:
68
+ ValueError: If the specified binding cannot be created.
69
+ """
70
+ self.binding_manager = LollmsLLMBindingManager(llm_bindings_dir)
71
+ self.binding_config = binding_config or {}
72
+
73
+ # Store generation parameters as instance variables
74
+ self.default_ctx_size = ctx_size
75
+ self.default_n_predict = n_predict
76
+ self.default_stream = stream
77
+ self.default_temperature = temperature
78
+ self.default_top_k = top_k
79
+ self.default_top_p = top_p
80
+ self.default_repeat_penalty = repeat_penalty
81
+ self.default_repeat_last_n = repeat_last_n
82
+ self.default_seed = seed
83
+ self.default_n_threads = n_threads
84
+ self.default_streaming_callback = streaming_callback
85
+
86
+ # Create the binding instance
87
+ self.binding = self.binding_manager.create_binding(
88
+ binding_name=binding_name,
89
+ host_address=host_address,
90
+ model_name=model_name,
91
+ service_key=service_key,
92
+ verify_ssl_certificate=verify_ssl_certificate,
93
+ personality=personality
94
+ )
95
+
96
+ if self.binding is None:
97
+ raise ValueError(f"Failed to create binding: {binding_name}. Available bindings: {self.binding_manager.get_available_bindings()}")
98
+
99
+ # Apply additional configuration if provided
100
+ if binding_config:
101
+ for key, value in binding_config.items():
102
+ setattr(self.binding, key, value)
84
103
  self.user_name = user_name
85
104
  self.ai_name = ai_name
86
- self.host_address=host_address
87
- if not self.host_address:
88
- if default_generation_mode==ELF_GENERATION_FORMAT.LOLLMS:
89
- self.host_address = "http://localhost:9600"
90
- elif default_generation_mode==ELF_GENERATION_FORMAT.OPENAI:
91
- self.host_address = "https://api.openai.com"
92
- elif default_generation_mode==ELF_GENERATION_FORMAT.OLLAMA:
93
- self.host_address = "http://localhost:11434"
94
- else:
95
- self.host_address = "http://localhost:9600"
96
-
97
- self.model_name = model_name
98
- self.ctx_size = ctx_size
99
- self.n_predict = n_predict
100
- self.min_n_predict = min_n_predict
101
- self.personality = personality
102
- self.temperature = temperature
103
- self.top_k = top_k
104
- self.top_p = top_p
105
- self.repeat_penalty = repeat_penalty
106
- self.repeat_last_n = repeat_last_n
107
- self.seed = seed
108
- self.n_threads = n_threads
109
105
  self.service_key = service_key
110
- if not self.service_key and default_generation_mode == ELF_GENERATION_FORMAT.OPENAI:
111
- self.service_key = os.getenv("OPENAI_API_KEY","")
112
- self.default_generation_mode = default_generation_mode
113
- self.verify_ssl_certificate = verify_ssl_certificate
114
- self.tokenizer = tiktoken.model.encoding_for_model("gpt-3.5-turbo-1106") if tokenizer is None else tokenizer
115
- if default_generation_mode == ELF_GENERATION_FORMAT.TRANSFORMERS:
116
- if not pm.is_installed("torch"):
117
- ASCIIColors.yellow("Diffusers: Torch not found. Installing it")
118
- pm.install_multiple(["torch","torchvision","torchaudio"], "https://download.pytorch.org/whl/cu121", force_reinstall=True)
119
-
120
- import torch
121
- if not torch.cuda.is_available():
122
- ASCIIColors.yellow("Diffusers: Torch not using cuda. Reinstalling it")
123
- pm.install_multiple(["torch","torchvision","torchaudio"], "https://download.pytorch.org/whl/cu121", force_reinstall=True)
124
- import torch
125
-
126
- if not pm.is_installed("transformers"):
127
- pm.install_or_update("transformers")
128
- from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
129
- self.tokenizer = AutoTokenizer.from_pretrained(
130
- str(model_name), trust_remote_code=False
131
- )
132
-
133
- self.model = AutoModelForCausalLM.from_pretrained(
134
- str(model_name),
135
- device_map="auto",
136
- load_in_4bit=True,
137
- torch_dtype=torch.bfloat16 # Load in float16 for quantization
138
- )
139
- self.generation_config = GenerationConfig.from_pretrained(str(model_name))
140
106
 
107
+ self.verify_ssl_certificate = verify_ssl_certificate
141
108
  self.start_header_id_template ="!@>"
142
109
  self.end_header_id_template =": "
143
110
  self.system_message_template ="system"
@@ -149,12 +116,6 @@ class LollmsClient():
149
116
  self.end_ai_header_id_template =": "
150
117
  self.end_ai_message_id_template =""
151
118
 
152
- if default_generation_mode==ELF_GENERATION_FORMAT.OPENAI:
153
- if not pm.is_installed("openai"):
154
- pm.install("openai")
155
- import openai
156
- self.client = openai.OpenAI(base_url=host_address)
157
-
158
119
 
159
120
  @property
160
121
  def system_full_header(self) -> str:
@@ -185,1150 +146,119 @@ class LollmsClient():
185
146
 
186
147
  def sink(self, s=None,i=None,d=None):
187
148
  pass
188
-
189
- def tokenize(self, prompt:str):
149
+ def tokenize(self, text: str) -> list:
190
150
  """
191
- Tokenizes the given prompt using the model's tokenizer.
151
+ Tokenize text using the active binding.
192
152
 
193
153
  Args:
194
- prompt (str): The input prompt to be tokenized.
154
+ text (str): The text to tokenize.
195
155
 
196
156
  Returns:
197
- list: A list of tokens representing the tokenized prompt.
157
+ list: List of tokens.
198
158
  """
199
- tokens_list = self.tokenizer.encode(prompt)
200
-
201
- return tokens_list
202
-
203
- def detokenize(self, tokens_list:list):
159
+ return self.binding.tokenize(text)
160
+
161
+ def detokenize(self, tokens: list) -> str:
204
162
  """
205
- Detokenizes the given list of tokens using the model's tokenizer.
163
+ Detokenize tokens using the active binding.
206
164
 
207
165
  Args:
208
- tokens_list (list): A list of tokens to be detokenized.
166
+ tokens (list): List of tokens to detokenize.
209
167
 
210
168
  Returns:
211
- str: The detokenized text as a string.
169
+ str: Detokenized text.
212
170
  """
213
- text = self.tokenizer.decode(tokens_list)
214
-
215
- return text
171
+ return self.binding.detokenize(tokens)
216
172
 
217
- def embed(self, text):
218
- if self.default_generation_mode == ELF_GENERATION_FORMAT.LOLLMS:
219
- return self.lollms_embed(text)
220
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.OLLAMA:
221
- return self.ollama_embed(text)
222
- else:
223
- return #not implemented
224
-
225
- def ollama_embed(self, text, **kwargs):
173
+ def get_model_details(self) -> dict:
226
174
  """
227
- Get embeddings for the input text using Ollama API
228
-
229
- Args:
230
- text (str or List[str]): Input text to embed
231
- **kwargs: Additional arguments like model, truncate, options, keep_alive
232
-
175
+ Get model information from the active binding.
176
+
233
177
  Returns:
234
- dict: Response containing embeddings
178
+ dict: Model information dictionary.
235
179
  """
236
- import requests
237
-
238
- url = f"{self.base_url}/api/embed"
239
-
240
- # Prepare the request payload
241
- payload = {
242
- "input": text,
243
- "model": kwargs.get("model", "llama2") # default model
244
- }
245
-
246
- # Add optional parameters if provided
247
- if "truncate" in kwargs:
248
- payload["truncate"] = kwargs["truncate"]
249
- if "options" in kwargs:
250
- payload["options"] = kwargs["options"]
251
- if "keep_alive" in kwargs:
252
- payload["keep_alive"] = kwargs["keep_alive"]
253
-
254
- try:
255
- response = requests.post(url, json=payload)
256
- response.raise_for_status() # Raise exception for bad status codes
257
- return response.json()
258
- except requests.exceptions.RequestException as e:
259
- raise Exception(f"Embedding request failed: {str(e)}")
260
-
261
-
262
- def lollms_embed(self, texts, **kwargs):
263
- api_key = kwargs.pop("api_key", None)
264
- headers = (
265
- {"Content-Type": "application/json", "Authorization": api_key}
266
- if api_key
267
- else {"Content-Type": "application/json"}
268
- )
269
- embeddings = []
270
- for text in texts:
271
- request_data = {"text": text}
272
- response = requests.post(f"{self.host_address}/lollms_embed", json=request_data, headers=headers)
273
- response.raise_for_status()
274
- result = response.json()
275
- embeddings.append(result["vector"])
276
- return np.array(embeddings)
180
+ return self.binding.get_model_info()
277
181
 
278
- def generate_with_images(self, prompt, images, n_predict=None, stream=False, temperature=0.1, top_k=50, top_p=0.95, repeat_penalty=0.8, repeat_last_n=40, seed=None, n_threads=8, service_key:str="", streaming_callback=None):
279
- if self.default_generation_mode == ELF_GENERATION_FORMAT.LOLLMS:
280
- return self.lollms_generate_with_images(prompt, images, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, service_key, streaming_callback)
281
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.OPENAI:
282
- return self.openai_generate_with_images(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, ELF_COMPLETION_FORMAT.Instruct, service_key, streaming_callback)
283
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.OLLAMA:
284
- return self.ollama_generate_with_images(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, ELF_COMPLETION_FORMAT.Instruct, service_key, streaming_callback)
285
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.LITELLM:
286
- return # To be implemented #self.litellm_generate_with_images(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, ELF_COMPLETION_FORMAT.Instruct, service_key, streaming_callback)
287
-
288
-
289
- def generate(self, prompt, n_predict=None, stream=False, temperature=0.1, top_k=50, top_p=0.95, repeat_penalty=0.8, repeat_last_n=40, seed=None, n_threads=8, service_key:str="", streaming_callback=None, completion_format = ELF_COMPLETION_FORMAT.Chat):
290
- if self.default_generation_mode == ELF_GENERATION_FORMAT.LOLLMS:
291
- return self.lollms_generate(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, service_key, streaming_callback)
292
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.OPENAI:
293
- return self.openai_generate(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, completion_format, service_key, streaming_callback)
294
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.OLLAMA:
295
- return self.ollama_generate(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, completion_format, service_key, streaming_callback)
296
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.LITELLM:
297
- return self.litellm_generate(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, completion_format, service_key, streaming_callback)
298
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.VLLM:
299
- return self.vllm_generate(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, completion_format, service_key, streaming_callback)
300
-
301
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.TRANSFORMERS:
302
- return self.transformers_generate(prompt, self.host_address, self.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, service_key, streaming_callback)
303
-
304
-
305
- def generate_text(self, prompt, host_address=None, model_name=None, personality=None, n_predict=None, stream=False, temperature=0.1, top_k=50, top_p=0.95, repeat_penalty=0.8, repeat_last_n=40, seed=None, n_threads=8, service_key:str="", streaming_callback=None):
306
- return self.lollms_generate(prompt, host_address, model_name, personality, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, service_key, streaming_callback)
307
-
308
- def lollms_generate(self, prompt, host_address=None, model_name=None, personality=None, n_predict=None, stream=False, temperature=0.1, top_k=50, top_p=0.95, repeat_penalty=0.8, repeat_last_n=40, seed=None, n_threads=8, service_key:str="", streaming_callback=None):
309
- # Set default values to instance variables if optional arguments are None
310
- host_address = host_address if host_address else self.host_address
311
- model_name = model_name if model_name else self.model_name
312
- n_predict = n_predict if n_predict else self.n_predict
313
- personality = personality if personality is not None else self.personality
314
- # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
315
- temperature = temperature if temperature is not None else self.temperature
316
- top_k = top_k if top_k is not None else self.top_k
317
- top_p = top_p if top_p is not None else self.top_p
318
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
319
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
320
- seed = seed or self.seed # Use the instance seed if not provided
321
- n_threads = n_threads if n_threads else self.n_threads
322
-
323
-
324
- url = f"{host_address}/lollms_generate"
325
- if service_key!="":
326
- headers = {
327
- 'Content-Type': 'application/json;',
328
- 'Authorization': f'Bearer {service_key}',
329
- }
330
- else:
331
- headers = {
332
- 'Content-Type': 'application/json',
333
- }
334
- data = {
335
- "prompt": prompt,
336
- "model_name": self.model_name,
337
- "personality": self.personality,
338
- "n_predict": n_predict,
339
- "stream": stream,
340
- "temperature": self.temperature,
341
- "top_k": self.top_k,
342
- "top_p": self.top_p,
343
- "repeat_penalty": repeat_penalty,
344
- "repeat_last_n": repeat_last_n,
345
- "seed": seed,
346
- "n_threads": n_threads
347
- }
348
-
349
- response = requests.post(url, json=data, headers=headers, stream=stream)
350
- if not stream:
351
- if response.status_code == 200:
352
- try:
353
- text = response.text.strip().rstrip('!')
354
- return text
355
- except Exception as ex:
356
- return {"status": False, "error": str(ex)}
357
- else:
358
- return {"status": False, "error": response.text}
359
- else:
360
- text = ""
361
- if response.status_code==200:
362
- try:
363
- for line in response.iter_lines():
364
- chunk = line.decode("utf-8")
365
- text += chunk
366
- if streaming_callback:
367
- streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK)
368
- return text.rstrip('!')
369
- except Exception as ex:
370
- return {"status": False, "error": str(ex)}
371
- else:
372
- return {"status": False, "error": response.text}
373
-
374
-
375
- def lollms_generate_with_images(
376
- self,
377
- prompt: str,
378
- images: List[str],
379
- host_address: Optional[str] = None,
380
- model_name: Optional[str] = None,
381
- personality: Optional[str] = None,
382
- n_predict: Optional[int] = None,
383
- stream: bool = False,
384
- temperature: float = 0.1,
385
- top_k: int = 50,
386
- top_p: float = 0.95,
387
- repeat_penalty: float = 0.8,
388
- repeat_last_n: int = 40,
389
- seed: Optional[int] = None,
390
- n_threads: int = 8,
391
- service_key: str = "",
392
- streaming_callback: Optional[Callable[[str, int], None]] = None
393
- ) -> Union[str, dict]:
182
+ def switch_model(self, model_name: str) -> bool:
394
183
  """
395
- Generates text based on a prompt and a list of images using a specified model.
184
+ Load a new model in the active binding.
396
185
 
397
186
  Args:
398
- prompt (str): The text prompt to generate responses for.
399
- images (List[str]): A list of file paths to images to be included in the generation.
400
- host_address (Optional[str]): The host address for the service. Defaults to instance variable.
401
- model_name (Optional[str]): The model name to use. Defaults to instance variable.
402
- personality (Optional[str]): The personality setting for the generation. Defaults to instance variable.
403
- n_predict (Optional[int]): The number of tokens to predict. Defaults to instance variable.
404
- stream (bool): Whether to stream the response. Defaults to False.
405
- temperature (float): Sampling temperature. Defaults to 0.1.
406
- top_k (int): Top-k sampling parameter. Defaults to 50.
407
- top_p (float): Top-p (nucleus) sampling parameter. Defaults to 0.95.
408
- repeat_penalty (float): Penalty for repeating tokens. Defaults to 0.8.
409
- repeat_last_n (int): Number of last tokens to consider for repeat penalty. Defaults to 40.
410
- seed (Optional[int]): Random seed for generation. Defaults to instance variable.
411
- n_threads (int): Number of threads to use. Defaults to 8.
412
- service_key (str): Optional service key for authorization.
413
- streaming_callback (Optional[Callable[[str, int], None]]): Callback for streaming responses.
187
+ model_name (str): Name of the model to load.
414
188
 
415
189
  Returns:
416
- Union[str, dict]: The generated text if not streaming, or a dictionary with status and error if applicable.
190
+ bool: True if model loaded successfully, False otherwise.
417
191
  """
418
-
419
- # Set default values to instance variables if optional arguments are None
420
- host_address = host_address if host_address else self.host_address
421
- model_name = model_name if model_name else self.model_name
422
- n_predict = n_predict if n_predict else self.n_predict
423
- personality = personality if personality is not None else self.personality
424
-
425
- # Set parameters to instance variables if they are not provided or None
426
- temperature = temperature if temperature is not None else self.temperature
427
- top_k = top_k if top_k is not None else self.top_k
428
- top_p = top_p if top_p is not None else self.top_p
429
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
430
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
431
- seed = seed or self.seed # Use the instance seed if not provided
432
- n_threads = n_threads if n_threads else self.n_threads
433
-
434
- def encode_image_to_base64(image_path: str) -> str:
435
- """Encodes an image file to a base64 string."""
436
- with open(image_path, "rb") as image_file:
437
- encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
438
- return encoded_string
439
-
440
- # Encode images in base64
441
- encoded_images = [encode_image_to_base64(image) for image in images]
442
-
443
- url = f"{host_address}/lollms_generate_with_images"
444
- headers = {
445
- 'Content-Type': 'application/json',
446
- 'Authorization': f'Bearer {service_key}' if service_key else '',
447
- }
448
-
449
- data = {
450
- "prompt": prompt,
451
- "model_name": model_name,
452
- "personality": personality,
453
- "n_predict": n_predict,
454
- "stream": stream,
455
- "temperature": temperature,
456
- "top_k": top_k,
457
- "top_p": top_p,
458
- "repeat_penalty": repeat_penalty,
459
- "repeat_last_n": repeat_last_n,
460
- "seed": seed,
461
- "n_threads": n_threads,
462
- "images": encoded_images # Add encoded images to the request payload
463
- }
464
-
465
- response = requests.post(url, json=data, headers=headers, stream=stream)
466
- if not stream:
467
- if response.status_code == 200:
468
- try:
469
- text = response.text.rstrip('!')
470
- return text
471
- except Exception as ex:
472
- return {"status": False, "error": str(ex)}
473
- else:
474
- return {"status": False, "error": response.text}
475
- else:
476
- text = ""
477
- if response.status_code == 200:
478
- try:
479
- for line in response.iter_lines():
480
- chunk = line.decode("utf-8")
481
- text += chunk
482
- if streaming_callback:
483
- streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK)
484
- if text[0] == '"':
485
- text = text[1:]
486
- if text[-1] == '"':
487
- text = text[:-1]
488
- return text
489
- except Exception as ex:
490
- return {"status": False, "error": str(ex)}
491
- else:
492
- return {"status": False, "error": response.text}
493
-
192
+ return self.binding.load_model(model_name)
494
193
 
495
- def transformers_generate(self, prompt, host_address=None, model_name=None, personality=None, n_predict=None, stream=False, temperature=0.1, top_k=50, top_p=0.95, repeat_penalty=0.8, repeat_last_n=40, seed=None, n_threads=8, service_key:str="", streaming_callback=None):
496
- # Set default values to instance variables if optional arguments are None
497
- model_name = model_name if model_name else self.model_name
498
- n_predict = n_predict if n_predict else self.n_predict
499
- personality = personality if personality is not None else self.personality
500
- # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
501
- temperature = temperature if temperature is not None else self.temperature
502
- top_k = top_k if top_k is not None else self.top_k
503
- top_p = top_p if top_p is not None else self.top_p
504
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
505
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
506
- seed = seed or self.seed # Use the instance seed if not provided
507
- n_threads = n_threads if n_threads else self.n_threads
508
-
509
- self.generation_config.max_new_tokens = int(n_predict)
510
- self.generation_config.temperature = float(temperature)
511
- self.generation_config.top_k = int(top_k)
512
- self.generation_config.top_p = float(top_p)
513
- self.generation_config.repetition_penalty = float(repeat_penalty)
514
- self.generation_config.do_sample = True if float(temperature)>0 else False
515
- self.generation_config.pad_token_id = self.tokenizer.pad_token_id
516
- self.generation_config.eos_token_id = self.tokenizer.eos_token_id
517
- self.generation_config.output_attentions = False
518
-
519
- try:
520
- input_ids = self.tokenizer(prompt, add_special_tokens=False, return_tensors='pt').input_ids
521
- class StreamerClass:
522
- def __init__(self, tokenizer, callback):
523
- self.output = ""
524
- self.skip_prompt = True
525
- self.decode_kwargs = {}
526
- self.tokenizer = tokenizer
527
-
528
- # variables used in the streaming process
529
- self.token_cache = []
530
- self.print_len = 0
531
- self.next_tokens_are_prompt = True
532
- self.callback = callback
533
- def put(self, value):
534
- """
535
- Recives tokens, decodes them, and prints them to stdout as soon as they form entire words.
536
- """
537
- if len(value.shape)==1 and (value[0] == self.tokenizer.eos_token_id or value[0] == self.tokenizer.bos_token_id):
538
- print("eos detected")
539
- return
540
- if len(value.shape) > 1 and value.shape[0] > 1:
541
- raise ValueError("TextStreamer only supports batch size 1")
542
- elif len(value.shape) > 1:
543
- value = value[0]
544
-
545
- if self.skip_prompt and self.next_tokens_are_prompt:
546
- self.next_tokens_are_prompt = False
547
- return
548
-
549
- # Add the new token to the cache and decodes the entire thing.
550
- self.token_cache.extend(value.tolist())
551
- text = self.tokenizer.decode(self.token_cache, **self.decode_kwargs)
552
-
553
- # After the symbol for a new line, we flush the cache.
554
- if text.endswith("\n"):
555
- printable_text = text[self.print_len :]
556
- self.token_cache = []
557
- self.print_len = 0
558
- # If the last token is a CJK character, we print the characters.
559
- elif len(text) > 0 and self._is_chinese_char(ord(text[-1])):
560
- printable_text = text[self.print_len :]
561
- self.print_len += len(printable_text)
562
- # Otherwise, prints until the last space char (simple heuristic to avoid printing incomplete words,
563
- # which may change with the subsequent token -- there are probably smarter ways to do this!)
564
- else:
565
- printable_text = text[self.print_len : text.rfind(" ") + 1]
566
- self.print_len += len(printable_text)
567
-
568
- self.output += printable_text
569
- if self.callback:
570
- if not self.callback(printable_text, 0):
571
- raise Exception("canceled")
572
-
573
- def _is_chinese_char(self, cp):
574
- """Checks whether CP is the codepoint of a CJK character."""
575
- # This defines a "chinese character" as anything in the CJK Unicode block:
576
- # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
577
- #
578
- # Note that the CJK Unicode block is NOT all Japanese and Korean characters,
579
- # despite its name. The modern Korean Hangul alphabet is a different block,
580
- # as is Japanese Hiragana and Katakana. Those alphabets are used to write
581
- # space-separated words, so they are not treated specially and handled
582
- # like the all of the other languages.
583
- if (
584
- (cp >= 0x4E00 and cp <= 0x9FFF)
585
- or (cp >= 0x3400 and cp <= 0x4DBF) #
586
- or (cp >= 0x20000 and cp <= 0x2A6DF) #
587
- or (cp >= 0x2A700 and cp <= 0x2B73F) #
588
- or (cp >= 0x2B740 and cp <= 0x2B81F) #
589
- or (cp >= 0x2B820 and cp <= 0x2CEAF) #
590
- or (cp >= 0xF900 and cp <= 0xFAFF)
591
- or (cp >= 0x2F800 and cp <= 0x2FA1F) #
592
- ): #
593
- return True
594
-
595
- return False
596
- def end(self):
597
- """Flushes any remaining cache and prints a newline to stdout."""
598
- # Flush the cache, if it exists
599
- if len(self.token_cache) > 0:
600
- text = self.tokenizer.decode(self.token_cache, **self.decode_kwargs)
601
- printable_text = text[self.print_len :]
602
- self.token_cache = []
603
- self.print_len = 0
604
- else:
605
- printable_text = ""
606
-
607
- self.next_tokens_are_prompt = True
608
- if self.callback:
609
- if self.callback(printable_text, 0):
610
- raise Exception("canceled")
611
- streamer = StreamerClass(self.tokenizer, streaming_callback)
612
- self.generate(
613
- inputs=input_ids,
614
- generation_config=self.generation_config,
615
- streamer = streamer,
616
- )
617
- return streamer.output.rstrip('!')
618
- except Exception as ex:
619
- return {"status": False, "error": str(ex)}
620
-
621
- def openai_generate(self,
622
- prompt,
623
- host_address=None,
624
- model_name=None,
625
- personality=None,
626
- n_predict=None,
627
- stream=False,
628
- temperature=0.1,
629
- top_k=50,
630
- top_p=0.95,
631
- repeat_penalty=0.8,
632
- repeat_last_n=40,
633
- seed=None,
634
- n_threads=8,
635
- completion_format: ELF_COMPLETION_FORMAT = ELF_COMPLETION_FORMAT.Chat,
636
- service_key: str = "",
637
- streaming_callback=None):
194
+ def get_available_bindings(self) -> List[str]:
638
195
  """
639
- Generates text using the OpenAI API based on the provided prompt and parameters.
640
-
641
- Parameters:
642
- prompt (str): The input text prompt to generate completions for.
643
- host_address (str, optional): The API host address. Defaults to instance variable.
644
- model_name (str, optional): The model to use for generation. Defaults to instance variable.
645
- personality (str, optional): The personality setting for the model. Defaults to instance variable.
646
- n_predict (int, optional): The number of tokens to predict. Defaults to instance variable.
647
- stream (bool, optional): Whether to stream the response. Defaults to False.
648
- temperature (float, optional): Sampling temperature. Higher values mean more randomness. Defaults to 0.1.
649
- top_k (int, optional): The number of highest probability vocabulary tokens to keep for top-k filtering. Defaults to 50.
650
- top_p (float, optional): The cumulative probability of parameter options to keep for nucleus sampling. Defaults to 0.95.
651
- repeat_penalty (float, optional): The penalty for repeating tokens. Defaults to 0.8.
652
- repeat_last_n (int, optional): The number of last tokens to consider for repeat penalty. Defaults to 40.
653
- seed (int, optional): Random seed for reproducibility. Defaults to instance variable.
654
- n_threads (int, optional): The number of threads to use for generation. Defaults to 8.
655
- completion_format (ELF_COMPLETION_FORMAT, optional): The format of the completion request (Instruct or Chat). Defaults to ELF_COMPLETION_FORMAT.Instruct.
656
- service_key (str, optional): The API service key for authorization. Defaults to an empty string.
657
- streaming_callback (callable, optional): A callback function to handle streaming responses.
196
+ Get list of available bindings.
658
197
 
659
198
  Returns:
660
- str: The generated text response from the OpenAI API.
199
+ List[str]: List of binding names that can be used.
661
200
  """
662
- # Set default values to instance variables if optional arguments are None
663
- host_address = host_address if host_address else self.host_address
664
- model_name = model_name if model_name else self.model_name
665
- n_predict = n_predict if n_predict else self.n_predict
666
- personality = personality if personality is not None else self.personality
667
- # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
668
- temperature = temperature if temperature is not None else self.temperature
669
- top_k = top_k if top_k is not None else self.top_k
670
- top_p = top_p if top_p is not None else self.top_p
671
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
672
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
673
- seed = seed or self.seed # Use the instance seed if not provided
674
- n_threads = n_threads if n_threads else self.n_threads
675
- service_key = service_key if service_key else self.service_key
676
- self.client.api_key = service_key
677
- count = 0
678
- output= ""
679
-
680
-
681
- if "vision" in self.model_name:
682
- messages = [
683
- {
684
- "role": "user",
685
- "content": [
686
- {
687
- "type":"text",
688
- "text":prompt
689
- }
690
- ]
691
- }
692
- ]
693
- else:
694
- messages = [{"role": "user", "content": prompt}]
695
-
696
-
697
- if completion_format == ELF_COMPLETION_FORMAT.Chat:
698
- if "o1" in self.model_name:
699
- chat_completion = self.client.chat.completions.create(
700
- model=self.model_name, # Choose the engine according to your OpenAI plan
701
- messages=messages,
702
- n=1, # Specify the number of responses you want
703
- )
704
- output = chat_completion.choices[0].message.content
705
- else:
706
- chat_completion = self.client.chat.completions.create(
707
- model=self.model_name, # Choose the engine according to your OpenAI plan
708
- messages=messages,
709
- max_tokens=n_predict-7 if n_predict>512 else n_predict, # Adjust the desired length of the generated response
710
- n=1, # Specify the number of responses you want
711
- temperature=float(self.temperature), # Adjust the temperature for more or less randomness in the output
712
- stream=True)
713
-
714
- for resp in chat_completion:
715
- if count >= n_predict:
716
- break
717
- try:
718
- word = resp.choices[0].delta.content
719
- except Exception as ex:
720
- word = ""
721
- if streaming_callback is not None:
722
- if not streaming_callback(word):
723
- break
724
- if word:
725
- output += word
726
- count += 1
727
- else:
728
- completion = self.client.completions.create(
729
- model=self.model_name, # Choose the engine according to your OpenAI plan
730
- prompt=prompt,
731
- max_tokens=n_predict-7 if n_predict>512 else n_predict, # Adjust the desired length of the generated response
732
- n=1, # Specify the number of responses you want
733
- temperature=float(self.temperature), # Adjust the temperature for more or less randomness in the output
734
- stream=True)
735
-
736
- for resp in completion:
737
- if count >= n_predict:
738
- break
739
- try:
740
- word = resp.choices[0].text
741
- except Exception as ex:
742
- word = ""
743
- if streaming_callback is not None:
744
- if not streaming_callback(word):
745
- break
746
- if word:
747
- output += word
748
- count += 1
749
-
750
- return output
751
-
752
-
753
- def vllm_generate(self,
754
- prompt,
755
- host_address=None,
756
- model_name=None,
757
- personality=None,
758
- n_predict=None,
759
- stream=False,
760
- temperature=0.1,
761
- top_k=50,
762
- top_p=0.95,
763
- repeat_penalty=0.8,
764
- repeat_last_n=40,
765
- seed=None,
766
- n_threads=8,
767
- completion_format: ELF_COMPLETION_FORMAT = ELF_COMPLETION_FORMAT.Instruct,
768
- service_key: str = "",
769
- streaming_callback=None):
201
+ return self.binding_manager.get_available_bindings()
202
+
203
+ def generate_text(self,
204
+ prompt: str,
205
+ images: Optional[List[str]] = None,
206
+ n_predict: Optional[int] = None,
207
+ stream: Optional[bool] = None,
208
+ temperature: Optional[float] = None,
209
+ top_k: Optional[int] = None,
210
+ top_p: Optional[float] = None,
211
+ repeat_penalty: Optional[float] = None,
212
+ repeat_last_n: Optional[int] = None,
213
+ seed: Optional[int] = None,
214
+ n_threads: Optional[int] = None,
215
+ streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None) -> str:
770
216
  """
771
- Generates text using the OpenAI API based on the provided prompt and parameters.
217
+ Generate text using the active binding, using instance defaults if parameters are not provided.
772
218
 
773
- Parameters:
774
- prompt (str): The input text prompt to generate completions for.
775
- host_address (str, optional): The API host address. Defaults to instance variable.
776
- model_name (str, optional): The model to use for generation. Defaults to instance variable.
777
- personality (str, optional): The personality setting for the model. Defaults to instance variable.
778
- n_predict (int, optional): The number of tokens to predict. Defaults to instance variable.
779
- stream (bool, optional): Whether to stream the response. Defaults to False.
780
- temperature (float, optional): Sampling temperature. Higher values mean more randomness. Defaults to 0.1.
781
- top_k (int, optional): The number of highest probability vocabulary tokens to keep for top-k filtering. Defaults to 50.
782
- top_p (float, optional): The cumulative probability of parameter options to keep for nucleus sampling. Defaults to 0.95.
783
- repeat_penalty (float, optional): The penalty for repeating tokens. Defaults to 0.8.
784
- repeat_last_n (int, optional): The number of last tokens to consider for repeat penalty. Defaults to 40.
785
- seed (int, optional): Random seed for reproducibility. Defaults to instance variable.
786
- n_threads (int, optional): The number of threads to use for generation. Defaults to 8.
787
- completion_format (ELF_COMPLETION_FORMAT, optional): The format of the completion request (Instruct or Chat). Defaults to ELF_COMPLETION_FORMAT.Instruct.
788
- service_key (str, optional): The API service key for authorization. Defaults to an empty string.
789
- streaming_callback (callable, optional): A callback function to handle streaming responses.
219
+ Args:
220
+ prompt (str): The input prompt for text generation.
221
+ images (Optional[List[str]]): List of image file paths for multimodal generation.
222
+ n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
223
+ stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
224
+ temperature (Optional[float]): Sampling temperature. Uses instance default if None.
225
+ top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
226
+ top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
227
+ repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
228
+ repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
229
+ seed (Optional[int]): Random seed for generation. Uses instance default if None.
230
+ n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
231
+ streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
232
+ Uses instance default if None.
233
+ - First parameter (str): The chunk of text received from the stream.
234
+ - Second parameter (MSG_TYPE): The message type enum (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
790
235
 
791
236
  Returns:
792
- str: The generated text response from the OpenAI API.
237
+ Union[str, dict]: Generated text or error dictionary if failed.
793
238
  """
794
- # Set default values to instance variables if optional arguments are None
795
- host_address = host_address if host_address else self.host_address
796
- model_name = model_name if model_name else self.model_name
797
- n_predict = n_predict if n_predict else self.n_predict
798
- personality = personality if personality is not None else self.personality
799
- # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
800
- temperature = temperature if temperature is not None else self.temperature
801
- top_k = top_k if top_k is not None else self.top_k
802
- top_p = top_p if top_p is not None else self.top_p
803
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
804
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
805
- seed = seed or self.seed # Use the instance seed if not provided
806
- n_threads = n_threads if n_threads else self.n_threads
807
-
808
- if service_key != "":
809
- headers = {
810
- 'Content-Type': 'application/json',
811
- 'Authorization': f'Bearer {service_key}',
812
- }
813
- else:
814
- headers = {
815
- 'Content-Type': 'application/json',
816
- }
817
-
818
- if completion_format == ELF_COMPLETION_FORMAT.Instruct:
819
- data = {
820
- 'model': model_name,
821
- 'prompt': prompt,
822
- "stream": True,
823
- "temperature": float(temperature),
824
- "max_tokens": n_predict
825
- }
826
- completion_format_path = "/v1/completions"
827
- elif completion_format == ELF_COMPLETION_FORMAT.Chat:
828
- data = {
829
- 'model': model_name,
830
- 'messages': [{
831
- 'role': "user",
832
- 'content': prompt
833
- }],
834
- "stream": True,
835
- "temperature": float(temperature),
836
- "max_tokens": n_predict
837
- }
838
- completion_format_path = "/v1/chat/completions"
839
-
840
- if host_address.endswith("/"):
841
- host_address = host_address[:-1]
842
-
843
- url = f'{host_address}{completion_format_path}'
844
-
845
- response = requests.post(url, headers=headers, data=json.dumps(data), stream=True, verify=self.verify_ssl_certificate)
846
-
847
- if response.status_code == 400:
848
- try:
849
- content = response.content.decode("utf8")
850
- content = json.loads(content)
851
- self.error(content["error"]["message"])
852
- return
853
- except:
854
- content = response.content.decode("utf8")
855
- content = json.loads(content)
856
- self.error(content["message"])
857
- return
858
- elif response.status_code == 404:
859
- ASCIIColors.error(response.content.decode("utf-8", errors='ignore'))
860
-
861
- text = ""
862
- for line in response.iter_lines():
863
- decoded = line.decode("utf-8")
864
- if decoded.startswith("data: "):
865
- try:
866
- json_data = json.loads(decoded[5:].strip())
867
- if completion_format == ELF_COMPLETION_FORMAT.Chat:
868
- try:
869
- chunk = json_data["choices"][0]["delta"]["content"]
870
- except:
871
- chunk = ""
872
- else:
873
- chunk = json_data["choices"][0]["text"]
874
- # Process the JSON data here
875
- text += chunk
876
- if streaming_callback:
877
- if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
878
- break
879
- except:
880
- break
881
- else:
882
- if decoded.startswith("{"):
883
- for line_ in response.iter_lines():
884
- decoded += line_.decode("utf-8")
885
- try:
886
- json_data = json.loads(decoded)
887
- if json_data["object"] == "error":
888
- self.error(json_data["message"])
889
- break
890
- except:
891
- self.error("Couldn't generate text, verify your key or model name")
892
- else:
893
- text += decoded
894
- if streaming_callback:
895
- if not streaming_callback(decoded, MSG_TYPE.MSG_TYPE_CHUNK):
896
- break
897
- return text
898
-
899
- def openai_generate_with_images(self,
900
- prompt,
901
- images,
902
- host_address=None,
903
- model_name=None,
904
- personality=None,
905
- n_predict=None,
906
- stream=False,
907
- temperature=0.1,
908
- top_k=50,
909
- top_p=0.95,
910
- repeat_penalty=0.8,
911
- repeat_last_n=40,
912
- seed=None,
913
- n_threads=8,
914
- max_image_width=-1,
915
- service_key: str = "",
916
- streaming_callback=None,):
917
- """Generates text out of a prompt
239
+ return self.binding.generate_text(
240
+ prompt=prompt,
241
+ images=images,
242
+ n_predict=n_predict if n_predict is not None else self.default_n_predict,
243
+ stream=stream if stream is not None else self.default_stream,
244
+ temperature=temperature if temperature is not None else self.default_temperature,
245
+ top_k=top_k if top_k is not None else self.default_top_k,
246
+ top_p=top_p if top_p is not None else self.default_top_p,
247
+ repeat_penalty=repeat_penalty if repeat_penalty is not None else self.default_repeat_penalty,
248
+ repeat_last_n=repeat_last_n if repeat_last_n is not None else self.default_repeat_last_n,
249
+ seed=seed if seed is not None else self.default_seed,
250
+ n_threads=n_threads if n_threads is not None else self.default_n_threads,
251
+ streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback
252
+ )
918
253
 
919
- Args:
920
- prompt (str): The prompt to use for generation
921
- n_predict (int, optional): Number of tokens to prodict. Defaults to 128.
922
- callback (Callable[[str], None], optional): A callback function that is called everytime a new text element is generated. Defaults to None.
923
- verbose (bool, optional): If true, the code will spit many informations about the generation process. Defaults to False.
924
- """
925
- # Set default values to instance variables if optional arguments are None
926
- host_address = host_address if host_address else self.host_address
927
- model_name = model_name if model_name else self.model_name
928
- n_predict = n_predict if n_predict else self.n_predict
929
- personality = personality if personality is not None else self.personality
930
- # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
931
- temperature = temperature if temperature is not None else self.temperature
932
- top_k = top_k if top_k is not None else self.top_k
933
- top_p = top_p if top_p is not None else self.top_p
934
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
935
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
936
- seed = seed or self.seed # Use the instance seed if not provided
937
- n_threads = n_threads if n_threads else self.n_threads
938
-
939
- count = 0
940
- output = ""
941
-
942
- messages = [
943
- {
944
- "role": "user",
945
- "content": [
946
- {
947
- "type":"text",
948
- "text":prompt
949
- }
950
- ]+[
951
- {
952
- "type": "image_url",
953
- "image_url": {
954
- "url": f"data:image/jpeg;base64,{encode_image(image_path, max_image_width)}"
955
- }
956
- }
957
- for image_path in images
958
- ]
959
- }
960
- ]
961
- chat_completion = self.client.chat.completions.create(
962
- model=self.model_name, # Choose the engine according to your OpenAI plan
963
- messages=messages,
964
- max_tokens=n_predict, # Adjust the desired length of the generated response
965
- n=1, # Specify the number of responses you want
966
- temperature=temperature, # Adjust the temperature for more or less randomness in the output
967
- stream=True
968
- )
969
-
970
- for resp in chat_completion:
971
- if count >= n_predict:
972
- break
973
- try:
974
- word = resp.choices[0].delta.content
975
- except Exception as ex:
976
- word = ""
977
- if streaming_callback is not None:
978
- if not streaming_callback(word):
979
- break
980
- if word:
981
- output += word
982
- count += 1
983
- return output
984
-
985
254
 
986
- def ollama_generate(self, prompt, host_address=None, model_name=None, personality=None, n_predict=None, stream=False, temperature=0.1, top_k=50, top_p=0.95, repeat_penalty=0.8, repeat_last_n=40, seed=None, n_threads=8, completion_format:ELF_COMPLETION_FORMAT=ELF_COMPLETION_FORMAT.Instruct, service_key:str="", streaming_callback=None):
987
- # Set default values to instance variables if optional arguments are None
988
- host_address = host_address if host_address else self.host_address
989
- model_name = model_name if model_name else self.model_name
990
- n_predict = n_predict if n_predict else self.n_predict
991
- personality = personality if personality is not None else self.personality
992
- # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
993
- temperature = temperature if temperature is not None else self.temperature
994
- top_k = top_k if top_k is not None else self.top_k
995
- top_p = top_p if top_p is not None else self.top_p
996
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
997
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
998
- seed = seed or self.seed # Use the instance seed if not provided
999
- n_threads = n_threads if n_threads else self.n_threads
1000
-
1001
- if service_key!="":
1002
- headers = {
1003
- 'Content-Type': 'application/json',
1004
- 'Authorization': f'Bearer {service_key}',
1005
- }
1006
- else:
1007
- headers = {
1008
- 'Content-Type': 'application/json',
1009
- }
1010
-
1011
- data = {
1012
- 'model':model_name,
1013
- 'prompt': prompt,
1014
- "stream":stream,
1015
- "temperature": float(temperature),
1016
- "max_tokens": n_predict
1017
- }
1018
- completion_format_path = "/api/generate"
1019
- if host_address.endswith("/"):
1020
- host_address = host_address[:-1]
1021
- url = f'{host_address}{completion_format_path}'
1022
-
1023
- response = requests.post(url, json=data, headers=headers)
1024
-
1025
- if response.status_code==404:
1026
- ASCIIColors.error(response.content.decode("utf-8", errors='ignore'))
1027
- text = ""
1028
- if stream:
1029
- for line in response.iter_lines():
1030
- decoded = line.decode("utf-8")
1031
- json_data = json.loads(decoded)
1032
- chunk = json_data["response"]
1033
- ## Process the JSON data here
1034
- text +=chunk
1035
- if streaming_callback:
1036
- if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
1037
- break
1038
- return text
1039
- else:
1040
- return response.json()["response"]
1041
-
1042
- def ollama_generate_with_images(self,
1043
- prompt,
1044
- images,
1045
- host_address=None,
1046
- model_name=None,
1047
- personality=None,
1048
- n_predict=None,
1049
- stream=False,
1050
- temperature=0.1,
1051
- top_k=50,
1052
- top_p=0.95,
1053
- repeat_penalty=0.8,
1054
- repeat_last_n=40,
1055
- seed=None,
1056
- n_threads=8,
1057
- max_image_width=-1,
1058
- service_key: str = "",
1059
- streaming_callback=None,):
1060
- """Generates text out of a prompt
1061
-
1062
- Args:
1063
- prompt (str): The prompt to use for generation
1064
- n_predict (int, optional): Number of tokens to prodict. Defaults to 128.
1065
- callback (Callable[[str], None], optional): A callback function that is called everytime a new text element is generated. Defaults to None.
1066
- verbose (bool, optional): If true, the code will spit many informations about the generation process. Defaults to False.
1067
- """
1068
- # Set default values to instance variables if optional arguments are None
1069
- host_address = host_address if host_address else self.host_address
1070
- model_name = model_name if model_name else self.model_name
1071
- n_predict = n_predict if n_predict else self.n_predict
1072
- personality = personality if personality is not None else self.personality
1073
- # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
1074
- temperature = temperature if temperature is not None else self.temperature
1075
- top_k = top_k if top_k is not None else self.top_k
1076
- top_p = top_p if top_p is not None else self.top_p
1077
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
1078
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
1079
- seed = seed or self.seed # Use the instance seed if not provided
1080
- n_threads = n_threads if n_threads else self.n_threads
1081
- if service_key != "":
1082
- headers = {
1083
- 'Content-Type': 'application/json',
1084
- 'Authorization': f'Bearer {service_key}',
1085
- }
1086
- else:
1087
- headers = {
1088
- 'Content-Type': 'application/json',
1089
- }
1090
-
1091
- images_list = []
1092
- for image in images:
1093
- images_list.append(f"{encode_image(image, max_image_width)}")
1094
-
1095
- data = {
1096
- 'model': model_name,
1097
- 'prompt': prompt,
1098
- 'images': images_list,
1099
- "raw": True,
1100
- "stream":True,
1101
- "temperature": float(temperature),
1102
- "max_tokens": n_predict
1103
- }
1104
-
1105
-
1106
- data = {
1107
- 'model': model_name,
1108
- 'messages': [
1109
- {
1110
- "role": "user",
1111
- "content": [
1112
- {
1113
- "type":"text",
1114
- "text":prompt
1115
- }
1116
- ]+[
1117
- {
1118
- "type": "image_url",
1119
- "image_url": {
1120
- "url": f"data:image/jpeg;base64,{encode_image(image_path, max_image_width)}"
1121
- }
1122
- }
1123
- for image_path in images
1124
- ]
1125
- }
1126
- ],
1127
- "stream": True,
1128
- "temperature": float(temperature),
1129
- "max_tokens": n_predict
1130
- }
1131
-
1132
- completion_format_path = "/api"
1133
-
1134
- if host_address.endswith("/"):
1135
- host_address = host_address[:-1]
1136
- url = f'{host_address}{completion_format_path}'
1137
-
1138
- response = requests.post(url, json=data, headers=headers)
1139
-
1140
- if response.status_code == 400:
1141
- try:
1142
- content = response.content.decode("utf8")
1143
- content = json.loads(content)
1144
- self.error(content["error"]["message"])
1145
- return
1146
- except:
1147
- content = response.content.decode("utf8")
1148
- content = json.loads(content)
1149
- self.error(content["message"])
1150
- return
1151
- elif response.status_code == 404:
1152
- ASCIIColors.error(response.content.decode("utf-8", errors='ignore'))
1153
-
1154
- text = ""
1155
- for line in response.iter_lines():
1156
- decoded = line.decode("utf-8")
1157
- if decoded.startswith("data: "):
1158
- try:
1159
- json_data = json.loads(decoded[5:].strip())
1160
- try:
1161
- chunk = json_data["choices"][0]["delta"]["content"]
1162
- except:
1163
- chunk = ""
1164
- # Process the JSON data here
1165
- text += chunk
1166
- if streaming_callback:
1167
- if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
1168
- break
1169
- except:
1170
- break
1171
- else:
1172
- if decoded.startswith("{"):
1173
- for line_ in response.iter_lines():
1174
- decoded += line_.decode("utf-8")
1175
- try:
1176
- json_data = json.loads(decoded)
1177
- if json_data["object"] == "error":
1178
- self.error(json_data["message"])
1179
- break
1180
- except:
1181
- self.error("Couldn't generate text, verify your key or model name")
1182
- else:
1183
- text += decoded
1184
- if streaming_callback:
1185
- if not streaming_callback(decoded, MSG_TYPE.MSG_TYPE_CHUNK):
1186
- break
1187
- return text
1188
-
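The method removed above streamed an OpenAI-style chat completion by hand, accumulating the "delta" content of each "data: " server-sent-event line. Below is a minimal standalone sketch of that streaming loop, kept only for reference; the endpoint URL, API key, and payload are placeholders, not part of the library.

import json
import requests

def stream_openai_chat(url, api_key, payload):
    # Mirrors the removed loop: POST the request, then accumulate the
    # "delta" content of every "data: " line in the streamed response.
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}
    text = ""
    with requests.post(url, json=payload, headers=headers, stream=True) as response:
        response.raise_for_status()
        for line in response.iter_lines():
            decoded = line.decode("utf-8")
            if not decoded.startswith("data: "):
                continue
            body = decoded[len("data: "):].strip()
            if body == "[DONE]":
                break
            chunk = json.loads(body).get("choices", [{}])[0].get("delta", {}).get("content", "") or ""
            text += chunk
    return text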
1189
- def litellm_generate(self, prompt, host_address=None, model_name=None, personality=None, n_predict=None, stream=False, temperature=0.1, top_k=50, top_p=0.95, repeat_penalty=0.8, repeat_last_n=40, seed=None, n_threads=8, completion_format:ELF_COMPLETION_FORMAT=ELF_COMPLETION_FORMAT.Instruct, service_key:str="", streaming_callback=None):
1190
- # Set default values to instance variables if optional arguments are None
1191
- host_address = host_address if host_address else self.host_address
1192
- model_name = model_name if model_name else self.model_name
1193
- n_predict = n_predict if n_predict else self.n_predict
1194
- personality = personality if personality is not None else self.personality
1195
- # Set temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads to the instance variables if they are not provided or None
1196
- temperature = temperature if temperature is not None else self.temperature
1197
- top_k = top_k if top_k is not None else self.top_k
1198
- top_p = top_p if top_p is not None else self.top_p
1199
- repeat_penalty = repeat_penalty if repeat_penalty is not None else self.repeat_penalty
1200
- repeat_last_n = repeat_last_n if repeat_last_n is not None else self.repeat_last_n
1201
- seed = seed or self.seed # Use the instance seed if not provided
1202
- n_threads = n_threads if n_threads else self.n_threads
1203
-
1204
- if service_key!="":
1205
- headers = {
1206
- 'Content-Type': 'application/json',
1207
- 'Authorization': f'Bearer {service_key}',
1208
- }
1209
- else:
1210
- headers = {
1211
- 'Content-Type': 'application/json',
1212
- }
1213
-
1214
- data = {
1215
- 'model':model_name,
1216
- 'prompt': prompt,
1217
- "stream":True,
1218
- "temperature": float(temperature),
1219
- "max_tokens": n_predict
1220
- }
1221
- completion_format_path = "/api/generate"
1222
- if host_address.endswith("/"):
1223
- host_address = host_address[:-1]
1224
- url = f'{host_address}{completion_format_path}'
1225
-
1226
- response = requests.post(url, json=data, headers=headers)
1227
-
1228
- if response.status_code==404:
1229
- ASCIIColors.error(response.content.decode("utf-8", errors='ignore'))
1230
- text = ""
1231
- for line in response.iter_lines():
1232
- decoded = line.decode("utf-8")
1233
- if decoded.startswith("{"):
1234
- json_data = json.loads(decoded)
1235
- if "error" in json_data:
1236
- self.error(json_data["error"]["message"])
1237
- break
1238
- else:
1239
- text +=decoded
1240
- if streaming_callback:
1241
- if not streaming_callback(decoded, MSG_TYPE.MSG_TYPE_CHUNK):
1242
- break
1243
-
1244
- return text
1245
-
1246
-
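Each of the removed per-backend methods repeated the same bookkeeping: resolve per-call overrides against the instance defaults, then build an HTTP payload. A condensed sketch of that pattern follows (names mirror the removed code; in 0.10.0 this bookkeeping moves into the binding implementations).

def build_generation_payload(self, prompt, model_name=None, n_predict=None, temperature=None):
    # Fall back to instance defaults, as the removed lollms/openai/ollama/litellm methods did inline.
    model_name = model_name if model_name else self.model_name
    n_predict = n_predict if n_predict else self.n_predict
    temperature = temperature if temperature is not None else self.temperature
    return {
        "model": model_name,
        "prompt": prompt,
        "stream": True,
        "temperature": float(temperature),
        "max_tokens": n_predict,
    }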
1247
- def lollms_listMountedPersonalities(self, host_address:str=None):
1248
- host_address = host_address if host_address else self.host_address
1249
- url = f"{host_address}/list_mounted_personalities"
1250
-
1251
- response = requests.get(url)
1252
-
1253
- if response.status_code == 200:
1254
- try:
1255
- text = json.loads(response.content.decode("utf-8"))
1256
- return text
1257
- except Exception as ex:
1258
- return {"status": False, "error": str(ex)}
1259
- else:
1260
- return {"status": False, "error": response.text}
1261
-
1262
- def listModels(self, host_address:str=None):
1263
- if self.default_generation_mode == ELF_GENERATION_FORMAT.LOLLMS:
1264
- return self.lollms_listModels(host_address)
1265
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.OLLAMA:
1266
- return self.ollama_listModels(host_address)
1267
- elif self.default_generation_mode == ELF_GENERATION_FORMAT.OPENAI:
1268
- return self.openai_listModels(host_address)
1269
-
1270
- def lollms_listModels(self, host_address:str=None):
1271
- host_address = host_address if host_address else self.host_address
1272
- url = f"{host_address}/list_models"
1273
-
1274
- response = requests.get(url)
255
+ def embed(self, text):
256
+ self.binding.embed(text)
1275
257
 
1276
- if response.status_code == 200:
1277
- try:
1278
- text = json.loads(response.content.decode("utf-8"))
1279
- return text
1280
- except Exception as ex:
1281
- return {"status": False, "error": str(ex)}
1282
- else:
1283
- return {"status": False, "error": response.text}
1284
-
1285
- def ollama_listModels(self, host_address:str=None):
1286
- if host_address is None:
1287
- host_address = self.host_address
1288
- url = f'{host_address}/api/tags'
1289
- headers = {
1290
- 'accept': 'application/json',
1291
- 'Authorization': f'Bearer {self.service_key}'
1292
- }
1293
- response = requests.get(url, headers=headers, verify= self.verify_ssl_certificate)
1294
- try:
1295
- data = response.json()
1296
- model_info = []
1297
-
1298
- for model in data['models']:
1299
- model_name = model['name']
1300
- owned_by = ""
1301
- created_datetime = model["modified_at"]
1302
- model_info.append({'model_name': model_name, 'owned_by': owned_by, 'created_datetime': created_datetime})
1303
-
1304
- return model_info
1305
- except Exception as ex:
1306
- trace_exception(ex)
1307
- return []
1308
-
1309
- def openai_listModels(self, host_address:str=None):
1310
- if host_address is None:
1311
- host_address = self.host_address
1312
- url = f'{host_address}/v1/models'
1313
- headers = {
1314
- 'accept': 'application/json',
1315
- 'Authorization': f'Bearer {self.service_key}'
1316
- }
1317
- response = requests.get(url, headers=headers, verify= self.verify_ssl_certificate)
1318
- try:
1319
- data = response.json()
1320
- model_info = []
1321
258
 
1322
- for model in data["data"]:
1323
- model_name = model['id']
1324
- owned_by = model['owned_by']
1325
- created_datetime = model["created"]
1326
- model_info.append({'model_name': model_name, 'owned_by': owned_by, 'created_datetime': created_datetime})
259
+ def listModels(self):
260
+ self.binding.listModels()
1327
261
 
1328
- return model_info
1329
- except Exception as ex:
1330
- trace_exception(ex)
1331
- return []
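In 0.10.0 the per-backend model listing and embedding helpers removed above are replaced by thin delegation to the active binding, via the `embed` and `listModels` methods added in this diff. A hypothetical usage sketch follows; the import path, binding name, and model are assumptions taken from the `__main__` example later in this file, and note that as added both methods forward to the binding without returning its result.

from lollms_client import LollmsClient  # import path assumed

lc = LollmsClient("ollama", model_name="mistral-nemo:latest")  # binding/model are assumptions
lc.listModels()        # forwards to self.binding.listModels()
lc.embed("some text")  # forwards to self.binding.embed(text); returns None as written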
1332
262
 
1333
263
 
1334
264
  def generate_codes(
@@ -1376,11 +306,11 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
1376
306
  {self.ai_full_header}"""
1377
307
 
1378
308
  if len(self.image_files)>0:
1379
- response = self.generate_with_images(full_prompt, self.image_files, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
309
+ response = self.generate_text_with_images(full_prompt, self.image_files, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
1380
310
  elif len(images)>0:
1381
- response = self.generate_with_images(full_prompt, images, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
311
+ response = self.generate_text_with_images(full_prompt, images, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
1382
312
  else:
1383
- response = self.generate(full_prompt, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
313
+ response = self.generate_text(full_prompt, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, callback, debug=debug)
1384
314
  response_full += response
1385
315
  codes = self.extract_code_blocks(response)
1386
316
  return codes
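This hunk tracks the renaming of the generation entry points: `generate` becomes `generate_text` and `generate_with_images` becomes `generate_text_with_images`. A minimal, hypothetical call sketch, assuming only the keyword names visible elsewhere in this diff (`n_predict`, `streaming_callback`) and the positional images list; exact signatures may differ.

from lollms_client import LollmsClient  # import path assumed

lc = LollmsClient("ollama", model_name="mistral-nemo:latest")  # binding/model are assumptions

def on_chunk(chunk, msg_type):
    print(chunk, end="", flush=True)
    return True  # returning False stops the streaming loop, as in the removed code

answer = lc.generate_text("Summarize the lollms project in one line.",
                          n_predict=128, streaming_callback=on_chunk)
caption = lc.generate_text_with_images("Describe this picture.",
                                       ["/path/to/image.jpg"])  # image path is a placeholder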
@@ -1428,16 +358,13 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
1428
358
  full_prompt += f"""You must return a single code tag.
1429
359
  Do not split the code in multiple tags.
1430
360
  {self.ai_full_header}"""
1431
- if len(images)>0:
1432
- response = self.generate_with_images(full_prompt, images, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, streaming_callback=callback)
1433
- else:
1434
- response = self.generate(full_prompt, max_size, False, temperature, top_k, top_p, repeat_penalty, repeat_last_n, streaming_callback=callback)
361
+ response = self.generate_text(full_prompt, images, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, streaming_callback=callback)
1435
362
  codes = self.extract_code_blocks(response)
1436
363
  if len(codes)>0:
1437
364
  if not codes[-1]["is_complete"]:
1438
365
  code = "\n".join(codes[-1]["content"].split("\n")[:-1])
1439
366
  while not codes[-1]["is_complete"]:
1440
- response = self.generate(prompt+code+self.user_full_header+"continue the code. Start from last line and continue the code. Put the code inside a markdown code tag."+self.separator_template+self.ai_full_header, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, streaming_callback=callback)
367
+ response = self.generate_text(prompt+code+self.user_full_header+"continue the code. Start from last line and continue the code. Put the code inside a markdown code tag."+self.separator_template+self.ai_full_header, max_size, temperature, top_k, top_p, repeat_penalty, repeat_last_n, streaming_callback=callback)
1441
368
  codes = self.extract_code_blocks(response)
1442
369
  if len(codes)==0:
1443
370
  break
@@ -1631,7 +558,7 @@ Do not split the code in multiple tags.
1631
558
  }
1632
559
  """
1633
560
 
1634
- response = self.generate_code(
561
+ response = self.generate_text_code(
1635
562
  prompt=prompt,
1636
563
  template=template,
1637
564
  language="json",
@@ -1699,7 +626,7 @@ Do not split the code in multiple tags.
1699
626
  else:
1700
627
  prompt += "{\"index\": (the selected answer index)}"
1701
628
 
1702
- response = self.generate_code(prompt, language="json", max_size=max_answer_length,
629
+ response = self.generate_text_code(prompt, language="json", max_size=max_answer_length,
1703
630
  accept_all_if_no_code_tags_is_present=True, return_full_generated_code=False, callback=callback)
1704
631
 
1705
632
  try:
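`generate_code` is likewise renamed to `generate_text_code`; here it forces the model to answer a multiple-choice question as a JSON object whose `index` field is then parsed. A hypothetical sketch using only the parameters visible in this diff:

import json
from lollms_client import LollmsClient  # import path assumed

lc = LollmsClient("ollama", model_name="mistral-nemo:latest")  # binding/model are assumptions
prompt = ('Which of these is a prime number? Answers: ["4", "7", "9"]\n'
          '{"index": (the selected answer index)}')
raw = lc.generate_text_code(prompt, language="json",
                            accept_all_if_no_code_tags_is_present=True,
                            return_full_generated_code=False)
choice = json.loads(raw)["index"]  # the diff parses the returned JSON the same way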
@@ -1761,7 +688,7 @@ Do not split the code in multiple tags.
1761
688
  else:
1762
689
  prompt += "{\"ranking\": (list of indices ordered from best to worst)}"
1763
690
 
1764
- response = self.generate_code(prompt, language="json", return_full_generated_code=False, callback=callback)
691
+ response = self.generate_text_code(prompt, language="json", return_full_generated_code=False, callback=callback)
1765
692
 
1766
693
  try:
1767
694
  result = json.loads(response)
@@ -1883,7 +810,7 @@ Do not discuss the information inside thememory, just put the relevant informati
1883
810
  ASCIIColors.yellow(f" ----- {chunk_id-1} ------")
1884
811
  ASCIIColors.red(prompt)
1885
812
 
1886
- memory = self.generate(prompt, n_predict=ctx_size//4, streaming_callback=callback).strip()
813
+ memory = self.generate_text(prompt, n_predict=ctx_size//4, streaming_callback=callback).strip()
1887
814
  code = self.extract_code_blocks(memory)
1888
815
  if code:
1889
816
  memory=code[0]["content"]
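The sequential summarization loop keeps a running `memory`: each chunk is folded into it with a call to the renamed `generate_text`, and the updated memory is recovered from the first returned code block. A condensed sketch of that per-chunk step (helper names mirror the diff; the prompt text is illustrative, not the library's actual template):

def update_memory(self, memory, chunk, ctx_size, callback=None):
    # One iteration of the summarization loop shown in the diff.
    prompt = (f"Current memory:\n{memory}\n\nNew chunk:\n{chunk}\n\n"
              "Update the memory with the relevant information from the chunk. "
              "Return the updated memory inside a markdown code tag.")
    output = self.generate_text(prompt, n_predict=ctx_size // 4,
                                streaming_callback=callback).strip()
    code = self.extract_code_blocks(output)
    return code[0]["content"] if code else output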
@@ -1919,13 +846,13 @@ The updated memory must be put in a {chunk_processing_output_format} markdown ta
1919
846
 
1920
847
  # Generate final summary
1921
848
  final_prompt = final_prompt_template
1922
- memory = self.generate(final_prompt, streaming_callback=callback)
849
+ memory = self.generate_text(final_prompt, streaming_callback=callback)
1923
850
  code = self.extract_code_blocks(memory)
1924
851
  if code:
1925
852
  memory=code[0]["content"]
1926
853
  return memory
1927
854
 
1928
- def deepsearch(
855
+ def deep_analyze(
1929
856
  self,
1930
857
  query: str,
1931
858
  text: str = None,
@@ -2043,7 +970,7 @@ Make sure to extrafct only information relevant to be able to answer the query o
2043
970
  print(f"----- Chunk {chunk_id} from {file_name} ------")
2044
971
  print(prompt)
2045
972
 
2046
- output = self.generate(prompt, n_predict=ctx_size // 4, streaming_callback=callback).strip()
973
+ output = self.generate_text(prompt, n_predict=ctx_size // 4, streaming_callback=callback).strip()
2047
974
  code = self.extract_code_blocks(output)
2048
975
  memory = code[0]["content"] if code else output
2049
976
 
@@ -2071,7 +998,7 @@ Collected findings:
2071
998
  Provide the final output in {output_format} format.
2072
999
  {self.ai_full_header}
2073
1000
  """
2074
- final_output = self.generate(final_prompt, streaming_callback=callback)
1001
+ final_output = self.generate_text(final_prompt, streaming_callback=callback)
2075
1002
  code = self.extract_code_blocks(final_output)
2076
1003
  return code[0]["content"] if code else final_output
2077
1004
  return memory
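`deepsearch` is renamed to `deep_analyze`; it scans a document chunk by chunk for material relevant to a query, then produces a final answer from the collected findings. A hypothetical call, assuming only the `query` and `text` parameters shown in the signature above; the file name and import path are placeholders.

from lollms_client import LollmsClient  # import path assumed

lc = LollmsClient("ollama", model_name="mistral-nemo:latest")  # binding/model are assumptions
document = open("README.md", encoding="utf-8").read()          # file name is a placeholder
report = lc.deep_analyze("What license does the project use?", text=document)
print(report)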
@@ -2081,9 +1008,9 @@ def error(self, content, duration:int=4, client_id=None, verbose:bool=True):
2081
1008
 
2082
1009
 
2083
1010
  if __name__=="__main__":
2084
- #lc = LollmsClient("http://localhost:9600")
1011
+ lc = LollmsClient("ollama", model_name="mistral-nemo:latest")
2085
1012
  #lc = LollmsClient("http://localhost:11434", model_name="mistral-nemo:latest", default_generation_mode=ELF_GENERATION_FORMAT.OLLAMA)
2086
- lc = LollmsClient(model_name="gpt-3.5-turbo-0125", default_generation_mode=ELF_GENERATION_FORMAT.OPENAI)
1013
+ #lc = LollmsClient(model_name="gpt-3.5-turbo-0125", default_generation_mode=ELF_GENERATION_FORMAT.OPENAI)
2087
1014
  print(lc.listModels())
2088
1015
  code = lc.generate_code("Build a simple json that contains name and age. Put the output inside a json markdown tag")
2089
1016
  print(code)
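Putting the pieces together, the updated `__main__` block boils down to the quickstart below. The binding name and model are the ones shown in the diff, the import path is assumed, and a reachable Ollama server is required for the calls to actually return anything.

from lollms_client import LollmsClient  # import path assumed

if __name__ == "__main__":
    lc = LollmsClient("ollama", model_name="mistral-nemo:latest")
    print(lc.listModels())  # delegates to the binding; may return None as written in 0.10.0
    code = lc.generate_code(
        "Build a simple json that contains name and age. "
        "Put the output inside a json markdown tag"
    )
    print(code)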