lollms-client 0.9.2__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lollms-client might be problematic.

@@ -0,0 +1,260 @@
+ # bindings/OpenAI/binding.py
+ import requests
+ import json
+ from lollms_client.lollms_llm_binding import LollmsLLMBinding
+ from lollms_client.lollms_types import MSG_TYPE
+ from lollms_client.lollms_utilities import encode_image
+ from lollms_client.lollms_types import ELF_COMPLETION_FORMAT
+ from typing import Optional, Callable, List, Union
+ from ascii_colors import ASCIIColors, trace_exception
+ import pipmaster as pm
+ if not pm.is_installed("openai"):
+     pm.install("openai")
+ if not pm.is_installed("tiktoken"):
+     pm.install("tiktoken")
+ import openai
+ import tiktoken
+ import os
+
+ BindingName = "OpenAIBinding"
+
+
+ class OpenAIBinding(LollmsLLMBinding):
+     """OpenAI-specific binding implementation"""
+
+
+     def __init__(self,
+                  host_address: str = None,
+                  model_name: str = "",
+                  service_key: str = None,
+                  verify_ssl_certificate: bool = True,
+                  default_completion_format: ELF_COMPLETION_FORMAT = ELF_COMPLETION_FORMAT.Chat):
+         """
+         Initialize the OpenAI binding.
+
+         Args:
+             host_address (str): Host address for the OpenAI service. Defaults to DEFAULT_HOST_ADDRESS.
+             model_name (str): Name of the model to use. Defaults to empty string.
+             service_key (str): Authentication key for the service. Falls back to the OPENAI_API_KEY environment variable if not provided.
+             verify_ssl_certificate (bool): Whether to verify SSL certificates. Defaults to True.
+             default_completion_format (ELF_COMPLETION_FORMAT): Completion format to use (Chat or Instruct). Defaults to Chat.
+         """
+         super().__init__(
+             host_address=host_address if host_address is not None else self.DEFAULT_HOST_ADDRESS,
+             model_name=model_name,
+             service_key=service_key,
+             verify_ssl_certificate=verify_ssl_certificate,
+             default_completion_format=default_completion_format
+         )
+         self.service_key = service_key if service_key else os.getenv("OPENAI_API_KEY", "")
+         self.client = openai.OpenAI(api_key=self.service_key, base_url=host_address)
+
+
+     def generate_text(self,
+                       prompt: str,
+                       images: Optional[List[str]] = None,
+                       n_predict: Optional[int] = None,
+                       stream: bool = False,
+                       temperature: float = 0.1,
+                       top_k: int = 50,
+                       top_p: float = 0.95,
+                       repeat_penalty: float = 0.8,
+                       repeat_last_n: int = 40,
+                       seed: Optional[int] = None,
+                       n_threads: int = 8,
+                       streaming_callback: Optional[Callable[[str, str], None]] = None) -> str:
+         """
+         Generate text based on the provided prompt and parameters.
+
+         Args:
+             prompt (str): The input prompt for text generation.
+             images (Optional[List[str]]): List of image file paths for multimodal generation.
+             n_predict (Optional[int]): Maximum number of tokens to generate.
+             stream (bool): Whether to stream the output. Defaults to False.
+             temperature (float): Sampling temperature. Defaults to 0.1.
+             top_k (int): Top-k sampling parameter. Defaults to 50.
+             top_p (float): Top-p sampling parameter. Defaults to 0.95.
+             repeat_penalty (float): Penalty for repeated tokens. Defaults to 0.8.
+             repeat_last_n (int): Number of previous tokens to consider for the repeat penalty. Defaults to 40.
+             seed (Optional[int]): Random seed for generation.
+             n_threads (int): Number of threads to use. Defaults to 8.
+             streaming_callback (Optional[Callable[[str, str], None]]): Callback function for streaming output.
+                 - First parameter (str): The chunk of text received.
+                 - Second parameter (str): The message type (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
+
+         Returns:
+             str: Generated text, or an error dictionary if generation failed.
+         """
+         count = 0
+         output = ""
+
+         # Prepare messages based on whether images are provided
+         if images:
+             messages = [
+                 {
+                     "role": "user",
+                     "content": [
+                         {
+                             "type": "text",
+                             "text": prompt
+                         }
+                     ] + [
+                         {
+                             "type": "image_url",
+                             "image_url": {
+                                 "url": f"data:image/jpeg;base64,{encode_image(image_path)}"
+                             }
+                         }
+                         for image_path in images
+                     ]
+                 }
+             ]
+         else:
+             messages = [{"role": "user", "content": prompt}]
+
+         # Generate text using the OpenAI API
+         if self.default_completion_format == ELF_COMPLETION_FORMAT.Chat:
+             chat_completion = self.client.chat.completions.create(
+                 model=self.model_name,  # Choose the model according to your OpenAI plan
+                 messages=messages,
+                 max_tokens=n_predict,  # Maximum length of the generated response
+                 n=1,  # Number of responses to generate
+                 temperature=temperature,  # Higher values give more random output
+                 stream=stream
+             )
+
+             if stream:
+                 for resp in chat_completion:
+                     if n_predict is not None and count >= n_predict:
+                         break
+                     try:
+                         word = resp.choices[0].delta.content or ""
+                     except Exception as ex:
+                         word = ""
+                     if streaming_callback is not None:
+                         if not streaming_callback(word, "MSG_TYPE_CHUNK"):
+                             break
+                     if word:
+                         output += word
+                         count += 1
+             else:
+                 output = chat_completion.choices[0].message.content
+         else:
+             completion = self.client.completions.create(
+                 model=self.model_name,  # Choose the model according to your OpenAI plan
+                 prompt=prompt,
+                 max_tokens=n_predict,  # Maximum length of the generated response
+                 n=1,  # Number of responses to generate
+                 temperature=temperature,  # Higher values give more random output
+                 stream=stream
+             )
+
+             if stream:
+                 for resp in completion:
+                     if n_predict is not None and count >= n_predict:
+                         break
+                     try:
+                         word = resp.choices[0].text
+                     except Exception as ex:
+                         word = ""
+                     if streaming_callback is not None:
+                         if not streaming_callback(word, "MSG_TYPE_CHUNK"):
+                             break
+                     if word:
+                         output += word
+                         count += 1
+             else:
+                 output = completion.choices[0].text
+
+         return output
+
+     def tokenize(self, text: str) -> list:
+         """
+         Tokenize the input text using the tiktoken encoding for the current model.
+
+         Args:
+             text (str): The text to tokenize.
+
+         Returns:
+             list: List of token ids.
+         """
+         try:
+             return tiktoken.model.encoding_for_model(self.model_name).encode(text)
+         except Exception:
+             return tiktoken.model.encoding_for_model("gpt-3.5-turbo").encode(text)
+
+     def detokenize(self, tokens: list) -> str:
+         """
+         Convert a list of token ids back to text.
+
+         Args:
+             tokens (list): List of token ids to detokenize.
+
+         Returns:
+             str: Detokenized text.
+         """
+         try:
+             return tiktoken.model.encoding_for_model(self.model_name).decode(tokens)
+         except Exception:
+             return tiktoken.model.encoding_for_model("gpt-3.5-turbo").decode(tokens)
+
+     def embed(self, text: str, **kwargs) -> list:
+         """
+         Get embeddings for the input text using the OpenAI API.
+
+         Args:
+             text (str or List[str]): Input text to embed.
+             **kwargs: Additional arguments, such as the embedding model to use.
+
+         Returns:
+             list: Embedding vector(s) for the input text.
+         """
+         pass
+     def get_model_info(self) -> dict:
+         """
+         Return information about the current OpenAI binding.
+
+         Returns:
+             dict: Dictionary containing the binding name, version, host address, and model name.
+         """
+         return {
+             "name": "OpenAI",
+             "version": "2.0",
+             "host_address": self.host_address,
+             "model_name": self.model_name
+         }
+     def listModels(self):
+         """Lists the models available on the configured endpoint."""
+         url = f'{self.host_address}/v1/models'
+         headers = {
+             'accept': 'application/json',
+             'Authorization': f'Bearer {self.service_key}'
+         }
+         response = requests.get(url, headers=headers, verify=self.verify_ssl_certificate)
+         try:
+             data = response.json()
+             model_info = []
+
+             for model in data["data"]:
+                 model_name = model['id']
+                 owned_by = model['owned_by']
+                 created_datetime = model["created"]
+                 model_info.append({'model_name': model_name, 'owned_by': owned_by, 'created_datetime': created_datetime})
+
+             return model_info
+         except Exception as ex:
+             trace_exception(ex)
+             return []
+     def load_model(self, model_name: str) -> bool:
+         """
+         Load a specific model into the OpenAI binding.
+
+         Args:
+             model_name (str): Name of the model to load.
+
+         Returns:
+             bool: True if the model was loaded successfully.
+         """
+         self.model = model_name
+         self.model_name = model_name
+         return True
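
For context, here is a minimal usage sketch of the OpenAI binding added above. It is only a sketch: the import path is not shown in this diff, the model name is illustrative, and OPENAI_API_KEY is assumed to be set.

import os
# Hypothetical usage; OpenAIBinding is the class added in the hunk above
binding = OpenAIBinding(host_address="https://api.openai.com/v1",
                        model_name="gpt-4o-mini",  # illustrative model name
                        service_key=os.getenv("OPENAI_API_KEY"))

def on_chunk(chunk, msg_type):
    # Print streamed chunks as they arrive; returning False stops generation
    print(chunk, end="", flush=True)
    return True

binding.generate_text("Say hello in one sentence.", n_predict=64,
                      stream=True, streaming_callback=on_chunk)
tokens = binding.tokenize("hello world")  # tiktoken token ids
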
@@ -0,0 +1,282 @@
+ # bindings/transformers/binding.py
+ import requests
+ import json
+ from lollms_client.lollms_llm_binding import LollmsLLMBinding
+ from lollms_client.lollms_types import MSG_TYPE
+ from lollms_client.lollms_utilities import encode_image
+ from lollms_client.lollms_types import ELF_COMPLETION_FORMAT
+ from typing import Optional, Callable, List, Union
+ from ascii_colors import ASCIIColors
+
+ import pipmaster as pm
+ if not pm.is_installed("torch"):
+     ASCIIColors.yellow("Transformers: Torch not found. Installing it")
+     pm.install_multiple(["torch","torchvision","torchaudio"], "https://download.pytorch.org/whl/cu121", force_reinstall=True)
+
+ import torch
+ if not torch.cuda.is_available():
+     ASCIIColors.yellow("Transformers: Torch not using cuda. Reinstalling it")
+     pm.install_multiple(["torch","torchvision","torchaudio"], "https://download.pytorch.org/whl/cu121", force_reinstall=True)
+     import torch
+
+ if not pm.is_installed("transformers"):
+     pm.install_or_update("transformers")
+
+ BindingName = "TransformersBinding"
+
+ import transformers
+ from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, BitsAndBytesConfig
+ from packaging import version
+
+ class TransformersBinding(LollmsLLMBinding):
+     """Transformers-specific binding implementation"""
+
+     def __init__(self,
+                  host_address: str = None,
+                  model_name: str = "",
+                  service_key: str = None,
+                  verify_ssl_certificate: bool = True,
+                  default_completion_format: ELF_COMPLETION_FORMAT = ELF_COMPLETION_FORMAT.Chat):
+         """
+         Initialize the Transformers binding.
+
+         Args:
+             host_address (str): Host address for the service. Defaults to None.
+             model_name (str): Name of the model to use. Defaults to empty string.
+             service_key (str): Authentication key for the service. Defaults to None.
+             verify_ssl_certificate (bool): Whether to verify SSL certificates. Defaults to True.
+             default_completion_format (ELF_COMPLETION_FORMAT): Default format for completions.
+         """
+         super().__init__(
+             host_address=host_address,
+             model_name=model_name,
+             service_key=service_key,
+             verify_ssl_certificate=verify_ssl_certificate,
+             default_completion_format=default_completion_format
+         )
+
+         # Configure 4-bit quantization
+         quantization_config = BitsAndBytesConfig(
+             load_in_4bit=True,
+             bnb_4bit_compute_dtype=torch.bfloat16,
+             bnb_4bit_quant_type="nf4",
+             bnb_4bit_use_double_quant=True
+         )
+
+         self.tokenizer = AutoTokenizer.from_pretrained(
+             str(model_name),
+             trust_remote_code=False
+         )
+
+         self.model = AutoModelForCausalLM.from_pretrained(
+             str(model_name),
+             device_map="auto",
+             quantization_config=quantization_config,
+             torch_dtype=torch.bfloat16
+         )
+
+         self.generation_config = GenerationConfig.from_pretrained(str(model_name))
+
+         # Display device information
+         device = next(self.model.parameters()).device
+         device_type = "CPU" if device.type == "cpu" else "GPU"
+         device_str = f"Running on {device}"
+
+         ASCIIColors.multicolor(
+             ["Model loaded - ", device_str],
+             [ASCIIColors.color_green, ASCIIColors.color_blue if device_type == "GPU" else ASCIIColors.color_red]
+         )
+
+
+     def generate_text(self,
+                       prompt: str,
+                       images: Optional[List[str]] = None,
+                       n_predict: Optional[int] = None,
+                       stream: bool = False,
+                       temperature: float = 0.1,
+                       top_k: int = 50,
+                       top_p: float = 0.95,
+                       repeat_penalty: float = 0.8,
+                       repeat_last_n: int = 40,
+                       seed: Optional[int] = None,
+                       n_threads: int = 8,
+                       streaming_callback: Optional[Callable[[str, str], None]] = None,
+                       return_legacy_cache: bool = False) -> Union[str, dict]:
+         """
+         Generate text using the Transformers model, with optional image support.
+
+         Args:
+             prompt (str): The input prompt for text generation.
+             images (Optional[List[str]]): List of image file paths for multimodal generation.
+             n_predict (Optional[int]): Maximum number of tokens to generate.
+             stream (bool): Whether to stream the output. Defaults to False.
+             temperature (float): Sampling temperature. Defaults to 0.1.
+             top_k (int): Top-k sampling parameter. Defaults to 50.
+             top_p (float): Top-p sampling parameter. Defaults to 0.95.
+             repeat_penalty (float): Penalty for repeated tokens. Defaults to 0.8.
+             repeat_last_n (int): Number of previous tokens to consider for the repeat penalty. Defaults to 40.
+             seed (Optional[int]): Random seed for generation.
+             n_threads (int): Number of threads to use. Defaults to 8.
+             streaming_callback (Optional[Callable[[str, str], None]]): Callback for streaming output.
+             return_legacy_cache (bool): Whether to use the legacy cache format (pre-v4.47). Defaults to False.
+
+         Returns:
+             Union[str, dict]: Generated text if successful, or a dictionary with status and error if failed.
+         """
+         try:
+             if not self.model or not self.tokenizer:
+                 return {"status": "error", "error": "Model or tokenizer not loaded"}
+
+             # Set seed if provided
+             if seed is not None:
+                 torch.manual_seed(seed)
+
+             # Prepare generation config
+             self.generation_config.max_new_tokens = n_predict if n_predict else 2048
+             self.generation_config.temperature = temperature
+             self.generation_config.top_k = top_k
+             self.generation_config.top_p = top_p
+             self.generation_config.repetition_penalty = repeat_penalty
+             self.generation_config.pad_token_id = self.tokenizer.pad_token_id if self.tokenizer.pad_token_id is not None else self.tokenizer.eos_token_id
+
+             # Handle image input if provided (basic implementation, applied before tokenization)
+             if images and len(images) > 0:
+                 ASCIIColors.yellow("Warning: Image processing not fully implemented in this binding")
+                 prompt += "\n[Image content not processed]"
+
+             # Tokenize input with attention mask
+             inputs = self.tokenizer(prompt, return_tensors="pt", padding=True)
+             input_ids = inputs.input_ids.to(self.model.device)
+             attention_mask = inputs.attention_mask.to(self.model.device)
+
+             # Check transformers version for cache handling
+             use_legacy_cache = return_legacy_cache or version.parse(transformers.__version__) < version.parse("4.47.0")
+
+             if stream:
+                 # Streaming case
+                 if not streaming_callback:
+                     return {"status": "error", "error": "Streaming callback required for stream mode"}
+
+                 generated_text = ""
+                 # Generate with streaming
+                 for output in self.model.generate(
+                     input_ids,
+                     attention_mask=attention_mask,
+                     generation_config=self.generation_config,
+                     do_sample=True,
+                     return_dict_in_generate=True,
+                     output_scores=False,
+                     return_legacy_cache=use_legacy_cache
+                 ):
+                     # Handle different output formats based on version/cache setting
+                     if use_legacy_cache:
+                         # Legacy format: tuple of (sequences, scores, ...)
+                         sequences = output[0]
+                     else:
+                         # New format: GenerateOutput with a sequences attribute
+                         sequences = output.sequences
+
+                     # Decode the new tokens
+                     new_tokens = sequences[:, -1:]  # Get the last generated token
+                     chunk = self.tokenizer.decode(new_tokens[0], skip_special_tokens=True)
+                     generated_text += chunk
+
+                     # Send chunk through callback
+                     streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK)
+
+                 return generated_text
+
+             else:
+                 # Non-streaming case
+                 outputs = self.model.generate(
+                     input_ids,
+                     attention_mask=attention_mask,
+                     generation_config=self.generation_config,
+                     do_sample=True,
+                     return_dict_in_generate=True,
+                     output_scores=False,
+                     return_legacy_cache=use_legacy_cache
+                 )
+
+                 # Handle different output formats
+                 sequences = outputs[0] if use_legacy_cache else outputs.sequences
+
+                 # Decode the full sequence, removing the input prompt
+                 generated_text = self.tokenizer.decode(
+                     sequences[0][input_ids.shape[-1]:],
+                     skip_special_tokens=True
+                 )
+
+                 return generated_text
+
+         except Exception as e:
+             error_msg = f"Error generating text: {str(e)}"
+             ASCIIColors.red(error_msg)
+             return {"status": "error", "error": error_msg}
+
+
+
+     def tokenize(self, text: str) -> list:
+         """
+         Tokenize the input text into a list of characters.
+
+         Args:
+             text (str): The text to tokenize.
+
+         Returns:
+             list: List of individual characters.
+         """
+         return list(text)
+
+     def detokenize(self, tokens: list) -> str:
+         """
+         Convert a list of tokens back to text.
+
+         Args:
+             tokens (list): List of tokens (characters) to detokenize.
+
+         Returns:
+             str: Detokenized text.
+         """
+         return "".join(tokens)
+     def embed(self, text: str, **kwargs) -> list:
+         """
+         Get embeddings for the input text using the Transformers model.
+
+         Args:
+             text (str or List[str]): Input text to embed.
+             **kwargs: Additional arguments, such as the embedding model to use.
+
+         Returns:
+             list: Embedding vector(s) for the input text.
+         """
+         pass
+     def get_model_info(self) -> dict:
+         """
+         Return information about the current Transformers model.
+
+         Returns:
+             dict: Dictionary containing the binding name, version, host address, and model name.
+         """
+         return {
+             "name": "transformers",
+             "version": "2.0",
+             "host_address": self.host_address,
+             "model_name": self.model_name
+         }
+     def listModels(self):
+         """ Lists available models """
+         pass
+     def load_model(self, model_name: str) -> bool:
+         """
+         Load a specific model into the Transformers binding.
+
+         Args:
+             model_name (str): Name of the model to load.
+
+         Returns:
+             bool: True if the model was loaded successfully.
+         """
+         self.model = model_name
+         self.model_name = model_name
+         return True
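
For context, a minimal usage sketch of the Transformers binding added above. The model identifier is illustrative; because the constructor loads the model with 4-bit quantization via BitsAndBytesConfig, a CUDA-capable GPU and the bitsandbytes package are assumed.

# Hypothetical usage; TransformersBinding is the class added in the hunk above
binding = TransformersBinding(model_name="Qwen/Qwen2.5-0.5B-Instruct")  # illustrative model id

result = binding.generate_text("Explain what a tokenizer does.",
                               n_predict=128,
                               temperature=0.7)
print(result)
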