lollms-client 0.12.3__py3-none-any.whl → 0.12.6__py3-none-any.whl

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of lollms-client might be problematic. See the package's page on the registry for more details.

@@ -3,11 +3,17 @@ import requests
3
3
  import json
4
4
  from lollms_client.lollms_llm_binding import LollmsLLMBinding
5
5
  from lollms_client.lollms_types import MSG_TYPE
6
- from lollms_client.lollms_utilities import encode_image
6
+ # encode_image is not strictly needed if ollama-python handles paths, but kept for consistency if ever needed.
7
+ # from lollms_client.lollms_utilities import encode_image
7
8
  from lollms_client.lollms_types import ELF_COMPLETION_FORMAT
8
- from typing import Optional, Callable, List, Union
9
+ from typing import Optional, Callable, List, Union, Dict
10
+
9
11
  from ascii_colors import ASCIIColors, trace_exception
12
+ import pipmaster as pm
13
+ pm.ensure_packages(["ollama","pillow"])
14
+
10
15
 
16
+ import ollama
11
17
  BindingName = "OllamaBinding"
12
18
 
13
19
 
@@ -15,7 +21,9 @@ def count_tokens_ollama(
15
21
  text_to_tokenize: str,
16
22
  model_name: str,
17
23
  ollama_host: str = "http://localhost:11434",
18
- timeout: int = 30
24
+ timeout: int = 30,
25
+ verify_ssl_certificate: bool = True,
26
+ headers: Optional[Dict[str, str]] = None
19
27
  ) -> int:
20
28
  """
21
29
  Counts the number of tokens in a given text using a specified Ollama model
@@ -26,25 +34,21 @@ def count_tokens_ollama(
26
34
  model_name (str): The name of the Ollama model to use (e.g., "llama3", "mistral").
27
35
  ollama_host (str): The base URL of the Ollama server (default: "http://localhost:11434").
28
36
  timeout (int): Timeout for the request in seconds (default: 30).
37
+ verify_ssl_certificate (bool): Whether to verify SSL.
38
+ headers (Optional[Dict[str, str]]): Optional headers for the request.
29
39
 
30
40
  Returns:
31
41
  int: The number of tokens. Returns -1 if an error occurs.
32
-
33
- Raises:
34
- requests.exceptions.RequestException: For network issues or timeouts.
35
- requests.exceptions.HTTPError: For HTTP error responses (4xx or 5xx).
36
- ValueError: If the response from Ollama is not as expected (e.g., missing 'tokens' key).
37
42
  """
38
- api_url = f"{ollama_host}/api/tokenize"
43
+ api_url = f"{ollama_host.rstrip('/')}/api/tokenize"
39
44
  payload = {
40
45
  "model": model_name,
41
46
  "prompt": text_to_tokenize
42
- # You can add "options" here if needed, but for tokenization, it's usually not required.
43
- # "options": {"num_ctx": 4096} # Example, might influence tokenizer for specific context length
44
47
  }
48
+ request_headers = headers if headers else {}
45
49
 
46
50
  try:
47
- response = requests.post(api_url, json=payload, timeout=timeout)
51
+ response = requests.post(api_url, json=payload, timeout=timeout, verify=verify_ssl_certificate, headers=request_headers)
48
52
  response.raise_for_status() # Raises HTTPError for bad responses (4xx or 5xx)
49
53
 
50
54
  response_data = response.json()
@@ -52,33 +56,28 @@ def count_tokens_ollama(
52
56
  if "tokens" in response_data and isinstance(response_data["tokens"], list):
53
57
  return len(response_data["tokens"])
54
58
  else:
55
- raise ValueError(
56
- f"Ollama response did not contain a 'tokens' list. Response: {response_data}"
59
+ ASCIIColors.warning(
60
+ f"Ollama response for token count did not contain a 'tokens' list. Response: {response_data}"
57
61
  )
62
+ return -1 # Or raise ValueError
58
63
 
59
64
  except requests.exceptions.HTTPError as http_err:
60
- # You might want to inspect http_err.response.text for more details from Ollama
61
- print(f"HTTP error occurred: {http_err} - {http_err.response.text}")
62
- raise # Re-raise the exception
63
- except requests.exceptions.ConnectionError as conn_err:
64
- print(f"Connection error occurred: {conn_err}")
65
- raise
66
- except requests.exceptions.Timeout as timeout_err:
67
- print(f"Timeout error occurred: {timeout_err}")
68
- raise
65
+ ASCIIColors.error(f"HTTP error occurred during token count: {http_err} - {http_err.response.text if http_err.response else 'No response text'}")
66
+ return -1
69
67
  except requests.exceptions.RequestException as req_err:
70
- print(f"An unexpected error occurred with the request: {req_err}")
71
- raise
68
+ ASCIIColors.error(f"Request error occurred during token count: {req_err}")
69
+ return -1
72
70
  except json.JSONDecodeError as json_err:
73
- # This can happen if the server returns non-JSON, e.g., an HTML error page
74
- raise ValueError(
75
- f"Failed to decode JSON response from Ollama: {json_err}. Response text: {response.text}"
76
- ) from json_err
77
- except ValueError as val_err: # Catches the ValueError raised above for missing 'tokens'
78
- print(f"Value error: {val_err}")
79
- raise
71
+ ASCIIColors.error(
72
+ f"Failed to decode JSON response from Ollama during token count: {json_err}. Response text: {response.text if hasattr(response, 'text') else 'No response object'}"
73
+ )
74
+ return -1
75
+ except Exception as e:
76
+ ASCIIColors.error(f"An unexpected error occurred during token count: {e}")
77
+ return -1
78
+
80
79
  class OllamaBinding(LollmsLLMBinding):
81
- """Ollama-specific binding implementation"""
80
+ """Ollama-specific binding implementation using the ollama-python library."""
82
81
 
83
82
  DEFAULT_HOST_ADDRESS = "http://localhost:11434"
84
83
 
@@ -95,282 +94,483 @@ class OllamaBinding(LollmsLLMBinding):
95
94
  Args:
96
95
  host_address (str): Host address for the Ollama service. Defaults to DEFAULT_HOST_ADDRESS.
97
96
  model_name (str): Name of the model to use. Defaults to empty string.
98
- service_key (str): Authentication key for the service. Defaults to None.
97
+ service_key (str): Authentication key for the service (used in Authorization header). Defaults to None.
99
98
  verify_ssl_certificate (bool): Whether to verify SSL certificates. Defaults to True.
100
- personality (Optional[int]): Ignored parameter for compatibility with LollmsLLMBinding.
99
+ default_completion_format (ELF_COMPLETION_FORMAT): Default completion format.
101
100
  """
101
+ _host_address = host_address if host_address is not None else self.DEFAULT_HOST_ADDRESS
102
102
  super().__init__(
103
- binding_name="ollama",
104
- host_address=host_address if host_address is not None else self.DEFAULT_HOST_ADDRESS,
103
+ binding_name=BindingName, # Use the module-level BindingName
104
+ host_address=_host_address,
105
105
  model_name=model_name,
106
106
  service_key=service_key,
107
107
  verify_ssl_certificate=verify_ssl_certificate,
108
108
  default_completion_format=default_completion_format
109
109
  )
110
- self.model = None
110
+ if ollama is None:
111
+ raise ImportError("Ollama library is not installed. Please run 'pip install ollama'.")
112
+
113
+ self.ollama_client_headers = {}
114
+ if self.service_key:
115
+ self.ollama_client_headers['Authorization'] = f'Bearer {self.service_key}'
116
+
117
+ try:
118
+ self.ollama_client = ollama.Client(
119
+ host=self.host_address,
120
+ headers=self.ollama_client_headers if self.ollama_client_headers else None,
121
+ verify=self.verify_ssl_certificate # Passed to httpx.Client
122
+ )
123
+ except Exception as e:
124
+ ASCIIColors.error(f"Failed to initialize Ollama client: {e}")
125
+ self.ollama_client = None # Ensure it's None if initialization fails
126
+ # Optionally re-raise or handle so the binding is clearly unusable
127
+ raise ConnectionError(f"Could not connect or initialize Ollama client at {self.host_address}: {e}") from e
111
128
 
112
129
  def generate_text(self,
113
130
  prompt: str,
114
- images: Optional[List[str]] = None,
131
+ images: Optional[List[str]] = None, # List of image file paths
115
132
  n_predict: Optional[int] = None,
116
133
  stream: bool = False,
117
- temperature: float = 0.1,
118
- top_k: int = 50,
119
- top_p: float = 0.95,
120
- repeat_penalty: float = 0.8,
121
- repeat_last_n: int = 40,
134
+ temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
135
+ top_k: int = 40, # Ollama default is 40
136
+ top_p: float = 0.9, # Ollama default is 0.9
137
+ repeat_penalty: float = 1.1, # Ollama default is 1.1
138
+ repeat_last_n: int = 64, # Ollama default is 64
122
139
  seed: Optional[int] = None,
123
- n_threads: int = 8,
124
- ctx_size: int | None = None,
125
- streaming_callback: Optional[Callable[[str, str], None]] = None) -> Union[str, dict]:
140
+ n_threads: Optional[int] = None, # Ollama calls this num_thread
141
+ ctx_size: Optional[int] = None, # Ollama calls this num_ctx
142
+ streaming_callback: Optional[Callable[[str, int], bool]] = None
143
+ ) -> Union[str, Dict[str, any]]:
126
144
  """
127
145
  Generate text using the Ollama service, with optional image support.
128
146
 
129
147
  Args:
130
148
  prompt (str): The input prompt for text generation.
131
149
  images (Optional[List[str]]): List of image file paths for multimodal generation.
132
- If provided, uses the /api endpoint with message format.
133
- n_predict (Optional[int]): Maximum number of tokens to generate.
150
+ n_predict (Optional[int]): Maximum number of tokens to generate (num_predict).
134
151
  stream (bool): Whether to stream the output. Defaults to False.
135
- temperature (float): Sampling temperature. Defaults to 0.1.
136
- top_k (int): Top-k sampling parameter. Defaults to 50 (not used in Ollama API directly).
137
- top_p (float): Top-p sampling parameter. Defaults to 0.95 (not used in Ollama API directly).
138
- repeat_penalty (float): Penalty for repeated tokens. Defaults to 0.8 (not used in Ollama API directly).
139
- repeat_last_n (int): Number of previous tokens to consider for repeat penalty. Defaults to 40 (not used).
152
+ temperature (float): Sampling temperature.
153
+ top_k (int): Top-k sampling parameter.
154
+ top_p (float): Top-p sampling parameter.
155
+ repeat_penalty (float): Penalty for repeated tokens.
156
+ repeat_last_n (int): Number of previous tokens to consider for repeat penalty.
140
157
  seed (Optional[int]): Random seed for generation.
141
- n_threads (int): Number of threads to use. Defaults to 8 (not used in Ollama API directly).
142
- streaming_callback (Optional[Callable[[str, str], None]]): Callback for streaming output.
158
+ n_threads (Optional[int]): Number of threads to use (num_thread).
159
+ ctx_size (Optional[int]): Context window size (num_ctx).
160
+ streaming_callback (Optional[Callable[[str, int], bool]]): Callback for streaming output.
143
161
  - First parameter (str): The chunk of text received from the stream.
144
- - Second parameter (str): The message type (typically MSG_TYPE.MSG_TYPE_CHUNK).
162
+ - Second parameter (int): The message type (typically MSG_TYPE.MSG_TYPE_CHUNK).
163
+ Return False to stop streaming.
145
164
 
146
165
  Returns:
147
- Union[str, dict]: Generated text if successful, or a dictionary with status and error if failed.
148
-
149
- Note:
150
- Some parameters (top_k, top_p, repeat_penalty, repeat_last_n, n_threads) are included for interface
151
- consistency but are not directly used in the Ollama API implementation.
166
+ Union[str, Dict[str, any]]: Generated text if successful, or a dictionary with status and error if failed.
152
167
  """
153
- # Set headers
154
- headers = {
155
- 'Content-Type': 'application/json',
156
- }
157
- if self.service_key:
158
- headers['Authorization'] = f'Bearer {self.service_key}'
159
-
160
- # Clean host address
161
- host_address = self.host_address.rstrip('/')
162
-
163
- # Prepare data based on whether images are provided
164
- if images:
165
- # Multimodal generation using /api endpoint
166
- images_list = [encode_image(image_path) for image_path in images]
167
- data = {
168
- 'model': self.model_name,
169
- 'messages': [{
170
- "role": "user",
171
- "content": [
172
- {"type": "text", "text": prompt}
173
- ] + [
174
- {
175
- "type": "image_url",
176
- "image_url": {"url": f"data:image/jpeg;base64,{img}"}
177
- } for img in images_list
178
- ]
179
- }],
180
- "stream": stream,
181
- "temperature": float(temperature),
182
- "max_tokens": n_predict,
183
- }
184
- if ctx_size is not None:
185
- data["num_ctx"] = ctx_size
186
- url = f'{host_address}/api/chat'
187
- else:
188
- # Text-only generation using /api/generate endpoint
189
- data = {
190
- 'model': self.model_name,
191
- 'prompt': prompt,
192
- "stream": stream,
193
- "temperature": float(temperature),
194
- "max_tokens": n_predict
195
- }
196
- url = f'{host_address}/api/generate'
197
-
198
- # Make the request
199
- response = requests.post(url, json=data, headers=headers, stream=stream)
200
-
201
- # Handle response
202
- if not stream:
203
- if response.status_code == 200:
204
- try:
205
- if images:
206
- # For multimodal, response is in chat format
207
- return response.json()["message"]["content"]
208
- else:
209
- # For text-only
210
- return response.json()["response"]
211
- except Exception as ex:
212
- return {"status": False, "error": str(ex)}
213
- elif response.status_code == 404:
214
- ASCIIColors.error(response.content.decode("utf-8", errors='ignore'))
215
- return {"status": False, "error": "404 Not Found"}
216
- else:
217
- return {"status": False, "error": response.text}
218
- else:
219
- text = ""
220
- if response.status_code == 200:
221
- try:
222
- for line in response.iter_lines():
223
- decoded = line.decode("utf-8")
224
- if images:
225
- # Streaming with images (chat format)
226
- if decoded.startswith("data: "):
227
- json_data = json.loads(decoded[5:].strip())
228
- chunk = json_data["message"]["content"] if "message" in json_data else ""
229
- else:
230
- continue
231
- else:
232
- # Streaming without images (generate format)
233
- json_data = json.loads(decoded)
234
- chunk = json_data["response"]
235
-
236
- text += chunk
237
- if streaming_callback:
238
- if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
239
- break
240
- return text
241
- except Exception as ex:
242
- return {"status": False, "error": str(ex)}
243
- elif response.status_code == 404:
244
- ASCIIColors.error(response.content.decode("utf-8", errors='ignore'))
245
- return {"status": False, "error": "404 Not Found"}
246
- elif response.status_code == 400:
247
- try:
248
- content = json.loads(response.content.decode("utf8"))
249
- return {"status": False, "error": content.get("error", {}).get("message", content.get("message", "Unknown error"))}
250
- except:
251
- return {"status": False, "error": response.content.decode("utf8")}
252
- else:
253
- return {"status": False, "error": response.text}
168
+ if not self.ollama_client:
169
+ return {"status": False, "error": "Ollama client not initialized."}
170
+
171
+ options = {}
172
+ if n_predict is not None: options['num_predict'] = n_predict
173
+ if temperature is not None: options['temperature'] = float(temperature)
174
+ if top_k is not None: options['top_k'] = top_k
175
+ if top_p is not None: options['top_p'] = top_p
176
+ if repeat_penalty is not None: options['repeat_penalty'] = repeat_penalty
177
+ if repeat_last_n is not None: options['repeat_last_n'] = repeat_last_n
178
+ if seed is not None: options['seed'] = seed
179
+ if n_threads is not None: options['num_thread'] = n_threads
180
+ if ctx_size is not None: options['num_ctx'] = ctx_size
181
+
182
+ full_response_text = ""
183
+
184
+ try:
185
+ if images: # Multimodal
186
+ # ollama-python expects paths or bytes for images
187
+ processed_images = []
188
+ for img_path in images:
189
+ # Assuming img_path is a file path. ollama-python will read and encode it.
190
+ # If images were base64 strings, they would need decoding to bytes first.
191
+ processed_images.append(img_path)
192
+
193
+ messages = [{'role': 'user', 'content': prompt, 'images': processed_images if processed_images else None}]
194
+
195
+ if stream:
196
+ response_stream = self.ollama_client.chat(
197
+ model=self.model_name,
198
+ messages=messages,
199
+ stream=True,
200
+ options=options if options else None
201
+ )
202
+ for chunk_dict in response_stream:
203
+ chunk_content = chunk_dict.get('message', {}).get('content', '')
204
+ if chunk_content: # Ensure there is content to process
205
+ full_response_text += chunk_content
206
+ if streaming_callback:
207
+ if not streaming_callback(chunk_content, MSG_TYPE.MSG_TYPE_CHUNK):
208
+ break # Callback requested stop
209
+ return full_response_text
210
+ else: # Not streaming
211
+ response_dict = self.ollama_client.chat(
212
+ model=self.model_name,
213
+ messages=messages,
214
+ stream=False,
215
+ options=options if options else None
216
+ )
217
+ return response_dict.get('message', {}).get('content', '')
218
+ else: # Text-only
219
+ if stream:
220
+ response_stream = self.ollama_client.generate(
221
+ model=self.model_name,
222
+ prompt=prompt,
223
+ stream=True,
224
+ options=options if options else None
225
+ )
226
+ for chunk_dict in response_stream:
227
+ chunk_content = chunk_dict.get('response', '')
228
+ if chunk_content:
229
+ full_response_text += chunk_content
230
+ if streaming_callback:
231
+ if not streaming_callback(chunk_content, MSG_TYPE.MSG_TYPE_CHUNK):
232
+ break
233
+ return full_response_text
234
+ else: # Not streaming
235
+ response_dict = self.ollama_client.generate(
236
+ model=self.model_name,
237
+ prompt=prompt,
238
+ stream=False,
239
+ options=options if options else None
240
+ )
241
+ return response_dict.get('response', '')
242
+ except ollama.ResponseError as e:
243
+ error_message = f"Ollama API ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
244
+ ASCIIColors.error(error_message)
245
+ return {"status": False, "error": error_message, "status_code": e.status_code}
246
+ except ollama.RequestError as e: # Covers connection errors, timeouts during request
247
+ error_message = f"Ollama API RequestError: {str(e)}"
248
+ ASCIIColors.error(error_message)
249
+ return {"status": False, "error": error_message}
250
+ except Exception as ex:
251
+ error_message = f"An unexpected error occurred: {str(ex)}"
252
+ trace_exception(ex)
253
+ return {"status": False, "error": error_message}
254
254
 
255
- def tokenize(self, text: str) -> list:
255
+ def tokenize(self, text: str) -> List[Union[int, str]]:
256
256
  """
257
- Tokenize the input text into a list of characters.
257
+ Tokenize the input text. For Ollama, this is complex as tokenization is model-specific
258
+ and best done by the server. This method provides a basic character-level tokenization
259
+ as a fallback or placeholder, or one could attempt to call /api/tokenize if desired.
260
+ The `count_tokens` method is more accurate for Ollama.
258
261
 
259
262
  Args:
260
263
  text (str): The text to tokenize.
261
264
 
262
265
  Returns:
263
- list: List of individual characters.
266
+ list: List of tokens (characters or token IDs if /api/tokenize is used).
264
267
  """
265
- return list(text)
266
-
267
- def detokenize(self, tokens: list) -> str:
268
+ # Basic character-level tokenization
269
+ # return list(text)
270
+
271
+ # For actual token IDs (slower, makes a network request):
272
+ api_url = f"{self.host_address.rstrip('/')}/api/tokenize"
273
+ payload = {"model": self.model_name, "prompt": text}
274
+ try:
275
+ response = requests.post(api_url, json=payload, timeout=10, verify=self.verify_ssl_certificate, headers=self.ollama_client_headers)
276
+ response.raise_for_status()
277
+ return response.json().get("tokens", [])
278
+ except Exception as e:
279
+ ASCIIColors.warning(f"Failed to tokenize text with Ollama server, falling back to char tokens: {e}")
280
+ return list(text)
281
+
282
+ def detokenize(self, tokens: List[Union[int,str]]) -> str:
268
283
  """
269
- Convert a list of tokens back to text.
284
+ Convert a list of tokens back to text. If tokens are characters, joins them.
285
+ If tokens are IDs, this is non-trivial without the model's tokenizer.
270
286
 
271
287
  Args:
272
- tokens (list): List of tokens (characters) to detokenize.
288
+ tokens (list): List of tokens to detokenize.
273
289
 
274
290
  Returns:
275
291
  str: Detokenized text.
276
292
  """
277
- return "".join(tokens)
293
+ if not tokens:
294
+ return ""
295
+ if isinstance(tokens[0], str): # Assuming character tokens
296
+ return "".join(tokens)
297
+ else:
298
+ # Detokenizing IDs from Ollama is not straightforward client-side without specific tokenizer.
299
+ # This is a placeholder. For Ollama, detokenization usually happens server-side.
300
+ ASCIIColors.warning("Detokenizing integer tokens is not accurately supported by this Ollama client binding. Returning joined string of token IDs.")
301
+ return "".join(map(str, tokens))
278
302
 
279
303
  def count_tokens(self, text: str) -> int:
280
304
  """
281
- Count tokens from a text.
305
+ Count tokens from a text using the Ollama server's /api/tokenize endpoint.
282
306
 
283
307
  Args:
284
- tokens (list): List of tokens to detokenize.
308
+ text (str): Text to count tokens from.
285
309
 
286
310
  Returns:
287
- int: Number of tokens in text.
311
+ int: Number of tokens in text. Returns -1 on error.
288
312
  """
289
- return count_tokens_ollama(text, self.model_name, self.host_address)
313
+ if not self.model_name:
314
+ ASCIIColors.warning("Cannot count tokens, model_name is not set.")
315
+ return -1
316
+ return count_tokens_ollama(text, self.model_name, self.host_address, verify_ssl_certificate=self.verify_ssl_certificate, headers=self.ollama_client_headers)
290
317
 
291
- def embed(self, text: str, **kwargs) -> list:
318
+ def embed(self, text: str, **kwargs) -> List[float]:
292
319
  """
293
- Get embeddings for the input text using Ollama API
320
+ Get embeddings for the input text using Ollama API.
294
321
 
295
322
  Args:
296
- text (str or List[str]): Input text to embed
297
- **kwargs: Additional arguments like model, truncate, options, keep_alive
323
+ text (str): Input text to embed.
324
+ **kwargs: Optional arguments. Can include 'model' to override self.model_name,
325
+ and 'options' dictionary for Ollama embedding options.
298
326
 
299
327
  Returns:
300
- dict: Response containing embeddings
301
- """
302
- import requests
303
-
304
- url = f"{self.base_url}/api/embed"
305
-
306
- # Prepare the request payload
307
- payload = {
308
- "input": text,
309
- "model": kwargs.get("model", "llama2") # default model
310
- }
311
-
312
- # Add optional parameters if provided
313
- if "truncate" in kwargs:
314
- payload["truncate"] = kwargs["truncate"]
315
- if "options" in kwargs:
316
- payload["options"] = kwargs["options"]
317
- if "keep_alive" in kwargs:
318
- payload["keep_alive"] = kwargs["keep_alive"]
328
+ List[float]: The embedding vector.
319
329
 
330
+ Raises:
331
+ Exception: if embedding fails or Ollama client is not available.
332
+ """
333
+ if not self.ollama_client:
334
+ raise Exception("Ollama client not initialized.")
335
+
336
+ model_to_use = kwargs.get("model", self.model_name)
337
+ if not model_to_use:
338
+ raise ValueError("Model name for embedding must be specified either in init or via kwargs.")
339
+
340
+ ollama_options = kwargs.get("options", None)
320
341
  try:
321
- response = requests.post(url, json=payload)
322
- response.raise_for_status() # Raise exception for bad status codes
323
- return response.json()
324
- except requests.exceptions.RequestException as e:
325
- raise Exception(f"Embedding request failed: {str(e)}")
342
+ response = self.ollama_client.embeddings(
343
+ model=model_to_use,
344
+ prompt=text,
345
+ options=ollama_options
346
+ )
347
+ return response['embedding']
348
+ except ollama.ResponseError as e:
349
+ error_message = f"Ollama API Embeddings ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
350
+ ASCIIColors.error(error_message)
351
+ raise Exception(error_message) from e
352
+ except ollama.RequestError as e:
353
+ error_message = f"Ollama API Embeddings RequestError: {str(e)}"
354
+ ASCIIColors.error(error_message)
355
+ raise Exception(error_message) from e
356
+ except Exception as ex:
357
+ trace_exception(ex)
358
+ raise Exception(f"Embedding failed: {str(ex)}") from ex
326
359
 
327
-
328
360
  def get_model_info(self) -> dict:
329
361
  """
330
- Return information about the current Ollama model.
362
+ Return information about the current Ollama model setup.
331
363
 
332
364
  Returns:
333
- dict: Dictionary containing model name, version, and host address.
365
+ dict: Dictionary containing binding name, version, host address, and model name.
334
366
  """
335
367
  return {
336
- "name": "ollama",
337
- "version": "2.0",
368
+ "name": self.binding_name, # from super class
369
+ "version": ollama.__version__ if ollama else "unknown", # Ollama library version
338
370
  "host_address": self.host_address,
339
- "model_name": self.model_name
371
+ "model_name": self.model_name,
372
+ "supports_structured_output": False, # Ollama primarily supports text/chat
373
+ "supports_vision": True # Many Ollama models (e.g. llava, bakllava) support vision
340
374
  }
341
- def listModels(self):
342
- """ Lists available models """
343
- url = f'{self.host_address}/api/tags'
344
- headers = {
345
- 'accept': 'application/json',
346
- 'Authorization': f'Bearer {self.service_key}'
347
- }
348
- response = requests.get(url, headers=headers, verify= self.verify_ssl_certificate)
349
- try:
350
- ASCIIColors.debug("Listing ollama models")
351
- data = response.json()
352
- model_info = []
353
375
 
354
- for model in data['models']:
355
- model_name = model['name']
356
- owned_by = ""
357
- created_datetime = model["modified_at"]
358
- model_info.append({'model_name': model_name, 'owned_by': owned_by, 'created_datetime': created_datetime})
359
-
360
- return model_info
376
+ def listModels(self) -> List[Dict[str, str]]:
377
+ """
378
+ Lists available models from the Ollama service using the ollama-python library.
379
+ The returned list of dictionaries matches the format of the original template.
380
+
381
+ Returns:
382
+ List[Dict[str, str]]: A list of model information dictionaries.
383
+ Each dict has 'model_name', 'owned_by', 'created_datetime'.
384
+ """
385
+ if not self.ollama_client:
386
+ ASCIIColors.error("Ollama client not initialized. Cannot list models.")
387
+ return []
388
+ try:
389
+ ASCIIColors.debug(f"Listing ollama models from {self.host_address}")
390
+ response_data = self.ollama_client.list() # This returns {'models': [{'name':..., 'modified_at':..., ...}]}
391
+
392
+ model_info_list = []
393
+ if 'models' in response_data:
394
+ for model_entry in response_data['models']:
395
+ model_info_list.append({
396
+ 'model_name': model_entry.get('model'),
397
+ 'owned_by': "", # Ollama API doesn't provide a direct "owned_by" field.
398
+ 'created_datetime': model_entry.get('modified_at')
399
+ })
400
+ return model_info_list
401
+ except ollama.ResponseError as e:
402
+ ASCIIColors.error(f"Ollama API listModels ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code}) from {self.host_address}")
403
+ return []
404
+ except ollama.RequestError as e: # Covers connection errors, timeouts during request
405
+ ASCIIColors.error(f"Ollama API listModels RequestError: {str(e)} from {self.host_address}")
406
+ return []
361
407
  except Exception as ex:
362
408
  trace_exception(ex)
363
409
  return []
410
+
364
411
  def load_model(self, model_name: str) -> bool:
365
412
  """
366
- Load a specific model into the Ollama binding.
413
+ Set the model name for subsequent operations. Ollama loads models on demand.
414
+ This method can be used to verify if a model exists by attempting a small operation,
415
+ but for now, it just sets the name.
367
416
 
368
417
  Args:
369
- model_name (str): Name of the model to load.
418
+ model_name (str): Name of the model to set.
370
419
 
371
420
  Returns:
372
- bool: True if model loaded successfully.
421
+ bool: True if model name is set.
373
422
  """
374
- self.model = model_name
375
423
  self.model_name = model_name
424
+ # Optionally, you could try a quick self.ollama_client.show(model_name) to verify existence.
425
+ # For simplicity, we just set it.
426
+ ASCIIColors.info(f"Ollama model set to: {model_name}. It will be loaded by the server on first use.")
376
427
  return True
428
+
429
+ if __name__ == '__main__':
430
+ global full_streamed_text
431
+ # Example Usage (requires an Ollama server running)
432
+ ASCIIColors.yellow("Testing OllamaBinding...")
433
+
434
+ # --- Configuration ---
435
+ # Replace with your Ollama server details if not localhost
436
+ ollama_host = "http://localhost:11434"
437
+ # Common model, pull it first: `ollama pull llama3` or `ollama pull llava` for vision
438
+ test_model_name = "llama3"
439
+ test_vision_model_name = "llava" # or another vision model you have
440
+
441
+ try:
442
+ # --- Initialization ---
443
+ ASCIIColors.cyan("\n--- Initializing Binding ---")
444
+ binding = OllamaBinding(host_address=ollama_host, model_name=test_model_name)
445
+ ASCIIColors.green("Binding initialized successfully.")
446
+ ASCIIColors.info(f"Using Ollama client version: {ollama.__version__ if ollama else 'N/A'}")
447
+
448
+ # --- List Models ---
449
+ ASCIIColors.cyan("\n--- Listing Models ---")
450
+ models = binding.listModels()
451
+ if models:
452
+ ASCIIColors.green(f"Found {len(models)} models. First 5:")
453
+ for m in models[:5]:
454
+ print(m)
455
+ else:
456
+ ASCIIColors.warning("No models found or failed to list models. Ensure Ollama is running and has models.")
457
+
458
+ # --- Load Model (sets active model) ---
459
+ ASCIIColors.cyan(f"\n--- Setting model to: {test_model_name} ---")
460
+ binding.load_model(test_model_name)
461
+
462
+ # --- Count Tokens ---
463
+ ASCIIColors.cyan("\n--- Counting Tokens ---")
464
+ sample_text = "Hello, world! This is a test."
465
+ token_count = binding.count_tokens(sample_text)
466
+ ASCIIColors.green(f"Token count for '{sample_text}': {token_count}")
467
+
468
+ # --- Tokenize/Detokenize (using server for tokenize) ---
469
+ ASCIIColors.cyan("\n--- Tokenize/Detokenize ---")
470
+ tokens = binding.tokenize(sample_text)
471
+ ASCIIColors.green(f"Tokens for '{sample_text}': {tokens[:10]}...") # Print first 10
472
+ detokenized_text = binding.detokenize(tokens)
473
+ # Note: detokenize might not be perfect if tokens are IDs and not chars
474
+ ASCIIColors.green(f"Detokenized text (may vary based on tokenization type): {detokenized_text}")
475
+
476
+
477
+ # --- Text Generation (Non-Streaming) ---
478
+ ASCIIColors.cyan("\n--- Text Generation (Non-Streaming) ---")
479
+ prompt_text = "Why is the sky blue?"
480
+ ASCIIColors.info(f"Prompt: {prompt_text}")
481
+ generated_text = binding.generate_text(prompt_text, n_predict=50, stream=False)
482
+ if isinstance(generated_text, str):
483
+ ASCIIColors.green(f"Generated text: {generated_text}")
484
+ else:
485
+ ASCIIColors.error(f"Generation failed: {generated_text}")
486
+
487
+ # --- Text Generation (Streaming) ---
488
+ ASCIIColors.cyan("\n--- Text Generation (Streaming) ---")
489
+ full_streamed_text = ""
490
+ def stream_callback(chunk: str, msg_type: int):
491
+ global full_streamed_text
492
+ print(f"{ASCIIColors.GREEN}Stream chunk: {chunk}{ASCIIColors.RESET}", end="", flush=True)
493
+ full_streamed_text += chunk
494
+ if len(full_streamed_text) > 100: # Example: stop after 100 chars for test
495
+ # print("\nStopping stream early for test.")
496
+ # return False # uncomment to test early stop
497
+ pass
498
+ return True
499
+
500
+ ASCIIColors.info(f"Prompt: {prompt_text}")
501
+ result = binding.generate_text(prompt_text, n_predict=100, stream=True, streaming_callback=stream_callback)
502
+ print("\n--- End of Stream ---")
503
+ if isinstance(result, str):
504
+ ASCIIColors.green(f"Full streamed text: {result}") # 'result' is the full_streamed_text
505
+ else:
506
+ ASCIIColors.error(f"Streaming generation failed: {result}")
507
+
508
+
509
+ # --- Embeddings ---
510
+ ASCIIColors.cyan("\n--- Embeddings ---")
511
+ # Ensure you have an embedding model like 'mxbai-embed-large' or 'nomic-embed-text'
512
+ # Or use a general model if it supports embedding (some do implicitly)
513
+ # For this test, we'll try with the current test_model_name,
514
+ # but ideally use a dedicated embedding model.
515
+ # binding.load_model("mxbai-embed-large") # if you have it
516
+ try:
517
+ embedding_text = "Lollms is a cool project."
518
+ embedding_vector = binding.embed(embedding_text) # Uses current self.model_name
519
+ ASCIIColors.green(f"Embedding for '{embedding_text}' (first 5 dims): {embedding_vector[:5]}...")
520
+ ASCIIColors.info(f"Embedding vector dimension: {len(embedding_vector)}")
521
+ except Exception as e:
522
+ ASCIIColors.warning(f"Could not get embedding with '{binding.model_name}': {e}. Some models don't support /api/embeddings or may need to be specified.")
523
+ ASCIIColors.warning("Try `ollama pull mxbai-embed-large` and set it as model for embedding.")
524
+
525
+
526
+ # --- Vision Model Test (if llava or similar is available) ---
527
+ # Create a dummy image file for testing
528
+ dummy_image_path = "dummy_test_image.png"
529
+ try:
530
+ from PIL import Image, ImageDraw
531
+ img = Image.new('RGB', (100, 30), color = ('red'))
532
+ d = ImageDraw.Draw(img)
533
+ d.text((10,10), "Hello", fill=('white'))
534
+ img.save(dummy_image_path)
535
+ ASCIIColors.info(f"Created dummy image: {dummy_image_path}")
536
+
537
+ ASCIIColors.cyan(f"\n--- Vision Generation (using {test_vision_model_name}) ---")
538
+ # Check if vision model exists
539
+ vision_model_exists = any(m['model_name'].startswith(test_vision_model_name) for m in models)
540
+ if not vision_model_exists:
541
+ ASCIIColors.warning(f"Vision model '{test_vision_model_name}' not found in pulled models. Skipping vision test.")
542
+ ASCIIColors.warning(f"Try: `ollama pull {test_vision_model_name}`")
543
+ else:
544
+ binding.load_model(test_vision_model_name) # Switch to vision model
545
+ vision_prompt = "What is written in this image?"
546
+ ASCIIColors.info(f"Vision Prompt: {vision_prompt} with image {dummy_image_path}")
547
+
548
+ vision_response = binding.generate_text(
549
+ prompt=vision_prompt,
550
+ images=[dummy_image_path],
551
+ n_predict=50,
552
+ stream=False
553
+ )
554
+ if isinstance(vision_response, str):
555
+ ASCIIColors.green(f"Vision model response: {vision_response}")
556
+ else:
557
+ ASCIIColors.error(f"Vision generation failed: {vision_response}")
558
+ except ImportError:
559
+ ASCIIColors.warning("Pillow library not found. Cannot create dummy image for vision test. `pip install Pillow`")
560
+ except Exception as e:
561
+ ASCIIColors.error(f"Error during vision test: {e}")
562
+ finally:
563
+ import os
564
+ if os.path.exists(dummy_image_path):
565
+ os.remove(dummy_image_path)
566
+
567
+
568
+ except ConnectionRefusedError:
569
+ ASCIIColors.error("Connection to Ollama server refused. Is Ollama running?")
570
+ except ImportError as e:
571
+ ASCIIColors.error(f"Import error: {e}. Make sure 'ollama' library is installed ('pip install ollama').")
572
+ except Exception as e:
573
+ ASCIIColors.error(f"An error occurred during testing: {e}")
574
+ trace_exception(e)
575
+
576
+ ASCIIColors.yellow("\nOllamaBinding test finished.")
@@ -8,10 +8,9 @@ from lollms_client.lollms_types import ELF_COMPLETION_FORMAT
8
8
  from typing import Optional, Callable, List, Union
9
9
  from ascii_colors import ASCIIColors, trace_exception
10
10
  import pipmaster as pm
11
- if not pm.is_installed("openai"):
12
- pm.install("openai")
13
- if not pm.is_installed("tiktoken"):
14
- pm.install("tiktoken")
11
+
12
+ pm.ensure_packages(["openai","tiktoken"])
13
+
15
14
  import openai
16
15
  import tiktoken
17
16
  import os
@@ -49,6 +48,7 @@ class OpenAIBinding(LollmsLLMBinding):
49
48
  )
50
49
  self.service_key = os.getenv("OPENAI_API_KEY","")
51
50
  self.client = openai.OpenAI(base_url=host_address)
51
+ self.completion_format = ELF_COMPLETION_FORMAT.Chat
52
52
 
53
53
 
54
54
  def generate_text(self,
@@ -115,7 +115,7 @@ class OpenAIBinding(LollmsLLMBinding):
115
115
  messages = [{"role": "user", "content": prompt}]
116
116
 
117
117
  # Generate text using the OpenAI API
118
- if completion_format == ELF_COMPLETION_FORMAT.Chat:
118
+ if self.completion_format == ELF_COMPLETION_FORMAT.Chat:
119
119
  chat_completion = self.client.chat.completions.create(
120
120
  model=self.model_name, # Choose the engine according to your OpenAI plan
121
121
  messages=messages,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lollms_client
3
- Version: 0.12.3
3
+ Version: 0.12.6
4
4
  Summary: A client library for LoLLMs generate endpoint
5
5
  Home-page: https://github.com/ParisNeo/lollms_client
6
6
  Author: ParisNeo
@@ -21,8 +21,8 @@ lollms_client/lollms_types.py,sha256=cfc1sremM8KR4avkYX99fIVkkdRvXErrCWKGjLrgv50
21
21
  lollms_client/lollms_utilities.py,sha256=YAgamfp0pBVApR68AHKjhp1lh6isMNF8iadwWLl63c0,7045
22
22
  lollms_client/llm_bindings/__init__.py,sha256=9sWGpmWSSj6KQ8H4lKGCjpLYwhnVdL_2N7gXCphPqh4,14
23
23
  lollms_client/llm_bindings/lollms/__init__.py,sha256=H1Vw6trTzinS_xaeNWZ7Aq-3XngbzoYxtA4Se2IeCpQ,12213
24
- lollms_client/llm_bindings/ollama/__init__.py,sha256=oOSWM7eVpDPTBGulRdHukOxpIwvA1x5VNsJEVgT2jFk,15843
25
- lollms_client/llm_bindings/openai/__init__.py,sha256=n8y14j6MAkaZSGMsvvUogr8LRgLsNz4S6QJJdB6H0lQ,10763
24
+ lollms_client/llm_bindings/ollama/__init__.py,sha256=EVM6-IMc_KIAnlMJY4UWOHoZqZNFUUUhXkCzwopIq_I,28425
25
+ lollms_client/llm_bindings/openai/__init__.py,sha256=M3tEB1Qg6jys_dX7qq7bsXwpv4FLPYykWO2R8t7-yu4,10750
26
26
  lollms_client/llm_bindings/transformers/__init__.py,sha256=UyaiQcJQricBZJGe1zfNIVy6Cb3QpHSvImSoE9FhgC0,12771
27
27
  lollms_client/stt_bindings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
28
  lollms_client/stt_bindings/lollms/__init__.py,sha256=7-IZkrsn15Vaz0oqkqCxMeNQfMkeilbgScLlrrywES4,6098
@@ -34,8 +34,8 @@ lollms_client/tts_bindings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
34
34
  lollms_client/tts_bindings/lollms/__init__.py,sha256=8x2_T9XscvISw2TiaLoFxvrS7TIsVLdqbwSc04cX-wc,7164
35
35
  lollms_client/ttv_bindings/__init__.py,sha256=UZ8o2izQOJLQgtZ1D1cXoNST7rzqW22rL2Vufc7ddRc,3141
36
36
  lollms_client/ttv_bindings/lollms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
- lollms_client-0.12.3.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
38
- lollms_client-0.12.3.dist-info/METADATA,sha256=t92LRON5mw051xnzqLrPRyagxWEklQZrnaOJwEXy9OM,6866
39
- lollms_client-0.12.3.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
40
- lollms_client-0.12.3.dist-info/top_level.txt,sha256=Bk_kz-ri6Arwsk7YG-T5VsRorV66uVhcHGvb_g2WqgE,14
41
- lollms_client-0.12.3.dist-info/RECORD,,
37
+ lollms_client-0.12.6.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
38
+ lollms_client-0.12.6.dist-info/METADATA,sha256=vqmLDZ2nBJOZ9tKLHWsU8Gat4maKzJ5UqaMK2f2SUsE,6866
39
+ lollms_client-0.12.6.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
40
+ lollms_client-0.12.6.dist-info/top_level.txt,sha256=Bk_kz-ri6Arwsk7YG-T5VsRorV66uVhcHGvb_g2WqgE,14
41
+ lollms_client-0.12.6.dist-info/RECORD,,