lollms-client 0.12.2__tar.gz → 0.12.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lollms-client might be problematic. Click here for more details.
- {lollms_client-0.12.2 → lollms_client-0.12.4}/PKG-INFO +1 -1
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/llm_bindings/lollms/__init__.py +12 -0
- lollms_client-0.12.4/lollms_client/llm_bindings/ollama/__init__.py +578 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/llm_bindings/openai/__init__.py +13 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/llm_bindings/transformers/__init__.py +13 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/lollms_llm_binding.py +13 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client.egg-info/PKG-INFO +1 -1
- {lollms_client-0.12.2 → lollms_client-0.12.4}/setup.py +1 -1
- lollms_client-0.12.2/lollms_client/llm_bindings/ollama/__init__.py +0 -298
- {lollms_client-0.12.2 → lollms_client-0.12.4}/LICENSE +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/README.md +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/__init__.py +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/llm_bindings/__init__.py +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/lollms_config.py +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/lollms_core.py +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/lollms_discussion.py +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/lollms_functions.py +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/lollms_js_analyzer.py +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/lollms_python_analyzer.py +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/lollms_stt_binding.py +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/lollms_tasks.py +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/lollms_tti_binding.py +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/lollms_ttm_binding.py +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/lollms_tts_binding.py +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/lollms_ttv_binding.py +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/lollms_types.py +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/lollms_utilities.py +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/stt_bindings/__init__.py +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/stt_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/tti_bindings/__init__.py +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/tti_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/ttm_bindings/__init__.py +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/ttm_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/tts_bindings/__init__.py +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/tts_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/ttv_bindings/__init__.py +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/ttv_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client.egg-info/SOURCES.txt +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client.egg-info/dependency_links.txt +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client.egg-info/requires.txt +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client.egg-info/top_level.txt +0 -0
- {lollms_client-0.12.2 → lollms_client-0.12.4}/setup.cfg +0 -0
|
@@ -10,6 +10,7 @@ import json
|
|
|
10
10
|
|
|
11
11
|
BindingName = "LollmsLLMBinding"
|
|
12
12
|
|
|
13
|
+
|
|
13
14
|
class LollmsLLMBinding(LollmsLLMBinding):
|
|
14
15
|
"""LOLLMS-specific binding implementation"""
|
|
15
16
|
|
|
@@ -215,7 +216,18 @@ class LollmsLLMBinding(LollmsLLMBinding):
|
|
|
215
216
|
except Exception as ex:
|
|
216
217
|
return {"status": False, "error": str(ex)}
|
|
217
218
|
|
|
219
|
+
def count_tokens(self, text: str) -> int:
    """
    Count tokens from a text.

    Args:
        text (str): Text whose tokens are counted.

    Returns:
        int: Number of tokens in text.
    """
    # Delegates to this binding's own tokenize() and counts the items it returns.
    return len(self.tokenize(text))
|
|
230
|
+
|
|
219
231
|
def embed(self, text: str, **kwargs) -> list:
|
|
220
232
|
"""
|
|
221
233
|
Get embeddings for the input text using Ollama API
|
|
@@ -0,0 +1,578 @@
|
|
|
1
|
+
# bindings/ollama/binding.py
|
|
2
|
+
import requests
|
|
3
|
+
import json
|
|
4
|
+
from lollms_client.lollms_llm_binding import LollmsLLMBinding
|
|
5
|
+
from lollms_client.lollms_types import MSG_TYPE
|
|
6
|
+
# encode_image is not strictly needed if ollama-python handles paths, but kept for consistency if ever needed.
|
|
7
|
+
# from lollms_client.lollms_utilities import encode_image
|
|
8
|
+
from lollms_client.lollms_types import ELF_COMPLETION_FORMAT
|
|
9
|
+
from typing import Optional, Callable, List, Union, Dict
|
|
10
|
+
|
|
11
|
+
from ascii_colors import ASCIIColors, trace_exception
|
|
12
|
+
|
|
13
|
+
try:
|
|
14
|
+
import ollama
|
|
15
|
+
except ImportError:
|
|
16
|
+
ASCIIColors.warning("Ollama library not found. Please install it using 'pip install ollama'")
|
|
17
|
+
ollama = None
|
|
18
|
+
|
|
19
|
+
BindingName = "OllamaBinding"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def count_tokens_ollama(
    text_to_tokenize: str,
    model_name: str,
    ollama_host: str = "http://localhost:11434",
    timeout: int = 30,
    verify_ssl_certificate: bool = True,
    headers: Optional[Dict[str, str]] = None
) -> int:
    """
    Counts the number of tokens in a given text using a specified Ollama model
    by calling the Ollama server's /api/tokenize endpoint.

    Every failure (HTTP error, transport error, undecodable body, unexpected
    payload shape) is logged through ASCIIColors and reported as -1; this
    function does not raise.

    Args:
        text_to_tokenize (str): The text to be tokenized.
        model_name (str): The name of the Ollama model to use (e.g., "llama3", "mistral").
        ollama_host (str): The base URL of the Ollama server (default: "http://localhost:11434").
        timeout (int): Timeout for the request in seconds (default: 30).
        verify_ssl_certificate (bool): Whether to verify SSL.
        headers (Optional[Dict[str, str]]): Optional headers for the request.

    Returns:
        int: The number of tokens. Returns -1 if an error occurs.
    """
    api_url = f"{ollama_host.rstrip('/')}/api/tokenize"
    payload = {
        "model": model_name,
        "prompt": text_to_tokenize
    }
    request_headers = headers if headers else {}

    try:
        response = requests.post(api_url, json=payload, timeout=timeout, verify=verify_ssl_certificate, headers=request_headers)
        response.raise_for_status()  # Raises HTTPError for bad responses (4xx or 5xx)
        response_data = response.json()
    except requests.exceptions.HTTPError as http_err:
        # A Response object is falsy for 4xx/5xx statuses (its truth value is
        # `.ok`), so test against None explicitly or the body is never shown.
        ASCIIColors.error(f"HTTP error occurred during token count: {http_err} - {http_err.response.text if http_err.response is not None else 'No response text'}")
        return -1
    except json.JSONDecodeError as json_err:
        # Must precede the RequestException handler: the decode error raised by
        # recent `requests` versions derives from both classes.
        ASCIIColors.error(
            f"Failed to decode JSON response from Ollama during token count: {json_err}. Response text: {response.text}"
        )
        return -1
    except requests.exceptions.RequestException as req_err:
        ASCIIColors.error(f"Request error occurred during token count: {req_err}")
        return -1
    except Exception as e:
        ASCIIColors.error(f"An unexpected error occurred during token count: {e}")
        return -1

    # Only a JSON object carrying a list under "tokens" is a usable answer.
    tokens = response_data.get("tokens") if isinstance(response_data, dict) else None
    if isinstance(tokens, list):
        return len(tokens)

    ASCIIColors.warning(
        f"Ollama response for token count did not contain a 'tokens' list. Response: {response_data}"
    )
    return -1
|
|
80
|
+
|
|
81
|
+
class OllamaBinding(LollmsLLMBinding):
    """Ollama-specific binding implementation using the ollama-python library."""

    DEFAULT_HOST_ADDRESS = "http://localhost:11434"

    def __init__(self,
                 host_address: str = None,
                 model_name: str = "",
                 service_key: str = None,
                 verify_ssl_certificate: bool = True,
                 default_completion_format: ELF_COMPLETION_FORMAT = ELF_COMPLETION_FORMAT.Chat
                 ):
        """
        Initialize the Ollama binding.

        Args:
            host_address (str): Host address for the Ollama service. Defaults to DEFAULT_HOST_ADDRESS.
            model_name (str): Name of the model to use. Defaults to empty string.
            service_key (str): Authentication key for the service (used in Authorization header). Defaults to None.
            verify_ssl_certificate (bool): Whether to verify SSL certificates. Defaults to True.
            default_completion_format (ELF_COMPLETION_FORMAT): Default completion format.

        Raises:
            ImportError: If the `ollama` package could not be imported.
            ConnectionError: If constructing the `ollama.Client` fails.
        """
        _host_address = host_address if host_address is not None else self.DEFAULT_HOST_ADDRESS
        super().__init__(
            binding_name=BindingName,  # Use the module-level BindingName
            host_address=_host_address,
            model_name=model_name,
            service_key=service_key,
            verify_ssl_certificate=verify_ssl_certificate,
            default_completion_format=default_completion_format
        )
        if ollama is None:
            raise ImportError("Ollama library is not installed. Please run 'pip install ollama'.")

        # The same headers are reused for the raw `requests` call in tokenize()
        # and passed on to count_tokens_ollama().
        self.ollama_client_headers = {}
        if self.service_key:
            self.ollama_client_headers['Authorization'] = f'Bearer {self.service_key}'

        try:
            self.ollama_client = ollama.Client(
                host=self.host_address,
                headers=self.ollama_client_headers if self.ollama_client_headers else None,
                verify=self.verify_ssl_certificate  # Passed to httpx.Client
            )
        except Exception as e:
            ASCIIColors.error(f"Failed to initialize Ollama client: {e}")
            self.ollama_client = None  # Ensure it's None if initialization fails
            raise ConnectionError(f"Could not connect or initialize Ollama client at {self.host_address}: {e}") from e

    @staticmethod
    def _collect_stream(response_stream, extract_text, streaming_callback) -> str:
        """Join the text of every streamed chunk, forwarding each piece to the callback.

        Stops early as soon as the callback returns a falsy value; the text
        gathered up to and including that chunk is still returned.
        """
        pieces = []
        for chunk in response_stream:
            piece = extract_text(chunk)
            if not piece:  # Skip chunks that carry no text
                continue
            pieces.append(piece)
            if streaming_callback and not streaming_callback(piece, MSG_TYPE.MSG_TYPE_CHUNK):
                break  # Callback requested stop
        return "".join(pieces)

    @staticmethod
    def _ollama_library_version() -> str:
        """Return the installed ollama client library version, or "unknown"."""
        if ollama is None:
            return "unknown"
        # The package is not guaranteed to expose `__version__` at top level,
        # so never read it unguarded.
        version = getattr(ollama, "__version__", None)
        if version:
            return version
        try:
            from importlib import metadata
            return metadata.version("ollama")
        except Exception:
            return "unknown"

    def generate_text(self,
                      prompt: str,
                      images: Optional[List[str]] = None,  # List of image file paths
                      n_predict: Optional[int] = None,
                      stream: bool = False,
                      temperature: float = 0.7,  # Ollama default is 0.8, common default 0.7
                      top_k: int = 40,  # Ollama default is 40
                      top_p: float = 0.9,  # Ollama default is 0.9
                      repeat_penalty: float = 1.1,  # Ollama default is 1.1
                      repeat_last_n: int = 64,  # Ollama default is 64
                      seed: Optional[int] = None,
                      n_threads: Optional[int] = None,  # Ollama calls this num_thread
                      ctx_size: Optional[int] = None,  # Ollama calls this num_ctx
                      streaming_callback: Optional[Callable[[str, int], bool]] = None
                      ) -> Union[str, Dict[str, object]]:
        """
        Generate text using the Ollama service, with optional image support.

        When `images` is given the request goes through the client's chat API
        (one user message carrying the images); otherwise the plain generate
        API is used.

        Args:
            prompt (str): The input prompt for text generation.
            images (Optional[List[str]]): List of image file paths for multimodal generation.
            n_predict (Optional[int]): Maximum number of tokens to generate (num_predict).
            stream (bool): Whether to stream the output. Defaults to False.
            temperature (float): Sampling temperature.
            top_k (int): Top-k sampling parameter.
            top_p (float): Top-p sampling parameter.
            repeat_penalty (float): Penalty for repeated tokens.
            repeat_last_n (int): Number of previous tokens to consider for repeat penalty.
            seed (Optional[int]): Random seed for generation.
            n_threads (Optional[int]): Number of threads to use (num_thread).
            ctx_size (Optional[int]): Context window size (num_ctx).
            streaming_callback (Optional[Callable[[str, int], bool]]): Callback for streaming output.
                - First parameter (str): The chunk of text received from the stream.
                - Second parameter (int): The message type (typically MSG_TYPE.MSG_TYPE_CHUNK).
                Return False to stop streaming.

        Returns:
            Union[str, Dict[str, object]]: Generated text if successful, or a dictionary with
            "status" False and an "error" message (plus "status_code" for API response errors)
            if failed.
        """
        if not self.ollama_client:
            return {"status": False, "error": "Ollama client not initialized."}

        # Map binding parameter names to Ollama option names; None means
        # "leave the server default" and is dropped.
        requested = {
            'num_predict': n_predict,
            'temperature': None if temperature is None else float(temperature),
            'top_k': top_k,
            'top_p': top_p,
            'repeat_penalty': repeat_penalty,
            'repeat_last_n': repeat_last_n,
            'seed': seed,
            'num_thread': n_threads,
            'num_ctx': ctx_size,
        }
        options = {name: value for name, value in requested.items() if value is not None}

        try:
            if images:  # Multimodal
                # Image entries are forwarded as given (file paths); if they were
                # base64 strings they would need decoding to bytes first.
                messages = [{'role': 'user', 'content': prompt, 'images': list(images)}]
                response = self.ollama_client.chat(
                    model=self.model_name,
                    messages=messages,
                    stream=bool(stream),
                    options=options or None
                )
                if stream:
                    return self._collect_stream(
                        response,
                        lambda chunk: chunk.get('message', {}).get('content', ''),
                        streaming_callback
                    )
                return response.get('message', {}).get('content', '')

            # Text-only
            response = self.ollama_client.generate(
                model=self.model_name,
                prompt=prompt,
                stream=bool(stream),
                options=options or None
            )
            if stream:
                return self._collect_stream(
                    response,
                    lambda chunk: chunk.get('response', ''),
                    streaming_callback
                )
            return response.get('response', '')
        except ollama.ResponseError as e:
            error_message = f"Ollama API ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
            ASCIIColors.error(error_message)
            return {"status": False, "error": error_message, "status_code": e.status_code}
        except ollama.RequestError as e:  # Covers connection errors, timeouts during request
            error_message = f"Ollama API RequestError: {str(e)}"
            ASCIIColors.error(error_message)
            return {"status": False, "error": error_message}
        except Exception as ex:
            error_message = f"An unexpected error occurred: {str(ex)}"
            trace_exception(ex)
            return {"status": False, "error": error_message}

    def tokenize(self, text: str) -> List[Union[int, str]]:
        """
        Tokenize the input text by posting it to the server's /api/tokenize endpoint.

        Tokenization is model-specific, so it is delegated to the server (one
        network request per call, 10 second timeout). If the request fails for
        any reason a warning is logged and the text is split into single
        characters instead, so callers may receive either kind of list.

        Args:
            text (str): The text to tokenize.

        Returns:
            list: The server's "tokens" list (empty if the key is absent), or the
            characters of `text` when the server call failed.
        """
        api_url = f"{self.host_address.rstrip('/')}/api/tokenize"
        payload = {"model": self.model_name, "prompt": text}
        try:
            response = requests.post(api_url, json=payload, timeout=10, verify=self.verify_ssl_certificate, headers=self.ollama_client_headers)
            response.raise_for_status()
            return response.json().get("tokens", [])
        except Exception as e:
            ASCIIColors.warning(f"Failed to tokenize text with Ollama server, falling back to char tokens: {e}")
            return list(text)

    def detokenize(self, tokens: List[Union[int, str]]) -> str:
        """
        Convert a list of tokens back to text. If tokens are characters, joins them.
        If tokens are IDs, this is non-trivial without the model's tokenizer.

        Args:
            tokens (list): List of tokens to detokenize.

        Returns:
            str: Detokenized text. For non-string tokens this is only the
            concatenation of their string forms, not the original text.
        """
        if not tokens:
            return ""
        if isinstance(tokens[0], str):  # Assuming character tokens
            return "".join(tokens)
        # Placeholder: token IDs cannot be mapped back to text client-side
        # without the model's tokenizer.
        ASCIIColors.warning("Detokenizing integer tokens is not accurately supported by this Ollama client binding. Returning joined string of token IDs.")
        return "".join(map(str, tokens))

    def count_tokens(self, text: str) -> int:
        """
        Count tokens from a text using the Ollama server's /api/tokenize endpoint.

        Args:
            text (str): Text to count tokens from.

        Returns:
            int: Number of tokens in text. Returns -1 on error.
        """
        if not self.model_name:
            ASCIIColors.warning("Cannot count tokens, model_name is not set.")
            return -1
        return count_tokens_ollama(text, self.model_name, self.host_address, verify_ssl_certificate=self.verify_ssl_certificate, headers=self.ollama_client_headers)

    def embed(self, text: str, **kwargs) -> List[float]:
        """
        Get embeddings for the input text using Ollama API.

        Args:
            text (str): Input text to embed.
            **kwargs: Optional arguments. Can include 'model' to override self.model_name,
                      and 'options' dictionary for Ollama embedding options.

        Returns:
            List[float]: The embedding vector.

        Raises:
            ValueError: if no model name is available from init or kwargs.
            Exception: if embedding fails or Ollama client is not available.
        """
        if not self.ollama_client:
            raise Exception("Ollama client not initialized.")

        model_to_use = kwargs.get("model", self.model_name)
        if not model_to_use:
            raise ValueError("Model name for embedding must be specified either in init or via kwargs.")

        ollama_options = kwargs.get("options", None)
        try:
            response = self.ollama_client.embeddings(
                model=model_to_use,
                prompt=text,
                options=ollama_options
            )
            return response['embedding']
        except ollama.ResponseError as e:
            error_message = f"Ollama API Embeddings ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
            ASCIIColors.error(error_message)
            raise Exception(error_message) from e
        except ollama.RequestError as e:
            error_message = f"Ollama API Embeddings RequestError: {str(e)}"
            ASCIIColors.error(error_message)
            raise Exception(error_message) from e
        except Exception as ex:
            trace_exception(ex)
            raise Exception(f"Embedding failed: {str(ex)}") from ex

    def get_model_info(self) -> dict:
        """
        Return information about the current Ollama model setup.

        Returns:
            dict: Dictionary containing binding name, ollama library version, host address,
            model name, and the two capability flags.
        """
        return {
            "name": self.binding_name,  # from super class
            "version": self._ollama_library_version(),  # Ollama library version
            "host_address": self.host_address,
            "model_name": self.model_name,
            "supports_structured_output": False,  # Ollama primarily supports text/chat
            "supports_vision": True  # Many Ollama models (e.g. llava, bakllava) support vision
        }

    def listModels(self) -> List[Dict[str, str]]:
        """
        Lists available models from the Ollama service using the ollama-python library.

        Any failure is logged and reported as an empty list rather than raised.

        Returns:
            List[Dict[str, str]]: A list of model information dictionaries.
                                  Each dict has 'model_name', 'owned_by', 'created_datetime'.
        """
        if not self.ollama_client:
            ASCIIColors.error("Ollama client not initialized. Cannot list models.")
            return []
        try:
            ASCIIColors.debug(f"Listing ollama models from {self.host_address}")
            response_data = self.ollama_client.list()

            model_info_list = []
            if 'models' in response_data:
                for model_entry in response_data['models']:
                    model_info_list.append({
                        'model_name': model_entry.get('model'),
                        'owned_by': "",  # Ollama API doesn't provide a direct "owned_by" field.
                        'created_datetime': model_entry.get('modified_at')
                    })
            return model_info_list
        except ollama.ResponseError as e:
            ASCIIColors.error(f"Ollama API listModels ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code}) from {self.host_address}")
            return []
        except ollama.RequestError as e:  # Covers connection errors, timeouts during request
            ASCIIColors.error(f"Ollama API listModels RequestError: {str(e)} from {self.host_address}")
            return []
        except Exception as ex:
            trace_exception(ex)
            return []

    def load_model(self, model_name: str) -> bool:
        """
        Set the model name for subsequent operations. Ollama loads models on demand.

        No request is made here, so the model's existence is not verified;
        a wrong name only surfaces on the first generation/embedding call.

        Args:
            model_name (str): Name of the model to set.

        Returns:
            bool: True if model name is set.
        """
        self.model_name = model_name
        ASCIIColors.info(f"Ollama model set to: {model_name}. It will be loaded by the server on first use.")
        return True
|
|
430
|
+
|
|
431
|
+
if __name__ == '__main__':
|
|
432
|
+
global full_streamed_text
|
|
433
|
+
# Example Usage (requires an Ollama server running)
|
|
434
|
+
ASCIIColors.yellow("Testing OllamaBinding...")
|
|
435
|
+
|
|
436
|
+
# --- Configuration ---
|
|
437
|
+
# Replace with your Ollama server details if not localhost
|
|
438
|
+
ollama_host = "http://localhost:11434"
|
|
439
|
+
# Common model, pull it first: `ollama pull llama3` or `ollama pull llava` for vision
|
|
440
|
+
test_model_name = "llama3"
|
|
441
|
+
test_vision_model_name = "llava" # or another vision model you have
|
|
442
|
+
|
|
443
|
+
try:
|
|
444
|
+
# --- Initialization ---
|
|
445
|
+
ASCIIColors.cyan("\n--- Initializing Binding ---")
|
|
446
|
+
binding = OllamaBinding(host_address=ollama_host, model_name=test_model_name)
|
|
447
|
+
ASCIIColors.green("Binding initialized successfully.")
|
|
448
|
+
ASCIIColors.info(f"Using Ollama client version: {ollama.__version__ if ollama else 'N/A'}")
|
|
449
|
+
|
|
450
|
+
# --- List Models ---
|
|
451
|
+
ASCIIColors.cyan("\n--- Listing Models ---")
|
|
452
|
+
models = binding.listModels()
|
|
453
|
+
if models:
|
|
454
|
+
ASCIIColors.green(f"Found {len(models)} models. First 5:")
|
|
455
|
+
for m in models[:5]:
|
|
456
|
+
print(m)
|
|
457
|
+
else:
|
|
458
|
+
ASCIIColors.warning("No models found or failed to list models. Ensure Ollama is running and has models.")
|
|
459
|
+
|
|
460
|
+
# --- Load Model (sets active model) ---
|
|
461
|
+
ASCIIColors.cyan(f"\n--- Setting model to: {test_model_name} ---")
|
|
462
|
+
binding.load_model(test_model_name)
|
|
463
|
+
|
|
464
|
+
# --- Count Tokens ---
|
|
465
|
+
ASCIIColors.cyan("\n--- Counting Tokens ---")
|
|
466
|
+
sample_text = "Hello, world! This is a test."
|
|
467
|
+
token_count = binding.count_tokens(sample_text)
|
|
468
|
+
ASCIIColors.green(f"Token count for '{sample_text}': {token_count}")
|
|
469
|
+
|
|
470
|
+
# --- Tokenize/Detokenize (using server for tokenize) ---
|
|
471
|
+
ASCIIColors.cyan("\n--- Tokenize/Detokenize ---")
|
|
472
|
+
tokens = binding.tokenize(sample_text)
|
|
473
|
+
ASCIIColors.green(f"Tokens for '{sample_text}': {tokens[:10]}...") # Print first 10
|
|
474
|
+
detokenized_text = binding.detokenize(tokens)
|
|
475
|
+
# Note: detokenize might not be perfect if tokens are IDs and not chars
|
|
476
|
+
ASCIIColors.green(f"Detokenized text (may vary based on tokenization type): {detokenized_text}")
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
# --- Text Generation (Non-Streaming) ---
|
|
480
|
+
ASCIIColors.cyan("\n--- Text Generation (Non-Streaming) ---")
|
|
481
|
+
prompt_text = "Why is the sky blue?"
|
|
482
|
+
ASCIIColors.info(f"Prompt: {prompt_text}")
|
|
483
|
+
generated_text = binding.generate_text(prompt_text, n_predict=50, stream=False)
|
|
484
|
+
if isinstance(generated_text, str):
|
|
485
|
+
ASCIIColors.green(f"Generated text: {generated_text}")
|
|
486
|
+
else:
|
|
487
|
+
ASCIIColors.error(f"Generation failed: {generated_text}")
|
|
488
|
+
|
|
489
|
+
# --- Text Generation (Streaming) ---
|
|
490
|
+
ASCIIColors.cyan("\n--- Text Generation (Streaming) ---")
|
|
491
|
+
full_streamed_text = ""
|
|
492
|
+
def stream_callback(chunk: str, msg_type: int):
|
|
493
|
+
global full_streamed_text
|
|
494
|
+
print(f"{ASCIIColors.GREEN}Stream chunk: {chunk}{ASCIIColors.RESET}", end="", flush=True)
|
|
495
|
+
full_streamed_text += chunk
|
|
496
|
+
if len(full_streamed_text) > 100: # Example: stop after 100 chars for test
|
|
497
|
+
# print("\nStopping stream early for test.")
|
|
498
|
+
# return False # uncomment to test early stop
|
|
499
|
+
pass
|
|
500
|
+
return True
|
|
501
|
+
|
|
502
|
+
ASCIIColors.info(f"Prompt: {prompt_text}")
|
|
503
|
+
result = binding.generate_text(prompt_text, n_predict=100, stream=True, streaming_callback=stream_callback)
|
|
504
|
+
print("\n--- End of Stream ---")
|
|
505
|
+
if isinstance(result, str):
|
|
506
|
+
ASCIIColors.green(f"Full streamed text: {result}") # 'result' is the full_streamed_text
|
|
507
|
+
else:
|
|
508
|
+
ASCIIColors.error(f"Streaming generation failed: {result}")
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
# --- Embeddings ---
|
|
512
|
+
ASCIIColors.cyan("\n--- Embeddings ---")
|
|
513
|
+
# Ensure you have an embedding model like 'mxbai-embed-large' or 'nomic-embed-text'
|
|
514
|
+
# Or use a general model if it supports embedding (some do implicitly)
|
|
515
|
+
# For this test, we'll try with the current test_model_name,
|
|
516
|
+
# but ideally use a dedicated embedding model.
|
|
517
|
+
# binding.load_model("mxbai-embed-large") # if you have it
|
|
518
|
+
try:
|
|
519
|
+
embedding_text = "Lollms is a cool project."
|
|
520
|
+
embedding_vector = binding.embed(embedding_text) # Uses current self.model_name
|
|
521
|
+
ASCIIColors.green(f"Embedding for '{embedding_text}' (first 5 dims): {embedding_vector[:5]}...")
|
|
522
|
+
ASCIIColors.info(f"Embedding vector dimension: {len(embedding_vector)}")
|
|
523
|
+
except Exception as e:
|
|
524
|
+
ASCIIColors.warning(f"Could not get embedding with '{binding.model_name}': {e}. Some models don't support /api/embeddings or may need to be specified.")
|
|
525
|
+
ASCIIColors.warning("Try `ollama pull mxbai-embed-large` and set it as model for embedding.")
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
# --- Vision Model Test (if llava or similar is available) ---
|
|
529
|
+
# Create a dummy image file for testing
|
|
530
|
+
dummy_image_path = "dummy_test_image.png"
|
|
531
|
+
try:
|
|
532
|
+
from PIL import Image, ImageDraw
|
|
533
|
+
img = Image.new('RGB', (100, 30), color = ('red'))
|
|
534
|
+
d = ImageDraw.Draw(img)
|
|
535
|
+
d.text((10,10), "Hello", fill=('white'))
|
|
536
|
+
img.save(dummy_image_path)
|
|
537
|
+
ASCIIColors.info(f"Created dummy image: {dummy_image_path}")
|
|
538
|
+
|
|
539
|
+
ASCIIColors.cyan(f"\n--- Vision Generation (using {test_vision_model_name}) ---")
|
|
540
|
+
# Check if vision model exists
|
|
541
|
+
vision_model_exists = any(m['model_name'].startswith(test_vision_model_name) for m in models)
|
|
542
|
+
if not vision_model_exists:
|
|
543
|
+
ASCIIColors.warning(f"Vision model '{test_vision_model_name}' not found in pulled models. Skipping vision test.")
|
|
544
|
+
ASCIIColors.warning(f"Try: `ollama pull {test_vision_model_name}`")
|
|
545
|
+
else:
|
|
546
|
+
binding.load_model(test_vision_model_name) # Switch to vision model
|
|
547
|
+
vision_prompt = "What is written in this image?"
|
|
548
|
+
ASCIIColors.info(f"Vision Prompt: {vision_prompt} with image {dummy_image_path}")
|
|
549
|
+
|
|
550
|
+
vision_response = binding.generate_text(
|
|
551
|
+
prompt=vision_prompt,
|
|
552
|
+
images=[dummy_image_path],
|
|
553
|
+
n_predict=50,
|
|
554
|
+
stream=False
|
|
555
|
+
)
|
|
556
|
+
if isinstance(vision_response, str):
|
|
557
|
+
ASCIIColors.green(f"Vision model response: {vision_response}")
|
|
558
|
+
else:
|
|
559
|
+
ASCIIColors.error(f"Vision generation failed: {vision_response}")
|
|
560
|
+
except ImportError:
|
|
561
|
+
ASCIIColors.warning("Pillow library not found. Cannot create dummy image for vision test. `pip install Pillow`")
|
|
562
|
+
except Exception as e:
|
|
563
|
+
ASCIIColors.error(f"Error during vision test: {e}")
|
|
564
|
+
finally:
|
|
565
|
+
import os
|
|
566
|
+
if os.path.exists(dummy_image_path):
|
|
567
|
+
os.remove(dummy_image_path)
|
|
568
|
+
|
|
569
|
+
|
|
570
|
+
except ConnectionRefusedError:
|
|
571
|
+
ASCIIColors.error("Connection to Ollama server refused. Is Ollama running?")
|
|
572
|
+
except ImportError as e:
|
|
573
|
+
ASCIIColors.error(f"Import error: {e}. Make sure 'ollama' library is installed ('pip install ollama').")
|
|
574
|
+
except Exception as e:
|
|
575
|
+
ASCIIColors.error(f"An error occurred during testing: {e}")
|
|
576
|
+
trace_exception(e)
|
|
577
|
+
|
|
578
|
+
ASCIIColors.yellow("\nOllamaBinding test finished.")
|
|
@@ -199,6 +199,19 @@ class OpenAIBinding(LollmsLLMBinding):
|
|
|
199
199
|
return tiktoken.model.encoding_for_model(self.model_name).decode(tokens)
|
|
200
200
|
except:
|
|
201
201
|
return tiktoken.model.encoding_for_model("gpt-3.5-turbo").decode(tokens)
|
|
202
|
+
|
|
203
|
+
def count_tokens(self, text: str) -> int:
|
|
204
|
+
"""
|
|
205
|
+
Count tokens from a text.
|
|
206
|
+
|
|
207
|
+
Args:
|
|
208
|
+
text (str): The text whose tokens are counted.
|
|
209
|
+
|
|
210
|
+
Returns:
|
|
211
|
+
int: Number of tokens in text.
|
|
212
|
+
"""
|
|
213
|
+
return len(self.tokenize(text))
|
|
214
|
+
|
|
202
215
|
|
|
203
216
|
def embed(self, text: str, **kwargs) -> list:
|
|
204
217
|
"""
|
{lollms_client-0.12.2 → lollms_client-0.12.4}/lollms_client/llm_bindings/transformers/__init__.py
RENAMED
|
@@ -254,6 +254,19 @@ class TransformersBinding(LollmsLLMBinding):
|
|
|
254
254
|
"""Convert a list of tokens back to text."""
|
|
255
255
|
return "".join(tokens)
|
|
256
256
|
|
|
257
|
+
def count_tokens(self, text: str) -> int:
|
|
258
|
+
"""
|
|
259
|
+
Count tokens from a text.
|
|
260
|
+
|
|
261
|
+
Args:
|
|
262
|
+
text (str): The text whose tokens are counted.
|
|
263
|
+
|
|
264
|
+
Returns:
|
|
265
|
+
int: Number of tokens in text.
|
|
266
|
+
"""
|
|
267
|
+
return len(self.tokenize(text))
|
|
268
|
+
|
|
269
|
+
|
|
257
270
|
def embed(self, text: str, **kwargs) -> list:
|
|
258
271
|
"""Get embeddings for the input text (placeholder)."""
|
|
259
272
|
pass
|
|
@@ -103,6 +103,19 @@ class LollmsLLMBinding(ABC):
|
|
|
103
103
|
str: Detokenized text.
|
|
104
104
|
"""
|
|
105
105
|
pass
|
|
106
|
+
|
|
107
|
+
@abstractmethod
|
|
108
|
+
def count_tokens(self, text: str) -> int:
|
|
109
|
+
"""
|
|
110
|
+
Count tokens from a text.
|
|
111
|
+
|
|
112
|
+
Args:
|
|
113
|
+
text (str): The text whose tokens are counted.
|
|
114
|
+
|
|
115
|
+
Returns:
|
|
116
|
+
int: Number of tokens in text.
|
|
117
|
+
"""
|
|
118
|
+
pass
|
|
106
119
|
|
|
107
120
|
@abstractmethod
|
|
108
121
|
def embed(self, text: str, **kwargs) -> list:
|
|
@@ -1,298 +0,0 @@
|
|
|
1
|
-
# bindings/ollama/binding.py
|
|
2
|
-
import requests
|
|
3
|
-
import json
|
|
4
|
-
from lollms_client.lollms_llm_binding import LollmsLLMBinding
|
|
5
|
-
from lollms_client.lollms_types import MSG_TYPE
|
|
6
|
-
from lollms_client.lollms_utilities import encode_image
|
|
7
|
-
from lollms_client.lollms_types import ELF_COMPLETION_FORMAT
|
|
8
|
-
from typing import Optional, Callable, List, Union
|
|
9
|
-
from ascii_colors import ASCIIColors, trace_exception
|
|
10
|
-
|
|
11
|
-
BindingName = "OllamaBinding"
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class OllamaBinding(LollmsLLMBinding):
|
|
15
|
-
"""Ollama-specific binding implementation"""
|
|
16
|
-
|
|
17
|
-
DEFAULT_HOST_ADDRESS = "http://localhost:11434"
|
|
18
|
-
|
|
19
|
-
def __init__(self,
|
|
20
|
-
host_address: str = None,
|
|
21
|
-
model_name: str = "",
|
|
22
|
-
service_key: str = None,
|
|
23
|
-
verify_ssl_certificate: bool = True,
|
|
24
|
-
default_completion_format: ELF_COMPLETION_FORMAT = ELF_COMPLETION_FORMAT.Chat
|
|
25
|
-
):
|
|
26
|
-
"""
|
|
27
|
-
Initialize the Ollama binding.
|
|
28
|
-
|
|
29
|
-
Args:
|
|
30
|
-
host_address (str): Host address for the Ollama service. Defaults to DEFAULT_HOST_ADDRESS.
|
|
31
|
-
model_name (str): Name of the model to use. Defaults to empty string.
|
|
32
|
-
service_key (str): Authentication key for the service. Defaults to None.
|
|
33
|
-
verify_ssl_certificate (bool): Whether to verify SSL certificates. Defaults to True.
|
|
34
|
-
personality (Optional[int]): Ignored parameter for compatibility with LollmsLLMBinding.
|
|
35
|
-
"""
|
|
36
|
-
super().__init__(
|
|
37
|
-
binding_name="ollama",
|
|
38
|
-
host_address=host_address if host_address is not None else self.DEFAULT_HOST_ADDRESS,
|
|
39
|
-
model_name=model_name,
|
|
40
|
-
service_key=service_key,
|
|
41
|
-
verify_ssl_certificate=verify_ssl_certificate,
|
|
42
|
-
default_completion_format=default_completion_format
|
|
43
|
-
)
|
|
44
|
-
self.model = None
|
|
45
|
-
|
|
46
|
-
def generate_text(self,
|
|
47
|
-
prompt: str,
|
|
48
|
-
images: Optional[List[str]] = None,
|
|
49
|
-
n_predict: Optional[int] = None,
|
|
50
|
-
stream: bool = False,
|
|
51
|
-
temperature: float = 0.1,
|
|
52
|
-
top_k: int = 50,
|
|
53
|
-
top_p: float = 0.95,
|
|
54
|
-
repeat_penalty: float = 0.8,
|
|
55
|
-
repeat_last_n: int = 40,
|
|
56
|
-
seed: Optional[int] = None,
|
|
57
|
-
n_threads: int = 8,
|
|
58
|
-
ctx_size: int | None = None,
|
|
59
|
-
streaming_callback: Optional[Callable[[str, str], None]] = None) -> Union[str, dict]:
|
|
60
|
-
"""
|
|
61
|
-
Generate text using the Ollama service, with optional image support.
|
|
62
|
-
|
|
63
|
-
Args:
|
|
64
|
-
prompt (str): The input prompt for text generation.
|
|
65
|
-
images (Optional[List[str]]): List of image file paths for multimodal generation.
|
|
66
|
-
If provided, uses the /api/chat endpoint with message format.
|
|
67
|
-
n_predict (Optional[int]): Maximum number of tokens to generate.
|
|
68
|
-
stream (bool): Whether to stream the output. Defaults to False.
|
|
69
|
-
temperature (float): Sampling temperature. Defaults to 0.1.
|
|
70
|
-
top_k (int): Top-k sampling parameter. Defaults to 50 (not used in Ollama API directly).
|
|
71
|
-
top_p (float): Top-p sampling parameter. Defaults to 0.95 (not used in Ollama API directly).
|
|
72
|
-
repeat_penalty (float): Penalty for repeated tokens. Defaults to 0.8 (not used in Ollama API directly).
|
|
73
|
-
repeat_last_n (int): Number of previous tokens to consider for repeat penalty. Defaults to 40 (not used).
|
|
74
|
-
seed (Optional[int]): Random seed for generation.
|
|
75
|
-
n_threads (int): Number of threads to use. Defaults to 8 (not used in Ollama API directly).
|
|
76
|
-
streaming_callback (Optional[Callable[[str, str], None]]): Callback for streaming output.
|
|
77
|
-
- First parameter (str): The chunk of text received from the stream.
|
|
78
|
-
- Second parameter (str): The message type (typically MSG_TYPE.MSG_TYPE_CHUNK).
|
|
79
|
-
|
|
80
|
-
Returns:
|
|
81
|
-
Union[str, dict]: Generated text if successful, or a dictionary with status and error if failed.
|
|
82
|
-
|
|
83
|
-
Note:
|
|
84
|
-
Some parameters (top_k, top_p, repeat_penalty, repeat_last_n, n_threads) are included for interface
|
|
85
|
-
consistency but are not directly used in the Ollama API implementation.
|
|
86
|
-
"""
|
|
87
|
-
# Set headers
|
|
88
|
-
headers = {
|
|
89
|
-
'Content-Type': 'application/json',
|
|
90
|
-
}
|
|
91
|
-
if self.service_key:
|
|
92
|
-
headers['Authorization'] = f'Bearer {self.service_key}'
|
|
93
|
-
|
|
94
|
-
# Clean host address
|
|
95
|
-
host_address = self.host_address.rstrip('/')
|
|
96
|
-
|
|
97
|
-
# Prepare data based on whether images are provided
|
|
98
|
-
if images:
|
|
99
|
-
# Multimodal generation using the /api/chat endpoint
|
|
100
|
-
images_list = [encode_image(image_path) for image_path in images]
|
|
101
|
-
data = {
|
|
102
|
-
'model': self.model_name,
|
|
103
|
-
'messages': [{
|
|
104
|
-
"role": "user",
|
|
105
|
-
"content": [
|
|
106
|
-
{"type": "text", "text": prompt}
|
|
107
|
-
] + [
|
|
108
|
-
{
|
|
109
|
-
"type": "image_url",
|
|
110
|
-
"image_url": {"url": f"data:image/jpeg;base64,{img}"}
|
|
111
|
-
} for img in images_list
|
|
112
|
-
]
|
|
113
|
-
}],
|
|
114
|
-
"stream": stream,
|
|
115
|
-
"temperature": float(temperature),
|
|
116
|
-
"max_tokens": n_predict,
|
|
117
|
-
}
|
|
118
|
-
if ctx_size is not None:
|
|
119
|
-
data["num_ctx"] = ctx_size
|
|
120
|
-
url = f'{host_address}/api/chat'
|
|
121
|
-
else:
|
|
122
|
-
# Text-only generation using /api/generate endpoint
|
|
123
|
-
data = {
|
|
124
|
-
'model': self.model_name,
|
|
125
|
-
'prompt': prompt,
|
|
126
|
-
"stream": stream,
|
|
127
|
-
"temperature": float(temperature),
|
|
128
|
-
"max_tokens": n_predict
|
|
129
|
-
}
|
|
130
|
-
url = f'{host_address}/api/generate'
|
|
131
|
-
|
|
132
|
-
# Make the request
|
|
133
|
-
response = requests.post(url, json=data, headers=headers, stream=stream)
|
|
134
|
-
|
|
135
|
-
# Handle response
|
|
136
|
-
if not stream:
|
|
137
|
-
if response.status_code == 200:
|
|
138
|
-
try:
|
|
139
|
-
if images:
|
|
140
|
-
# For multimodal, response is in chat format
|
|
141
|
-
return response.json()["message"]["content"]
|
|
142
|
-
else:
|
|
143
|
-
# For text-only
|
|
144
|
-
return response.json()["response"]
|
|
145
|
-
except Exception as ex:
|
|
146
|
-
return {"status": False, "error": str(ex)}
|
|
147
|
-
elif response.status_code == 404:
|
|
148
|
-
ASCIIColors.error(response.content.decode("utf-8", errors='ignore'))
|
|
149
|
-
return {"status": False, "error": "404 Not Found"}
|
|
150
|
-
else:
|
|
151
|
-
return {"status": False, "error": response.text}
|
|
152
|
-
else:
|
|
153
|
-
text = ""
|
|
154
|
-
if response.status_code == 200:
|
|
155
|
-
try:
|
|
156
|
-
for line in response.iter_lines():
|
|
157
|
-
decoded = line.decode("utf-8")
|
|
158
|
-
if images:
|
|
159
|
-
# Streaming with images (chat format)
|
|
160
|
-
if decoded.startswith("data: "):
|
|
161
|
-
json_data = json.loads(decoded[5:].strip())
|
|
162
|
-
chunk = json_data["message"]["content"] if "message" in json_data else ""
|
|
163
|
-
else:
|
|
164
|
-
continue
|
|
165
|
-
else:
|
|
166
|
-
# Streaming without images (generate format)
|
|
167
|
-
json_data = json.loads(decoded)
|
|
168
|
-
chunk = json_data["response"]
|
|
169
|
-
|
|
170
|
-
text += chunk
|
|
171
|
-
if streaming_callback:
|
|
172
|
-
if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
|
|
173
|
-
break
|
|
174
|
-
return text
|
|
175
|
-
except Exception as ex:
|
|
176
|
-
return {"status": False, "error": str(ex)}
|
|
177
|
-
elif response.status_code == 404:
|
|
178
|
-
ASCIIColors.error(response.content.decode("utf-8", errors='ignore'))
|
|
179
|
-
return {"status": False, "error": "404 Not Found"}
|
|
180
|
-
elif response.status_code == 400:
|
|
181
|
-
try:
|
|
182
|
-
content = json.loads(response.content.decode("utf8"))
|
|
183
|
-
return {"status": False, "error": content.get("error", {}).get("message", content.get("message", "Unknown error"))}
|
|
184
|
-
except:
|
|
185
|
-
return {"status": False, "error": response.content.decode("utf8")}
|
|
186
|
-
else:
|
|
187
|
-
return {"status": False, "error": response.text}
|
|
188
|
-
|
|
189
|
-
def tokenize(self, text: str) -> list:
|
|
190
|
-
"""
|
|
191
|
-
Tokenize the input text into a list of characters.
|
|
192
|
-
|
|
193
|
-
Args:
|
|
194
|
-
text (str): The text to tokenize.
|
|
195
|
-
|
|
196
|
-
Returns:
|
|
197
|
-
list: List of individual characters.
|
|
198
|
-
"""
|
|
199
|
-
return list(text)
|
|
200
|
-
|
|
201
|
-
def detokenize(self, tokens: list) -> str:
|
|
202
|
-
"""
|
|
203
|
-
Convert a list of tokens back to text.
|
|
204
|
-
|
|
205
|
-
Args:
|
|
206
|
-
tokens (list): List of tokens (characters) to detokenize.
|
|
207
|
-
|
|
208
|
-
Returns:
|
|
209
|
-
str: Detokenized text.
|
|
210
|
-
"""
|
|
211
|
-
return "".join(tokens)
|
|
212
|
-
|
|
213
|
-
def embed(self, text: str, **kwargs) -> list:
|
|
214
|
-
"""
|
|
215
|
-
Get embeddings for the input text using Ollama API
|
|
216
|
-
|
|
217
|
-
Args:
|
|
218
|
-
text (str or List[str]): Input text to embed
|
|
219
|
-
**kwargs: Additional arguments like model, truncate, options, keep_alive
|
|
220
|
-
|
|
221
|
-
Returns:
|
|
222
|
-
dict: Response containing embeddings
|
|
223
|
-
"""
|
|
224
|
-
import requests
|
|
225
|
-
|
|
226
|
-
url = f"{self.base_url}/api/embed"
|
|
227
|
-
|
|
228
|
-
# Prepare the request payload
|
|
229
|
-
payload = {
|
|
230
|
-
"input": text,
|
|
231
|
-
"model": kwargs.get("model", "llama2") # default model
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
# Add optional parameters if provided
|
|
235
|
-
if "truncate" in kwargs:
|
|
236
|
-
payload["truncate"] = kwargs["truncate"]
|
|
237
|
-
if "options" in kwargs:
|
|
238
|
-
payload["options"] = kwargs["options"]
|
|
239
|
-
if "keep_alive" in kwargs:
|
|
240
|
-
payload["keep_alive"] = kwargs["keep_alive"]
|
|
241
|
-
|
|
242
|
-
try:
|
|
243
|
-
response = requests.post(url, json=payload)
|
|
244
|
-
response.raise_for_status() # Raise exception for bad status codes
|
|
245
|
-
return response.json()
|
|
246
|
-
except requests.exceptions.RequestException as e:
|
|
247
|
-
raise Exception(f"Embedding request failed: {str(e)}")
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
def get_model_info(self) -> dict:
|
|
251
|
-
"""
|
|
252
|
-
Return information about the current Ollama model.
|
|
253
|
-
|
|
254
|
-
Returns:
|
|
255
|
-
dict: Dictionary containing model name, version, and host address.
|
|
256
|
-
"""
|
|
257
|
-
return {
|
|
258
|
-
"name": "ollama",
|
|
259
|
-
"version": "2.0",
|
|
260
|
-
"host_address": self.host_address,
|
|
261
|
-
"model_name": self.model_name
|
|
262
|
-
}
|
|
263
|
-
def listModels(self):
|
|
264
|
-
""" Lists available models """
|
|
265
|
-
url = f'{self.host_address}/api/tags'
|
|
266
|
-
headers = {
|
|
267
|
-
'accept': 'application/json',
|
|
268
|
-
'Authorization': f'Bearer {self.service_key}'
|
|
269
|
-
}
|
|
270
|
-
response = requests.get(url, headers=headers, verify= self.verify_ssl_certificate)
|
|
271
|
-
try:
|
|
272
|
-
ASCIIColors.debug("Listing ollama models")
|
|
273
|
-
data = response.json()
|
|
274
|
-
model_info = []
|
|
275
|
-
|
|
276
|
-
for model in data['models']:
|
|
277
|
-
model_name = model['name']
|
|
278
|
-
owned_by = ""
|
|
279
|
-
created_datetime = model["modified_at"]
|
|
280
|
-
model_info.append({'model_name': model_name, 'owned_by': owned_by, 'created_datetime': created_datetime})
|
|
281
|
-
|
|
282
|
-
return model_info
|
|
283
|
-
except Exception as ex:
|
|
284
|
-
trace_exception(ex)
|
|
285
|
-
return []
|
|
286
|
-
def load_model(self, model_name: str) -> bool:
|
|
287
|
-
"""
|
|
288
|
-
Load a specific model into the Ollama binding.
|
|
289
|
-
|
|
290
|
-
Args:
|
|
291
|
-
model_name (str): Name of the model to load.
|
|
292
|
-
|
|
293
|
-
Returns:
|
|
294
|
-
bool: True if model loaded successfully.
|
|
295
|
-
"""
|
|
296
|
-
self.model = model_name
|
|
297
|
-
self.model_name = model_name
|
|
298
|
-
return True
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|