lollms-client 0.9.2__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of lollms-client might be problematic.
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/__init__.py +1 -0
- lollms_client/llm_bindings/lollms/__init__.py +301 -0
- lollms_client/llm_bindings/ollama/__init__.py +293 -0
- lollms_client/llm_bindings/openai/__init__.py +260 -0
- lollms_client/llm_bindings/transformers/__init__.py +281 -0
- lollms_client/lollms_core.py +179 -1252
- lollms_client/lollms_llm_binding.py +210 -0
- lollms_client/lollms_tasks.py +42 -109
- lollms_client/lollms_tts.py +7 -3
- lollms_client/lollms_types.py +19 -1
- lollms_client/stt_bindings/__init__.py +0 -0
- lollms_client/stt_bindings/lollms/__init__.py +0 -0
- lollms_client/tti_bindings/__init__.py +0 -0
- lollms_client/tti_bindings/lollms/__init__.py +0 -0
- lollms_client/tts_bindings/__init__.py +0 -0
- lollms_client/tts_bindings/lollms/__init__.py +0 -0
- lollms_client/ttv_bindings/__init__.py +0 -0
- lollms_client/ttv_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.9.2.dist-info → lollms_client-0.10.0.dist-info}/METADATA +16 -12
- lollms_client-0.10.0.dist-info/RECORD +34 -0
- lollms_client-0.9.2.dist-info/RECORD +0 -20
- {lollms_client-0.9.2.dist-info → lollms_client-0.10.0.dist-info}/LICENSE +0 -0
- {lollms_client-0.9.2.dist-info → lollms_client-0.10.0.dist-info}/WHEEL +0 -0
- {lollms_client-0.9.2.dist-info → lollms_client-0.10.0.dist-info}/top_level.txt +0 -0
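The bulk of this release is a refactor: most of the service-specific logic that previously lived in lollms_core.py (hence the -1252 lines there) moves into per-service binding packages under lollms_client/llm_bindings/, each exposing a module-level BindingName and a class derived from LollmsLLMBinding, with empty placeholder packages reserved for STT, TTS, TTI and TTV bindings. As a rough illustration of how such bindings could be discovered dynamically, here is a minimal sketch; the BindingsManager class and its method are assumptions for illustration and are not taken from lollms_llm_binding.py:

# Hypothetical sketch: dynamic discovery of LLM bindings by package name.
# Assumes each binding package (e.g. lollms_client.llm_bindings.ollama)
# exposes a module-level `BindingName` naming its binding class, as the
# modules added in this diff do. The manager class itself is illustrative.
import importlib

class BindingsManager:
    def __init__(self, package_root: str = "lollms_client.llm_bindings"):
        self.package_root = package_root

    def build_binding(self, binding_name: str, **kwargs):
        # Import e.g. lollms_client.llm_bindings.ollama and resolve BindingName
        module = importlib.import_module(f"{self.package_root}.{binding_name}")
        binding_class = getattr(module, module.BindingName)
        return binding_class(**kwargs)

# Usage (assuming the packages from this release are installed):
# binding = BindingsManager().build_binding("ollama", model_name="mistral")

Keeping the class name behind a string attribute rather than a fixed import lets new backends be dropped into llm_bindings/ without touching the core client.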
lollms_client/__init__.py
CHANGED
@@ -1,4 +1,4 @@
-from lollms_client.lollms_core import LollmsClient,
+from lollms_client.lollms_core import LollmsClient, ELF_COMPLETION_FORMAT
 from lollms_client.lollms_tasks import TasksLibrary
 from lollms_client.lollms_types import MSG_TYPE
 from lollms_client.lollms_personality import LollmsPersonality
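For consumers, the only visible change in the package root is that ELF_COMPLETION_FORMAT is now re-exported next to LollmsClient. A minimal sketch of the new import path follows; whether LollmsClient itself accepts a completion-format argument is not shown in this diff, so only the binding-level default is referenced:

# New in 0.10.0: ELF_COMPLETION_FORMAT is importable from the package root.
from lollms_client import LollmsClient, ELF_COMPLETION_FORMAT

# The bindings introduced in this release accept a default_completion_format
# argument and fall back to ELF_COMPLETION_FORMAT.Chat when none is given.
preferred_format = ELF_COMPLETION_FORMAT.Chat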
lollms_client/llm_bindings/__init__.py
ADDED
@@ -0,0 +1 @@
+# to be done
lollms_client/llm_bindings/lollms/__init__.py
ADDED
@@ -0,0 +1,301 @@
+# bindings/lollms/binding.py
+import requests
+from lollms_client.lollms_llm_binding import LollmsLLMBinding
+from lollms_client.lollms_types import MSG_TYPE
+from lollms_client.lollms_utilities import encode_image
+from lollms_client.lollms_types import ELF_COMPLETION_FORMAT
+from ascii_colors import ASCIIColors, trace_exception
+from typing import Optional, Callable, List, Union
+import json
+
+BindingName = "LollmsLLMBinding"
+
+class LollmsLLMBinding(LollmsLLMBinding):
+    """LOLLMS-specific binding implementation"""
+
+    DEFAULT_HOST_ADDRESS = "http://localhost:9600"
+
+    def __init__(self,
+                 host_address: str = None,
+                 model_name: str = "",
+                 service_key: str = None,
+                 verify_ssl_certificate: bool = True,
+                 personality: Optional[int] = None,
+                 default_completion_format: ELF_COMPLETION_FORMAT = ELF_COMPLETION_FORMAT.Chat):
+        """
+        Initialize the LOLLMS binding.
+
+        Args:
+            host_address (str): Host address for the LOLLMS service. Defaults to DEFAULT_HOST_ADDRESS.
+            model_name (str): Name of the model to use. Defaults to empty string.
+            service_key (str): Authentication key for the service. Defaults to None.
+            verify_ssl_certificate (bool): Whether to verify SSL certificates. Defaults to True.
+            personality (Optional[int]): Personality ID for generation. Defaults to None.
+        """
+        super().__init__(
+            host_address=host_address if host_address is not None else self.DEFAULT_HOST_ADDRESS,
+            model_name=model_name,
+            service_key=service_key,
+            verify_ssl_certificate=verify_ssl_certificate,
+            default_completion_format=default_completion_format
+        )
+        self.personality = personality
+        self.model = None
+
+    def generate_text(self,
+                      prompt: str,
+                      images: Optional[List[str]] = None,
+                      n_predict: Optional[int] = None,
+                      stream: bool = False,
+                      temperature: float = 0.1,
+                      top_k: int = 50,
+                      top_p: float = 0.95,
+                      repeat_penalty: float = 0.8,
+                      repeat_last_n: int = 40,
+                      seed: Optional[int] = None,
+                      n_threads: int = 8,
+                      streaming_callback: Optional[Callable[[str, str], None]] = None) -> Union[str, dict]:
+        """
+        Generate text using the LOLLMS service, with optional image support.
+
+        Args:
+            prompt (str): The input prompt for text generation.
+            images (Optional[List[str]]): List of image file paths for multimodal generation.
+                If provided, uses the /lollms_generate_with_images endpoint.
+            n_predict (Optional[int]): Maximum number of tokens to generate.
+            stream (bool): Whether to stream the output. Defaults to False.
+            temperature (float): Sampling temperature. Defaults to 0.1.
+            top_k (int): Top-k sampling parameter. Defaults to 50.
+            top_p (float): Top-p sampling parameter. Defaults to 0.95.
+            repeat_penalty (float): Penalty for repeated tokens. Defaults to 0.8.
+            repeat_last_n (int): Number of previous tokens to consider for repeat penalty. Defaults to 40.
+            seed (Optional[int]): Random seed for generation.
+            n_threads (int): Number of threads to use. Defaults to 8.
+            streaming_callback (Optional[Callable[[str, str], None]]): Callback for streaming output.
+                - First parameter (str): The chunk of text received from the stream.
+                - Second parameter (str): The message type (typically MSG_TYPE.MSG_TYPE_CHUNK).
+
+        Returns:
+            Union[str, dict]: Generated text if successful, or a dictionary with status and error if failed.
+        """
+        # Determine endpoint based on presence of images
+        endpoint = "/lollms_generate_with_images" if images else "/lollms_generate"
+        url = f"{self.host_address}{endpoint}"
+
+        # Set headers
+        headers = {
+            'Content-Type': 'application/json',
+        }
+        if self.service_key:
+            headers['Authorization'] = f'Bearer {self.service_key}'
+
+        # Handle images if provided
+        image_data = []
+        if images:
+            for image_path in images:
+                try:
+                    encoded_image = encode_image(image_path)
+                    image_data.append(encoded_image)
+                except Exception as e:
+                    return {"status": False, "error": f"Failed to process image {image_path}: {str(e)}"}
+
+        # Prepare request data
+        data = {
+            "prompt": prompt,
+            "model_name": self.model_name,
+            "personality": self.personality,
+            "n_predict": n_predict,
+            "stream": stream,
+            "temperature": temperature,
+            "top_k": top_k,
+            "top_p": top_p,
+            "repeat_penalty": repeat_penalty,
+            "repeat_last_n": repeat_last_n,
+            "seed": seed,
+            "n_threads": n_threads
+        }
+
+        if image_data:
+            data["images"] = image_data
+
+        # Make the request
+        response = requests.post(
+            url,
+            json=data,
+            headers=headers,
+            stream=stream,
+            verify=self.verify_ssl_certificate
+        )
+
+        if not stream:
+            if response.status_code == 200:
+                try:
+                    text = response.text.strip().rstrip('!')
+                    return text
+                except Exception as ex:
+                    return {"status": False, "error": str(ex)}
+            else:
+                return {"status": False, "error": response.text}
+        else:
+            text = ""
+            if response.status_code == 200:
+                try:
+                    for line in response.iter_lines():
+                        chunk = line.decode("utf-8")
+                        text += chunk
+                        if streaming_callback:
+                            streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK)
+                    # Handle potential quotes from streaming response
+                    if text and text[0] == '"':
+                        text = text[1:]
+                    if text and text[-1] == '"':
+                        text = text[:-1]
+                    return text.rstrip('!')
+                except Exception as ex:
+                    return {"status": False, "error": str(ex)}
+            else:
+                return {"status": False, "error": response.text}
+
+    def tokenize(self, text: str) -> list:
+        """
+        Tokenize the input text into a list of tokens using the /lollms_tokenize endpoint.
+
+        Args:
+            text (str): The text to tokenize.
+
+        Returns:
+            list: List of tokens.
+        """
+        try:
+            # Prepare the request payload
+            payload = {
+                "prompt": text,
+                "return_named": False  # Set to True if you want named tokens
+            }
+
+            # Make the POST request to the /lollms_tokenize endpoint
+            response = requests.post(f"{self.host_address}/lollms_tokenize", json=payload)
+
+            # Check if the request was successful
+            if response.status_code == 200:
+                return response.json()
+            else:
+                raise Exception(f"Failed to tokenize text: {response.text}")
+        except Exception as ex:
+            trace_exception(ex)
+            raise Exception(f"Failed to tokenize text: {response.text}")
+
+    def detokenize(self, tokens: list) -> str:
+        """
+        Convert a list of tokens back to text using the /lollms_detokenize endpoint.
+
+        Args:
+            tokens (list): List of tokens to detokenize.
+
+        Returns:
+            str: Detokenized text.
+        """
+        try:
+            # Prepare the request payload
+            payload = {
+                "tokens": tokens,
+                "return_named": False  # Set to True if you want named tokens
+            }
+
+            # Make the POST request to the /lollms_detokenize endpoint
+            response = requests.post(f"{self.host_address}/lollms_detokenize", json=payload)
+
+            # Check if the request was successful
+            if response.status_code == 200:
+                return response.json()
+            else:
+                raise Exception(f"Failed to detokenize tokens: {response.text}")
+        except Exception as ex:
+            return {"status": False, "error": str(ex)}
+
+
+    def embed(self, text: str, **kwargs) -> list:
+        """
+        Get embeddings for the input text using Ollama API
+
+        Args:
+            text (str or List[str]): Input text to embed
+            **kwargs: Additional arguments like model, truncate, options, keep_alive
+
+        Returns:
+            dict: Response containing embeddings
+        """
+        api_key = kwargs.pop("api_key", None)
+        headers = (
+            {"Content-Type": "application/json", "Authorization": api_key}
+            if api_key
+            else {"Content-Type": "application/json"}
+        )
+        embeddings = []
+        request_data = {"text": text}
+        response = requests.post(f"{self.host_address}/lollms_embed", json=request_data, headers=headers)
+        response.raise_for_status()
+        result = response.json()
+        return result["vector"]
+
+    def get_model_info(self) -> dict:
+        """
+        Return information about the current LOLLMS model.
+
+        Returns:
+            dict: Dictionary containing model name, version, host address, and personality.
+        """
+        return {
+            "name": "lollms",
+            "version": "1.0",
+            "host_address": self.host_address,
+            "model_name": self.model_name,
+            "personality": self.personality
+        }
+
+
+    def listModels(self) -> dict:
+        """Lists models"""
+        url = f"{self.host_address}/list_models"
+
+        response = requests.get(url)
+
+        if response.status_code == 200:
+            try:
+                text = json.loads(response.content.decode("utf-8"))
+                return text
+            except Exception as ex:
+                return {"status": False, "error": str(ex)}
+        else:
+            return {"status": False, "error": response.text}
+
+
+    def load_model(self, model_name: str) -> bool:
+        """
+        Load a specific model into the LOLLMS binding.
+
+        Args:
+            model_name (str): Name of the model to load.
+
+        Returns:
+            bool: True if model loaded successfully.
+        """
+        self.model = model_name
+        self.model_name = model_name
+        return True
+
+    # Lollms specific methods
+    def lollms_listMountedPersonalities(self, host_address:str=None):
+        host_address = host_address if host_address else self.host_address
+        url = f"{host_address}/list_mounted_personalities"
+
+        response = requests.get(url)
+
+        if response.status_code == 200:
+            try:
+                text = json.loads(response.content.decode("utf-8"))
+                return text
+            except Exception as ex:
+                return {"status": False, "error": str(ex)}
+        else:
+            return {"status": False, "error": response.text}
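Taken together, the new module is a thin REST wrapper around a running LOLLMS server. A hedged usage sketch against a local server follows; the host address, model name and prompts below are placeholders, not values taken from the diff:

# Hypothetical usage of the binding added above; host and model are placeholders.
from lollms_client.llm_bindings.lollms import LollmsLLMBinding

binding = LollmsLLMBinding(host_address="http://localhost:9600", model_name="my-model")

def on_chunk(chunk: str, msg_type) -> None:
    # Print streamed chunks as they arrive.
    print(chunk, end="", flush=True)

# Non-streaming call: returns a str on success, or a {"status": False, ...} dict on failure.
result = binding.generate_text("Describe the lollms binding system.", n_predict=128)

# Streaming call: each decoded line is appended and passed to the callback.
streamed = binding.generate_text("Say hello.", stream=True, streaming_callback=on_chunk)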
lollms_client/llm_bindings/ollama/__init__.py
ADDED
@@ -0,0 +1,293 @@
+# bindings/ollama/binding.py
+import requests
+import json
+from lollms_client.lollms_llm_binding import LollmsLLMBinding
+from lollms_client.lollms_types import MSG_TYPE
+from lollms_client.lollms_utilities import encode_image
+from lollms_client.lollms_types import ELF_COMPLETION_FORMAT
+from typing import Optional, Callable, List, Union
+from ascii_colors import ASCIIColors, trace_exception
+
+BindingName = "OllamaBinding"
+
+
+class OllamaBinding(LollmsLLMBinding):
+    """Ollama-specific binding implementation"""
+
+    DEFAULT_HOST_ADDRESS = "http://localhost:11434"
+
+    def __init__(self,
+                 host_address: str = None,
+                 model_name: str = "",
+                 service_key: str = None,
+                 verify_ssl_certificate: bool = True,
+                 default_completion_format: ELF_COMPLETION_FORMAT = ELF_COMPLETION_FORMAT.Chat
+                 ):
+        """
+        Initialize the Ollama binding.
+
+        Args:
+            host_address (str): Host address for the Ollama service. Defaults to DEFAULT_HOST_ADDRESS.
+            model_name (str): Name of the model to use. Defaults to empty string.
+            service_key (str): Authentication key for the service. Defaults to None.
+            verify_ssl_certificate (bool): Whether to verify SSL certificates. Defaults to True.
+            personality (Optional[int]): Ignored parameter for compatibility with LollmsLLMBinding.
+        """
+        super().__init__(
+            host_address=host_address if host_address is not None else self.DEFAULT_HOST_ADDRESS,
+            model_name=model_name,
+            service_key=service_key,
+            verify_ssl_certificate=verify_ssl_certificate,
+            default_completion_format=default_completion_format
+        )
+        self.model = None
+
+    def generate_text(self,
+                      prompt: str,
+                      images: Optional[List[str]] = None,
+                      n_predict: Optional[int] = None,
+                      stream: bool = False,
+                      temperature: float = 0.1,
+                      top_k: int = 50,
+                      top_p: float = 0.95,
+                      repeat_penalty: float = 0.8,
+                      repeat_last_n: int = 40,
+                      seed: Optional[int] = None,
+                      n_threads: int = 8,
+                      streaming_callback: Optional[Callable[[str, str], None]] = None) -> Union[str, dict]:
+        """
+        Generate text using the Ollama service, with optional image support.
+
+        Args:
+            prompt (str): The input prompt for text generation.
+            images (Optional[List[str]]): List of image file paths for multimodal generation.
+                If provided, uses the /api endpoint with message format.
+            n_predict (Optional[int]): Maximum number of tokens to generate.
+            stream (bool): Whether to stream the output. Defaults to False.
+            temperature (float): Sampling temperature. Defaults to 0.1.
+            top_k (int): Top-k sampling parameter. Defaults to 50 (not used in Ollama API directly).
+            top_p (float): Top-p sampling parameter. Defaults to 0.95 (not used in Ollama API directly).
+            repeat_penalty (float): Penalty for repeated tokens. Defaults to 0.8 (not used in Ollama API directly).
+            repeat_last_n (int): Number of previous tokens to consider for repeat penalty. Defaults to 40 (not used).
+            seed (Optional[int]): Random seed for generation.
+            n_threads (int): Number of threads to use. Defaults to 8 (not used in Ollama API directly).
+            streaming_callback (Optional[Callable[[str, str], None]]): Callback for streaming output.
+                - First parameter (str): The chunk of text received from the stream.
+                - Second parameter (str): The message type (typically MSG_TYPE.MSG_TYPE_CHUNK).
+
+        Returns:
+            Union[str, dict]: Generated text if successful, or a dictionary with status and error if failed.
+
+        Note:
+            Some parameters (top_k, top_p, repeat_penalty, repeat_last_n, n_threads) are included for interface
+            consistency but are not directly used in the Ollama API implementation.
+        """
+        # Set headers
+        headers = {
+            'Content-Type': 'application/json',
+        }
+        if self.service_key:
+            headers['Authorization'] = f'Bearer {self.service_key}'
+
+        # Clean host address
+        host_address = self.host_address.rstrip('/')
+
+        # Prepare data based on whether images are provided
+        if images:
+            # Multimodal generation using /api endpoint
+            images_list = [encode_image(image_path) for image_path in images]
+            data = {
+                'model': self.model_name,
+                'messages': [{
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": prompt}
+                    ] + [
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": f"data:image/jpeg;base64,{img}"}
+                        } for img in images_list
+                    ]
+                }],
+                "stream": stream,
+                "temperature": float(temperature),
+                "max_tokens": n_predict
+            }
+            url = f'{host_address}/api/chat'
+        else:
+            # Text-only generation using /api/generate endpoint
+            data = {
+                'model': self.model_name,
+                'prompt': prompt,
+                "stream": stream,
+                "temperature": float(temperature),
+                "max_tokens": n_predict
+            }
+            url = f'{host_address}/api/generate'
+
+        # Make the request
+        response = requests.post(url, json=data, headers=headers, stream=stream)
+
+        # Handle response
+        if not stream:
+            if response.status_code == 200:
+                try:
+                    if images:
+                        # For multimodal, response is in chat format
+                        return response.json()["message"]["content"]
+                    else:
+                        # For text-only
+                        return response.json()["response"]
+                except Exception as ex:
+                    return {"status": False, "error": str(ex)}
+            elif response.status_code == 404:
+                ASCIIColors.error(response.content.decode("utf-8", errors='ignore'))
+                return {"status": False, "error": "404 Not Found"}
+            else:
+                return {"status": False, "error": response.text}
+        else:
+            text = ""
+            if response.status_code == 200:
+                try:
+                    for line in response.iter_lines():
+                        decoded = line.decode("utf-8")
+                        if images:
+                            # Streaming with images (chat format)
+                            if decoded.startswith("data: "):
+                                json_data = json.loads(decoded[5:].strip())
+                                chunk = json_data["message"]["content"] if "message" in json_data else ""
+                            else:
+                                continue
+                        else:
+                            # Streaming without images (generate format)
+                            json_data = json.loads(decoded)
+                            chunk = json_data["response"]
+
+                        text += chunk
+                        if streaming_callback:
+                            if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
+                                break
+                    return text
+                except Exception as ex:
+                    return {"status": False, "error": str(ex)}
+            elif response.status_code == 404:
+                ASCIIColors.error(response.content.decode("utf-8", errors='ignore'))
+                return {"status": False, "error": "404 Not Found"}
+            elif response.status_code == 400:
+                try:
+                    content = json.loads(response.content.decode("utf8"))
+                    return {"status": False, "error": content.get("error", {}).get("message", content.get("message", "Unknown error"))}
+                except:
+                    return {"status": False, "error": response.content.decode("utf8")}
+            else:
+                return {"status": False, "error": response.text}
+
+    def tokenize(self, text: str) -> list:
+        """
+        Tokenize the input text into a list of characters.
+
+        Args:
+            text (str): The text to tokenize.
+
+        Returns:
+            list: List of individual characters.
+        """
+        return list(text)
+
+    def detokenize(self, tokens: list) -> str:
+        """
+        Convert a list of tokens back to text.
+
+        Args:
+            tokens (list): List of tokens (characters) to detokenize.
+
+        Returns:
+            str: Detokenized text.
+        """
+        return "".join(tokens)
+
+    def embed(self, text: str, **kwargs) -> list:
+        """
+        Get embeddings for the input text using Ollama API
+
+        Args:
+            text (str or List[str]): Input text to embed
+            **kwargs: Additional arguments like model, truncate, options, keep_alive
+
+        Returns:
+            dict: Response containing embeddings
+        """
+        import requests
+
+        url = f"{self.base_url}/api/embed"
+
+        # Prepare the request payload
+        payload = {
+            "input": text,
+            "model": kwargs.get("model", "llama2")  # default model
+        }
+
+        # Add optional parameters if provided
+        if "truncate" in kwargs:
+            payload["truncate"] = kwargs["truncate"]
+        if "options" in kwargs:
+            payload["options"] = kwargs["options"]
+        if "keep_alive" in kwargs:
+            payload["keep_alive"] = kwargs["keep_alive"]
+
+        try:
+            response = requests.post(url, json=payload)
+            response.raise_for_status()  # Raise exception for bad status codes
+            return response.json()
+        except requests.exceptions.RequestException as e:
+            raise Exception(f"Embedding request failed: {str(e)}")
+
+
+    def get_model_info(self) -> dict:
+        """
+        Return information about the current Ollama model.
+
+        Returns:
+            dict: Dictionary containing model name, version, and host address.
+        """
+        return {
+            "name": "ollama",
+            "version": "2.0",
+            "host_address": self.host_address,
+            "model_name": self.model_name
+        }
+    def listModels(self):
+        """ Lists available models """
+        url = f'{self.host_address}/api/tags'
+        headers = {
+            'accept': 'application/json',
+            'Authorization': f'Bearer {self.service_key}'
+        }
+        response = requests.get(url, headers=headers, verify= self.verify_ssl_certificate)
+        try:
+            data = response.json()
+            model_info = []
+
+            for model in data['models']:
+                model_name = model['name']
+                owned_by = ""
+                created_datetime = model["modified_at"]
+                model_info.append({'model_name': model_name, 'owned_by': owned_by, 'created_datetime': created_datetime})
+
+            return model_info
+        except Exception as ex:
+            trace_exception(ex)
+            return []
+    def load_model(self, model_name: str) -> bool:
+        """
+        Load a specific model into the Ollama binding.
+
+        Args:
+            model_name (str): Name of the model to load.
+
+        Returns:
+            bool: True if model loaded successfully.
+        """
+        self.model = model_name
+        self.model_name = model_name
+        return True
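As with the LOLLMS binding, the Ollama class can be driven directly. A hedged usage sketch against a default local Ollama install follows; the model name and prompts are placeholders, not values from the diff:

# Hypothetical usage of the Ollama binding above; model name is a placeholder.
from lollms_client.llm_bindings.ollama import OllamaBinding

binding = OllamaBinding(model_name="mistral")  # defaults to http://localhost:11434

# Text-only generation posts to /api/generate and returns the "response" field.
answer = binding.generate_text("Why is the sky blue?", n_predict=64)

# Streaming: return True from the callback to keep consuming chunks, since this
# binding stops as soon as the callback returns a falsy value.
def keep_going(chunk: str, msg_type) -> bool:
    print(chunk, end="", flush=True)
    return True

streamed = binding.generate_text("Count to five.", stream=True, streaming_callback=keep_going)

# listModels() queries /api/tags and returns a list of dicts with model_name,
# owned_by and created_datetime keys (or an empty list on failure).
models = binding.listModels()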