lollms-client 0.20.3__py3-none-any.whl → 0.20.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lollms-client might be problematic.
- examples/gradio_chat_app.py +228 -0
- examples/internet_search_with_rag.py +1 -2
- examples/run_remote_mcp_example copy.py +226 -0
- lollms_client/__init__.py +2 -2
- lollms_client/llm_bindings/llamacpp/__init__.py +104 -0
- lollms_client/llm_bindings/lollms/__init__.py +102 -1
- lollms_client/llm_bindings/ollama/__init__.py +99 -0
- lollms_client/llm_bindings/openai/__init__.py +109 -0
- lollms_client/lollms_core.py +60 -0
- lollms_client/lollms_discussion.py +478 -33
- lollms_client/lollms_llm_binding.py +43 -0
- lollms_client/mcp_bindings/remote_mcp/__init__.py +233 -132
- {lollms_client-0.20.3.dist-info → lollms_client-0.20.6.dist-info}/METADATA +1 -1
- {lollms_client-0.20.3.dist-info → lollms_client-0.20.6.dist-info}/RECORD +17 -15
- {lollms_client-0.20.3.dist-info → lollms_client-0.20.6.dist-info}/WHEEL +0 -0
- {lollms_client-0.20.3.dist-info → lollms_client-0.20.6.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-0.20.3.dist-info → lollms_client-0.20.6.dist-info}/top_level.txt +0 -0
lollms_client/llm_bindings/lollms/__init__.py
CHANGED

@@ -168,7 +168,108 @@ class LollmsLLMBinding(LollmsLLMBinding):
                return {"status": False, "error": str(ex)}
        else:
            return {"status": False, "error": response.text}
-
+    def chat(self,
+             discussion: LollmsDiscussion,
+             branch_tip_id: Optional[str] = None,
+             n_predict: Optional[int] = None,
+             stream: Optional[bool] = None,
+             temperature: Optional[float] = None,
+             top_k: Optional[int] = None,
+             top_p: Optional[float] = None,
+             repeat_penalty: Optional[float] = None,
+             repeat_last_n: Optional[int] = None,
+             seed: Optional[int] = None,
+             n_threads: Optional[int] = None,
+             ctx_size: int | None = None,
+             streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None
+             ) -> Union[str, dict]:
+        """
+        Conduct a chat session with a lollms-webui server using a LollmsDiscussion object.
+
+        Args:
+            discussion (LollmsDiscussion): The discussion object containing the conversation history.
+            branch_tip_id (Optional[str]): The ID of the message to use as the tip of the conversation branch. Defaults to the active branch.
+            ... (other parameters) ...
+
+        Returns:
+            Union[str, dict]: The generated text or an error dictionary.
+        """
+        # 1. Export the discussion to the lollms-native text format
+        prompt_text = discussion.export("lollms_text", branch_tip_id)
+
+        # 2. Extract images from the LAST message of the branch
+        # lollms-webui's endpoint associates images with the final prompt
+        active_branch_id = branch_tip_id or discussion.active_branch_id
+        branch = discussion.get_branch(active_branch_id)
+        last_message = branch[-1] if branch else None
+
+        image_data = []
+        if last_message and last_message.images:
+            # The endpoint expects a list of base64 strings.
+            # We will only process images of type 'base64'. URL types are not supported by this endpoint.
+            for img in last_message.images:
+                if img['type'] == 'base64':
+                    image_data.append(img['data'])
+                # Note: 'url' type images are ignored for this binding.
+
+        # 3. Determine endpoint and build payload
+        endpoint = "/lollms_generate_with_images" if image_data else "/lollms_generate"
+        url = f"{self.host_address}{endpoint}"
+
+        headers = {'Content-Type': 'application/json'}
+        if self.service_key:
+            headers['Authorization'] = f'Bearer {self.service_key}'
+
+        data = {
+            "prompt": prompt_text,
+            "model_name": self.model_name,
+            "personality": self.personality,
+            "n_predict": n_predict,
+            "stream": stream,
+            "temperature": temperature,
+            "top_k": top_k,
+            "top_p": top_p,
+            "repeat_penalty": repeat_penalty,
+            "repeat_last_n": repeat_last_n,
+            "seed": seed,
+            "n_threads": n_threads
+        }
+        if image_data:
+            data["images"] = image_data
+
+        # 4. Make the request (logic copied and adapted from generate_text)
+        try:
+            response = requests.post(
+                url,
+                json=data,
+                headers=headers,
+                stream=stream,
+                verify=self.verify_ssl_certificate
+            )
+            response.raise_for_status()  # Raise an exception for bad status codes
+
+            if not stream:
+                return response.text.strip()
+            else:
+                full_response_text = ""
+                for line in response.iter_lines():
+                    if line:
+                        chunk = line.decode("utf-8")
+                        full_response_text += chunk
+                        if streaming_callback:
+                            if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
+                                break
+                # Clean up potential quotes from some streaming formats
+                if full_response_text.startswith('"') and full_response_text.endswith('"'):
+                    full_response_text = full_response_text[1:-1]
+                return full_response_text.rstrip('!')
+
+        except requests.exceptions.RequestException as e:
+            error_message = f"lollms-webui request error: {e}"
+            return {"status": "error", "message": error_message}
+        except Exception as ex:
+            error_message = f"lollms-webui generation error: {str(ex)}"
+            return {"status": "error", "message": error_message}
    def tokenize(self, text: str) -> list:
        """
        Tokenize the input text into a list of tokens using the /lollms_tokenize endpoint.
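To make the endpoint contract above concrete, here is a hedged standalone sketch of the request that LollmsLLMBinding.chat() assembles. The host address, model name, personality value, and prompt string are illustrative placeholders, not values from the package; in the binding they come from the instance configuration and from discussion.export("lollms_text", branch_tip_id).

# Hedged sketch: rebuilds the payload shown in the hunk above, outside the class.
import requests

host_address = "http://localhost:9600"   # assumed lollms-webui address (placeholder)
service_key = None                       # optional bearer token
prompt_text = "!@>user: Hello there\n!@>assistant: "  # roughly what the lollms_text export yields
image_data = []                          # base64 strings taken from the last message, if any

endpoint = "/lollms_generate_with_images" if image_data else "/lollms_generate"
headers = {"Content-Type": "application/json"}
if service_key:
    headers["Authorization"] = f"Bearer {service_key}"

data = {
    "prompt": prompt_text,
    "model_name": "my_model",   # placeholder; the binding sends self.model_name
    "personality": -1,          # placeholder; the binding forwards self.personality
    "n_predict": 128,
    "stream": False,
    "temperature": 0.7,
}
if image_data:
    data["images"] = image_data

response = requests.post(f"{host_address}{endpoint}", json=data, headers=headers, verify=True)
response.raise_for_status()
print(response.text.strip())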
lollms_client/llm_bindings/ollama/__init__.py
CHANGED

@@ -6,6 +6,7 @@ from lollms_client.lollms_types import MSG_TYPE
# encode_image is not strictly needed if ollama-python handles paths, but kept for consistency if ever needed.
# from lollms_client.lollms_utilities import encode_image
from lollms_client.lollms_types import ELF_COMPLETION_FORMAT
+from lollms_client.lollms_discussion import LollmsDiscussion
from typing import Optional, Callable, List, Union, Dict

from ascii_colors import ASCIIColors, trace_exception

@@ -258,6 +259,104 @@ class OllamaBinding(LollmsLLMBinding):
            trace_exception(ex)
            return {"status": False, "error": error_message}

+    def chat(self,
+             discussion: LollmsDiscussion,
+             branch_tip_id: Optional[str] = None,
+             n_predict: Optional[int] = None,
+             stream: Optional[bool] = None,
+             temperature: float = 0.7,
+             top_k: int = 40,
+             top_p: float = 0.9,
+             repeat_penalty: float = 1.1,
+             repeat_last_n: int = 64,
+             seed: Optional[int] = None,
+             n_threads: Optional[int] = None,
+             ctx_size: Optional[int] = None,
+             streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None
+             ) -> Union[str, dict]:
+        """
+        Conduct a chat session with the Ollama model using a LollmsDiscussion object.
+
+        Args:
+            discussion (LollmsDiscussion): The discussion object containing the conversation history.
+            branch_tip_id (Optional[str]): The ID of the message to use as the tip of the conversation branch. Defaults to the active branch.
+            n_predict (Optional[int]): Maximum number of tokens to generate.
+            stream (Optional[bool]): Whether to stream the output.
+            temperature (float): Sampling temperature.
+            top_k (int): Top-k sampling parameter.
+            top_p (float): Top-p sampling parameter.
+            repeat_penalty (float): Penalty for repeated tokens.
+            repeat_last_n (int): Number of previous tokens to consider for repeat penalty.
+            seed (Optional[int]): Random seed for generation.
+            n_threads (Optional[int]): Number of threads to use.
+            ctx_size (Optional[int]): Context size override for this generation.
+            streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
+
+        Returns:
+            Union[str, dict]: The generated text or an error dictionary.
+        """
+        if not self.ollama_client:
+            return {"status": "error", "message": "Ollama client not initialized."}
+
+        # 1. Export the discussion to the Ollama chat format
+        # This handles system prompts, user/assistant roles, and base64-encoded images.
+        messages = discussion.export("ollama_chat", branch_tip_id)
+
+        # 2. Build the generation options dictionary
+        options = {
+            'num_predict': n_predict,
+            'temperature': float(temperature),
+            'top_k': top_k,
+            'top_p': top_p,
+            'repeat_penalty': repeat_penalty,
+            'repeat_last_n': repeat_last_n,
+            'seed': seed,
+            'num_thread': n_threads,
+            'num_ctx': ctx_size,
+        }
+        # Remove None values, as ollama-python expects them to be absent
+        options = {k: v for k, v in options.items() if v is not None}
+
+        full_response_text = ""
+
+        try:
+            # 3. Call the Ollama API
+            if stream:
+                response_stream = self.ollama_client.chat(
+                    model=self.model_name,
+                    messages=messages,
+                    stream=True,
+                    options=options if options else None
+                )
+                for chunk in response_stream:
+                    chunk_content = chunk.get('message', {}).get('content', '')
+                    if chunk_content:
+                        full_response_text += chunk_content
+                        if streaming_callback:
+                            if not streaming_callback(chunk_content, MSG_TYPE.MSG_TYPE_CHUNK):
+                                break
+                return full_response_text
+            else:  # Not streaming
+                response_dict = self.ollama_client.chat(
+                    model=self.model_name,
+                    messages=messages,
+                    stream=False,
+                    options=options if options else None
+                )
+                return response_dict.get('message', {}).get('content', '')
+
+        except ollama.ResponseError as e:
+            error_message = f"Ollama API ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
+            ASCIIColors.error(error_message)
+            return {"status": "error", "message": error_message}
+        except ollama.RequestError as e:
+            error_message = f"Ollama API RequestError: {str(e)}"
+            ASCIIColors.error(error_message)
+            return {"status": "error", "message": error_message}
+        except Exception as ex:
+            error_message = f"An unexpected error occurred: {str(ex)}"
+            trace_exception(ex)
+            return {"status": "error", "message": error_message}
    def tokenize(self, text: str) -> list:
        """
        Tokenize the input text into a list of characters.
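For comparison, a minimal sketch of the equivalent call made directly with the ollama-python client, mirroring what OllamaBinding.chat() does above. The host, model name, and messages are placeholders; the real method obtains messages from discussion.export("ollama_chat", branch_tip_id) and filters None values out of the options dictionary in the same way.

# Hedged sketch of the streaming call the Ollama binding issues above.
import ollama

client = ollama.Client(host="http://localhost:11434")   # assumed default Ollama host
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Summarize the lollms-client 0.20.6 changes."},
]
options = {"temperature": 0.7, "top_k": 40, "top_p": 0.9, "repeat_penalty": 1.1, "seed": None}
options = {k: v for k, v in options.items() if v is not None}  # mirror the binding's None filtering

text = ""
for chunk in client.chat(model="llama3", messages=messages, stream=True, options=options or None):
    # dict-style access mirrors the binding's chunk handling above
    piece = chunk.get("message", {}).get("content", "")
    if piece:
        text += piece
        print(piece, end="", flush=True)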
lollms_client/llm_bindings/openai/__init__.py
CHANGED

@@ -5,6 +5,7 @@ from lollms_client.lollms_llm_binding import LollmsLLMBinding
from lollms_client.lollms_types import MSG_TYPE
from lollms_client.lollms_utilities import encode_image
from lollms_client.lollms_types import ELF_COMPLETION_FORMAT
+from lollms_client.lollms_discussion import LollmsDiscussion
from typing import Optional, Callable, List, Union
from ascii_colors import ASCIIColors, trace_exception
from typing import List, Dict

@@ -207,6 +208,114 @@ class OpenAIBinding(LollmsLLMBinding):

        return output

+    def chat(self,
+             discussion: LollmsDiscussion,
+             branch_tip_id: Optional[str] = None,
+             n_predict: Optional[int] = None,
+             stream: Optional[bool] = None,
+             temperature: float = 0.7,
+             top_k: int = 40,
+             top_p: float = 0.9,
+             repeat_penalty: float = 1.1,
+             repeat_last_n: int = 64,
+             seed: Optional[int] = None,
+             n_threads: Optional[int] = None,
+             ctx_size: Optional[int] = None,
+             streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None
+             ) -> Union[str, dict]:
+        """
+        Conduct a chat session with the OpenAI model using a LollmsDiscussion object.
+
+        Args:
+            discussion (LollmsDiscussion): The discussion object containing the conversation history.
+            branch_tip_id (Optional[str]): The ID of the message to use as the tip of the conversation branch. Defaults to the active branch.
+            n_predict (Optional[int]): Maximum number of tokens to generate.
+            stream (Optional[bool]): Whether to stream the output.
+            temperature (float): Sampling temperature.
+            top_k (int): Top-k sampling parameter (Note: not all OpenAI models use this).
+            top_p (float): Top-p sampling parameter.
+            repeat_penalty (float): Frequency penalty for repeated tokens.
+            seed (Optional[int]): Random seed for generation.
+            streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
+
+        Returns:
+            Union[str, dict]: The generated text or an error dictionary.
+        """
+        # 1. Export the discussion to the OpenAI chat format
+        # This handles system prompts, user/assistant roles, and multi-modal content automatically.
+        messages = discussion.export("openai_chat", branch_tip_id)
+
+        # Build the request parameters
+        params = {
+            "model": self.model_name,
+            "messages": messages,
+            "max_tokens": n_predict,
+            "n": 1,
+            "temperature": temperature,
+            "top_p": top_p,
+            "frequency_penalty": repeat_penalty,
+            "stream": stream
+        }
+        # Add seed if available, as it's supported by newer OpenAI models
+        if seed is not None:
+            params["seed"] = seed
+
+        # Remove None values, as the API expects them to be absent
+        params = {k: v for k, v in params.items() if v is not None}
+
+        output = ""
+        # 2. Call the API
+        try:
+            # Check if we should use the chat completions or legacy completions endpoint
+            if self.completion_format == ELF_COMPLETION_FORMAT.Chat:
+                completion = self.client.chat.completions.create(**params)
+
+                if stream:
+                    for chunk in completion:
+                        # The streaming response for chat has a different structure
+                        delta = chunk.choices[0].delta
+                        if delta.content:
+                            word = delta.content
+                            if streaming_callback is not None:
+                                if not streaming_callback(word, MSG_TYPE.MSG_TYPE_CHUNK):
+                                    break
+                            output += word
+                else:
+                    output = completion.choices[0].message.content
+
+            else:  # Fallback to legacy completion format (not recommended for chat)
+                # We need to format the messages list into a single string prompt
+                legacy_prompt = discussion.export("openai_completion", branch_tip_id)
+                legacy_params = {
+                    "model": self.model_name,
+                    "prompt": legacy_prompt,
+                    "max_tokens": n_predict,
+                    "n": 1,
+                    "temperature": temperature,
+                    "top_p": top_p,
+                    "frequency_penalty": repeat_penalty,
+                    "stream": stream
+                }
+                completion = self.client.completions.create(**legacy_params)
+
+                if stream:
+                    for chunk in completion:
+                        word = chunk.choices[0].text
+                        if streaming_callback is not None:
+                            if not streaming_callback(word, MSG_TYPE.MSG_TYPE_CHUNK):
+                                break
+                        output += word
+                else:
+                    output = completion.choices[0].text
+
+        except Exception as e:
+            # Handle API errors gracefully
+            error_message = f"An error occurred with the OpenAI API: {e}"
+            if streaming_callback:
+                streaming_callback(error_message, MSG_TYPE.MSG_TYPE_EXCEPTION)
+            return {"status": "error", "message": error_message}
+
+        return output
    def tokenize(self, text: str) -> list:
        """
        Tokenize the input text into a list of characters.
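The same pattern against the OpenAI SDK: a short sketch of the chat-completions call that OpenAIBinding.chat() issues above. The model name, messages, and sampling values are placeholders; the binding builds messages via discussion.export("openai_chat", branch_tip_id) and maps repeat_penalty onto frequency_penalty as shown in the hunk.

# Hedged sketch of the streaming chat-completions request made by the OpenAI binding above.
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment
params = {
    "model": "gpt-4o-mini",                                 # placeholder model name
    "messages": [{"role": "user", "content": "Hello!"}],    # stand-in for the exported discussion
    "max_tokens": 128,
    "n": 1,
    "temperature": 0.7,
    "top_p": 0.9,
    "frequency_penalty": 1.1,                               # the binding forwards repeat_penalty here
    "stream": True,
}
params = {k: v for k, v in params.items() if v is not None}  # mirror the binding's None filtering

output = ""
for chunk in client.chat.completions.create(**params):
    if not chunk.choices:
        continue
    delta = chunk.choices[0].delta
    if delta.content:
        output += delta.content
print(output)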
lollms_client/lollms_core.py
CHANGED
@@ -12,6 +12,7 @@ from lollms_client.lollms_ttv_binding import LollmsTTVBinding, LollmsTTVBindingM
from lollms_client.lollms_ttm_binding import LollmsTTMBinding, LollmsTTMBindingManager
from lollms_client.lollms_mcp_binding import LollmsMCPBinding, LollmsMCPBindingManager

+from lollms_client.lollms_discussion import LollmsDiscussion
import json, re
from enum import Enum
import base64

@@ -386,6 +387,7 @@ class LollmsClient():
                 split:Optional[bool]=False, # put to true if the prompt is a discussion
                 user_keyword:Optional[str]="!@>user:",
                 ai_keyword:Optional[str]="!@>assistant:",
+                 **kwargs
                 ) -> Union[str, dict]:
        """
        Generate text using the active LLM binding, using instance defaults if parameters are not provided.

@@ -434,6 +436,64 @@ class LollmsClient():
            raise RuntimeError("LLM binding not initialized.")


+    def chat(self,
+             discussion: LollmsDiscussion,
+             branch_tip_id: Optional[str] = None,
+             n_predict: Optional[int] = None,
+             stream: Optional[bool] = None,
+             temperature: Optional[float] = None,
+             top_k: Optional[int] = None,
+             top_p: Optional[float] = None,
+             repeat_penalty: Optional[float] = None,
+             repeat_last_n: Optional[int] = None,
+             seed: Optional[int] = None,
+             n_threads: Optional[int] = None,
+             ctx_size: Optional[int] = None,
+             streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None
+             ) -> Union[str, dict]:
+        """
+        High-level method to perform a chat generation using a LollmsDiscussion object.
+
+        This is the recommended method for conversational interactions. It uses the
+        discussion object to correctly format the context for the model, including
+        system prompts, roles, and multi-modal content.
+
+        Args:
+            discussion (LollmsDiscussion): The discussion object to use for context.
+            branch_tip_id (Optional[str]): The ID of the message to use as the end of the conversation branch. If None, the active branch is used.
+            n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
+            stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
+            temperature (Optional[float]): Sampling temperature. Uses instance default if None.
+            top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
+            top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
+            repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
+            repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
+            seed (Optional[int]): Random seed for generation. Uses instance default if None.
+            n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
+            ctx_size (Optional[int]): Context size override for this generation.
+            streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
+
+        Returns:
+            Union[str, dict]: Generated text or an error dictionary if failed.
+        """
+        if self.binding:
+            return self.binding.chat(
+                discussion=discussion,
+                branch_tip_id=branch_tip_id,
+                n_predict=n_predict if n_predict is not None else self.default_n_predict,
+                stream=stream if stream is not None else self.default_stream,
+                temperature=temperature if temperature is not None else self.default_temperature,
+                top_k=top_k if top_k is not None else self.default_top_k,
+                top_p=top_p if top_p is not None else self.default_top_p,
+                repeat_penalty=repeat_penalty if repeat_penalty is not None else self.default_repeat_penalty,
+                repeat_last_n=repeat_last_n if repeat_last_n is not None else self.default_repeat_last_n,
+                seed=seed if seed is not None else self.default_seed,
+                n_threads=n_threads if n_threads is not None else self.default_n_threads,
+                ctx_size=ctx_size if ctx_size is not None else self.default_ctx_size,
+                streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback
+            )
+        raise RuntimeError("LLM binding not initialized.")
+
    def embed(self, text, **kwargs):
        """
        Generate embeddings for the input text using the active LLM binding.
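Putting it together, a hedged usage sketch for the new high-level LollmsClient.chat(). How a LollmsDiscussion is created and populated is not shown in this diff, so build_discussion() below is a stand-in for whatever the application uses, and the no-argument LollmsClient() constructor is an assumption that depends on the chosen binding and its defaults.

# Hedged usage sketch for LollmsClient.chat(); names marked as placeholders are not from the diff.
from lollms_client import LollmsClient
from lollms_client.lollms_types import MSG_TYPE

def build_discussion():
    """Stand-in: construct and populate a lollms_client.lollms_discussion.LollmsDiscussion here."""
    raise NotImplementedError

def on_chunk(chunk: str, msg_type: MSG_TYPE) -> bool:
    print(chunk, end="", flush=True)
    return True  # returning False stops streaming, as every binding above checks

lc = LollmsClient()          # constructor arguments depend on the chosen binding (assumption)
discussion = build_discussion()
result = lc.chat(
    discussion,
    n_predict=256,
    stream=True,
    temperature=0.7,
    streaming_callback=on_chunk,
)
if isinstance(result, dict):  # error dictionaries carry {"status": "error", "message": ...}
    print("Error:", result.get("message"))

Returning False from the streaming callback stops generation, which is the convention each of the new binding-level chat() methods checks before continuing the stream.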