lollms-client 0.16.0__tar.gz → 0.17.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of lollms-client has been flagged as potentially problematic; see the package page for details.
- {lollms_client-0.16.0 → lollms_client-0.17.1}/PKG-INFO +1 -1
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/simple_text_gen_with_image_test.py +8 -8
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/text_gen.py +1 -1
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/__init__.py +1 -1
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/llamacpp/__init__.py +61 -11
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/lollms/__init__.py +31 -24
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/ollama/__init__.py +47 -27
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/openai/__init__.py +62 -35
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/openllm/__init__.py +4 -1
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/pythonllamacpp/__init__.py +3 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/tensor_rt/__init__.py +4 -1
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/transformers/__init__.py +3 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/vllm/__init__.py +4 -1
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_core.py +22 -9
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_llm_binding.py +78 -22
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_utilities.py +5 -3
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client.egg-info/PKG-INFO +1 -1
- {lollms_client-0.16.0 → lollms_client-0.17.1}/LICENSE +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/README.md +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/article_summary/article_summary.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/deep_analyze/deep_analyse.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/deep_analyze/deep_analyze_multiple_files.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/function_call/functions_call_with images.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/generate_and_speak/generate_and_speak.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/generate_game_sfx/generate_game_fx.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/personality_test/chat_test.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/personality_test/chat_with_aristotle.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/personality_test/tesks_test.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/simple_text_gen_test.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/test_local_models/local_chat.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/text_2_audio.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/text_2_image.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/text_and_image_2_audio.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/text_gen_system_prompt.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_config.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_discussion.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_functions.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_js_analyzer.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_python_analyzer.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_stt_binding.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_tasks.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_tti_binding.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_ttm_binding.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_tts_binding.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_ttv_binding.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_types.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/stt_bindings/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/stt_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/stt_bindings/whisper/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/stt_bindings/whispercpp/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/tti_bindings/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/tti_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/ttm_bindings/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/ttm_bindings/audiocraft/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/ttm_bindings/bark/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/ttm_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/tts_bindings/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/tts_bindings/bark/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/tts_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/tts_bindings/piper_tts/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/tts_bindings/xtts/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/ttv_bindings/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/ttv_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client.egg-info/SOURCES.txt +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client.egg-info/dependency_links.txt +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client.egg-info/requires.txt +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client.egg-info/top_level.txt +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/pyproject.toml +0 -0
- {lollms_client-0.16.0 → lollms_client-0.17.1}/setup.cfg +0 -0
{lollms_client-0.16.0 → lollms_client-0.17.1}/examples/simple_text_gen_with_image_test.py
RENAMED
@@ -10,14 +10,14 @@ from ascii_colors import ASCIIColors, trace_exception
 # MODEL_NAME = None # Server will use its default or last loaded model

 # Option 2: Ollama binding
-
-
-
-
-# Option
-BINDING_NAME = "llamacpp"
-MODELS_PATH = r"E:\drumber" # Change to your own models folder
-MODEL_NAME = "llava-v1.6-mistral-7b.Q3_K_XS.gguf" # Change to your vision capable model (make sure you have a mmprj file with the gguf model with the same name but without the quantization name and with mmproj- prefix (mmproj-llava-v1.6-mistral-7b.gguf))
+BINDING_NAME = "ollama"
+HOST_ADDRESS = "http://localhost:11434" # Default Ollama host
+MODEL_NAME = "llava:latest" # Or "llama3:latest", "phi3:latest", etc. - ensure it's pulled in Ollama
+
+# Option 3: llamacpp binding
+# BINDING_NAME = "llamacpp"
+# MODELS_PATH = r"E:\drumber" # Change to your own models folder
+# MODEL_NAME = "llava-v1.6-mistral-7b.Q3_K_XS.gguf" # Change to your vision capable model (make sure you have a mmprj file with the gguf model with the same name but without the quantization name and with mmproj- prefix (mmproj-llava-v1.6-mistral-7b.gguf))
 # You can also add a clip_model_path parameter to your lc_params
 img = "E:\\drumber\\1711741182996.jpg"
 # Option 3: OpenAI binding (requires OPENAI_API_KEY environment variable or service_key)
{lollms_client-0.16.0 → lollms_client-0.17.1}/examples/text_gen.py
RENAMED
@@ -15,7 +15,7 @@ lc = LollmsClient("llamacpp", models_path=r"E:\drumber", model_name="llava-v1.6-
 def cb(chunk, type):
     print(chunk,end="",flush=True)

-response = lc.generate_text(prompt="
+response = lc.generate_text(prompt="!@>user: Hi there\n!@>assistant: Hi there, how can I help you?!@>user: what is 1+1?\n!@>assistant: ", stream=False, temperature=0.5, streaming_callback=cb, split=True)
 print()
 print(response)
 print()
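With split=True, the flat LoLLMs-formatted prompt above is parsed into an OpenAI-style message list before generation; the parsing is done by the split_discussion helper added to lollms_llm_binding.py further down in this diff. A sketch of what this example prompt becomes, assuming the default keywords:

# Roughly what split_discussion() yields for the example prompt;
# the trailing empty "!@>assistant: " turn is dropped by the helper.
[{"role": "user", "content": "Hi there"},
 {"role": "assistant", "content": "Hi there, how can I help you?"},
 {"role": "user", "content": "what is 1+1?"}]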
{lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/__init__.py
RENAMED
@@ -6,7 +6,7 @@ from lollms_client.lollms_discussion import LollmsDiscussion, LollmsMessage
 from lollms_client.lollms_utilities import PromptReshaper # Keep general utilities
 from lollms_client.lollms_functions import FunctionCalling_Library

-__version__ = "0.16.0"
+__version__ = "0.17.1"

 # Optionally, you could define __all__ if you want to be explicit about exports
 __all__ = [
{lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/llamacpp/__init__.py
RENAMED
@@ -475,7 +475,12 @@ class LlamaCppServerBinding(LollmsLLMBinding):
                                     temperature: float = 0.7, top_k: int = 40, top_p: float = 0.9,
                                     repeat_penalty: float = 1.1, repeat_last_n: Optional[int] = 64,
                                     seed: Optional[int] = None, stream: bool = False, use_chat_format: bool = True,
-                                    images: Optional[List[str]] = None,
+                                    images: Optional[List[str]] = None,
+                                    split:Optional[bool]=False, # put to true if the prompt is a discussion
+                                    user_keyword:Optional[str]="!@>user:",
+                                    ai_keyword:Optional[str]="!@>assistant:",
+
+                                    **extra_params) -> Dict:
         payload_params = {
             "temperature": self.server_args.get("temperature", 0.7), "top_k": self.server_args.get("top_k", 40),
             "top_p": self.server_args.get("top_p", 0.9), "repeat_penalty": self.server_args.get("repeat_penalty", 1.1),
@@ -495,6 +500,10 @@ class LlamaCppServerBinding(LollmsLLMBinding):
         messages = []
         if system_prompt and system_prompt.strip(): messages.append({"role": "system", "content": system_prompt})
         user_content: Union[str, List[Dict[str, Any]]] = prompt
+        if split:
+            messages += self.split_discussion(user_content,user_keyword=user_keyword, ai_keyword=ai_keyword)
+        else:
+            messages.append({"role": "user", "content": user_content})
         if images and self.clip_model_path: # Use the binding's current clip_model_path
             image_parts = []
             for img_path in images:
@@ -503,8 +512,7 @@ class LlamaCppServerBinding(LollmsLLMBinding):
                     image_type = Path(img_path).suffix[1:].lower() or "png"; image_type = "jpeg" if image_type == "jpg" else image_type
                     image_parts.append({"type": "image_url", "image_url": {"url": f"data:image/{image_type};base64,{encoded_string}"}})
                 except Exception as ex: trace_exception(ex)
-
-            messages.append({"role": "user", "content": user_content})
+            messages[-1]["content"] =[{"type": "text", "text": messages[-1]["content"]}] + image_parts # type: ignore
         final_payload = {"messages": messages, "stream": stream, **payload_params}
         if 'n_predict' in final_payload: final_payload['max_tokens'] = final_payload.pop('n_predict')
         return final_payload
@@ -521,16 +529,57 @@ class LlamaCppServerBinding(LollmsLLMBinding):
         if image_data_list: final_payload["image_data"] = image_data_list
         return final_payload

-
-
-
-
-
-
+
+    def generate_text(self,
+                      prompt: str,
+                      images: Optional[List[str]] = None,
+                      system_prompt: str = "",
+                      n_predict: Optional[int] = None,
+                      stream: Optional[bool] = None,
+                      temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
+                      top_k: int = 40, # Ollama default is 40
+                      top_p: float = 0.9, # Ollama default is 0.9
+                      repeat_penalty: float = 1.1, # Ollama default is 1.1
+                      repeat_last_n: int = 64, # Ollama default is 64
+                      seed: Optional[int] = None,
+                      n_threads: Optional[int] = None,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
+                      **generation_kwargs
+                      ) -> Union[str, dict]:
+        """
+        Generate text using the active LLM binding, using instance defaults if parameters are not provided.
+
+        Args:
+            prompt (str): The input prompt for text generation.
+            images (Optional[List[str]]): List of image file paths for multimodal generation.
+            n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
+            stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
+            temperature (Optional[float]): Sampling temperature. Uses instance default if None.
+            top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
+            top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
+            repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
+            repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
+            seed (Optional[int]): Random seed for generation. Uses instance default if None.
+            n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
+            ctx_size (int | None): Context size override for this generation.
+            streaming_callback (Optional[Callable[[str, str], None]]): Callback function for streaming output.
+                - First parameter (str): The chunk of text received.
+                - Second parameter (str): The message type (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
+            split:Optional[bool]: put to true if the prompt is a discussion
+            user_keyword:Optional[str]: when splitting we use this to extract user prompt
+            ai_keyword:Optional[str]": when splitting we use this to extract ai prompt
+
+        Returns:
+            Union[str, dict]: Generated text or error dictionary if failed.
+        """
         if not self.server_process or not self.server_process.is_healthy:
             return {"status": False, "error": "Llama.cpp server is not running or not healthy."}

-        _use_chat_format =
+        _use_chat_format = True
         payload = self._prepare_generation_payload(
             prompt=prompt, system_prompt=system_prompt, n_predict=n_predict,
             temperature=temperature if temperature is not None else self.server_args.get("temperature",0.7),
@@ -539,7 +588,8 @@ class LlamaCppServerBinding(LollmsLLMBinding):
             repeat_penalty=repeat_penalty if repeat_penalty is not None else self.server_args.get("repeat_penalty",1.1),
             repeat_last_n=repeat_last_n if repeat_last_n is not None else self.server_args.get("repeat_last_n",64),
             seed=seed if seed is not None else self.server_args.get("seed", -1), stream=stream,
-            use_chat_format=_use_chat_format, images=images,
+            use_chat_format=_use_chat_format, images=images,
+            split= split, user_keyword=user_keyword, ai_keyword=ai_keyword, **generation_kwargs
         )
         endpoint = "/v1/chat/completions" if _use_chat_format else "/completion"
         request_url = self._get_request_url(endpoint)
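Since _use_chat_format is now hard-coded to True, every generation from this binding goes to /v1/chat/completions with an OpenAI-style payload, and _prepare_generation_payload renames n_predict to max_tokens as shown above. A sketch of the payload a split prompt produces (field values hypothetical):

payload = {
    "messages": [
        {"role": "user", "content": "Hi there"},
        {"role": "assistant", "content": "Hi there, how can I help you?"},
    ],
    "stream": False,        # set per call
    "temperature": 0.7,     # falls back to server_args defaults
    "max_tokens": 4096,     # renamed from n_predict
}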
{lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/lollms/__init__.py
RENAMED
@@ -46,43 +46,50 @@ class LollmsLLMBinding(LollmsLLMBinding):
         self.personality = personality
         self.model = None

-    def generate_text(self,
+    def generate_text(self,
                       prompt: str,
                       images: Optional[List[str]] = None,
                       system_prompt: str = "",
                       n_predict: Optional[int] = None,
-                      stream: bool =
-                      temperature: float =
-                      top_k: int =
-                      top_p: float =
-                      repeat_penalty: float =
-                      repeat_last_n: int =
+                      stream: Optional[bool] = None,
+                      temperature: Optional[float] = None,
+                      top_k: Optional[int] = None,
+                      top_p: Optional[float] = None,
+                      repeat_penalty: Optional[float] = None,
+                      repeat_last_n: Optional[int] = None,
                       seed: Optional[int] = None,
-                      n_threads: int =
+                      n_threads: Optional[int] = None,
                       ctx_size: int | None = None,
-                      streaming_callback: Optional[Callable[[str,
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
+                      ) -> Union[str, dict]:
         """
-        Generate text using the
+        Generate text using the active LLM binding, using instance defaults if parameters are not provided.

         Args:
             prompt (str): The input prompt for text generation.
             images (Optional[List[str]]): List of image file paths for multimodal generation.
-
-
-
-
-
-
-
-
-
-
-            streaming_callback (Optional[Callable[[str, str], None]]): Callback for streaming output.
-                - First parameter (str): The chunk of text received
-                - Second parameter (str): The message type (
+            n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
+            stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
+            temperature (Optional[float]): Sampling temperature. Uses instance default if None.
+            top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
+            top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
+            repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
+            repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
+            seed (Optional[int]): Random seed for generation. Uses instance default if None.
+            n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
+            ctx_size (int | None): Context size override for this generation.
+            streaming_callback (Optional[Callable[[str, str], None]]): Callback function for streaming output.
+                - First parameter (str): The chunk of text received.
+                - Second parameter (str): The message type (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
+            split:Optional[bool]: put to true if the prompt is a discussion
+            user_keyword:Optional[str]: when splitting we use this to extract user prompt
+            ai_keyword:Optional[str]": when splitting we use this to extract ai prompt

         Returns:
-            Union[str, dict]: Generated text
+            Union[str, dict]: Generated text or error dictionary if failed.
         """
         # Determine endpoint based on presence of images
         endpoint = "/lollms_generate_with_images" if images else "/lollms_generate"
{lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/ollama/__init__.py
RENAMED
@@ -109,47 +109,53 @@ class OllamaBinding(LollmsLLMBinding):
         self.ollama_client = None # Ensure it's None if initialization fails
         # Optionally re-raise or handle so the binding is clearly unusable
         raise ConnectionError(f"Could not connect or initialize Ollama client at {self.host_address}: {e}") from e
-
-    def generate_text(self,
+
+    def generate_text(self,
                       prompt: str,
-                      images: Optional[List[str]] = None,
+                      images: Optional[List[str]] = None,
                       system_prompt: str = "",
                       n_predict: Optional[int] = None,
-                      stream: bool =
+                      stream: Optional[bool] = None,
                       temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
                       top_k: int = 40, # Ollama default is 40
                       top_p: float = 0.9, # Ollama default is 0.9
                       repeat_penalty: float = 1.1, # Ollama default is 1.1
                       repeat_last_n: int = 64, # Ollama default is 64
                       seed: Optional[int] = None,
-                      n_threads: Optional[int] = None,
-                      ctx_size:
-                      streaming_callback: Optional[Callable[[str,
-
+                      n_threads: Optional[int] = None,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
+                      ) -> Union[str, dict]:
         """
-        Generate text using the
+        Generate text using the active LLM binding, using instance defaults if parameters are not provided.

         Args:
             prompt (str): The input prompt for text generation.
             images (Optional[List[str]]): List of image file paths for multimodal generation.
-            n_predict (Optional[int]): Maximum number of tokens to generate
-            stream (bool): Whether to stream the output.
-            temperature (float): Sampling temperature.
-            top_k (int): Top-k sampling parameter.
-            top_p (float): Top-p sampling parameter.
-            repeat_penalty (float): Penalty for repeated tokens.
-            repeat_last_n (int): Number of previous tokens to consider for repeat penalty.
-            seed (Optional[int]): Random seed for generation.
-            n_threads (Optional[int]): Number of threads to use
-            ctx_size (
-            streaming_callback (Optional[Callable[[str,
-                - First parameter (str): The chunk of text received
-                - Second parameter (
-
+            n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
+            stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
+            temperature (Optional[float]): Sampling temperature. Uses instance default if None.
+            top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
+            top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
+            repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
+            repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
+            seed (Optional[int]): Random seed for generation. Uses instance default if None.
+            n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
+            ctx_size (int | None): Context size override for this generation.
+            streaming_callback (Optional[Callable[[str, str], None]]): Callback function for streaming output.
+                - First parameter (str): The chunk of text received.
+                - Second parameter (str): The message type (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
+            split:Optional[bool]: put to true if the prompt is a discussion
+            user_keyword:Optional[str]: when splitting we use this to extract user prompt
+            ai_keyword:Optional[str]": when splitting we use this to extract ai prompt

         Returns:
-            Union[str,
+            Union[str, dict]: Generated text or error dictionary if failed.
         """
+
         if not self.ollama_client:
             return {"status": False, "error": "Ollama client not initialized."}
@@ -175,8 +181,15 @@ class OllamaBinding(LollmsLLMBinding):
                 # If images were base64 strings, they would need decoding to bytes first.
                 processed_images.append(img_path)

-            messages = [
-
+            messages = [
+                {'role': 'system', 'content':system_prompt},
+            ]
+            if split:
+                messages += self.split_discussion(prompt,user_keyword=user_keyword, ai_keyword=ai_keyword)
+                if processed_images:
+                    messages[-1]["images"]=processed_images
+            else:
+                messages.append({'role': 'user', 'content': prompt, 'images': processed_images if processed_images else None})
             if stream:
                 response_stream = self.ollama_client.chat(
                     model=self.model_name,
@@ -201,7 +214,14 @@ class OllamaBinding(LollmsLLMBinding):
             )
             return response_dict.get('message', {}).get('content', '')
         else: # Text-only
-            messages = [
+            messages = [
+                {'role': 'system', 'content':system_prompt},
+            ]
+            if split:
+                messages += self.split_discussion(prompt,user_keyword=user_keyword, ai_keyword=ai_keyword)
+            else:
+                messages.append({'role': 'user', 'content': prompt})
+
             if stream:
                 response_stream = self.ollama_client.chat(
                     model=self.model_name,
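On the Ollama path, enabling split replaces the single user turn with the parsed discussion, and any processed images are attached to the last parsed message rather than to a fresh turn. A sketch of the resulting structure (contents hypothetical):

messages = [
    {'role': 'system', 'content': 'You are helpful.'},
    {'role': 'user', 'content': 'What is in this picture?'},
]
messages[-1]["images"] = ["/path/to/picture.jpg"]  # images ride on the final turn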
{lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/openai/__init__.py
RENAMED
@@ -55,42 +55,50 @@ class OpenAIBinding(LollmsLLMBinding):
         self.completion_format = ELF_COMPLETION_FORMAT.Chat

-    def generate_text(self,
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def generate_text(self,
+                      prompt: str,
+                      images: Optional[List[str]] = None,
+                      system_prompt: str = "",
+                      n_predict: Optional[int] = None,
+                      stream: Optional[bool] = None,
+                      temperature: float = 0.7,
+                      top_k: int = 40,
+                      top_p: float = 0.9,
+                      repeat_penalty: float = 1.1,
+                      repeat_last_n: int = 64,
+                      seed: Optional[int] = None,
+                      n_threads: Optional[int] = None,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
+                      ) -> Union[str, dict]:
         """
-        Generate text
+        Generate text using the active LLM binding, using instance defaults if parameters are not provided.

         Args:
             prompt (str): The input prompt for text generation.
             images (Optional[List[str]]): List of image file paths for multimodal generation.
-            n_predict (Optional[int]): Maximum number of tokens to generate.
-            stream (bool): Whether to stream the output.
-            temperature (float): Sampling temperature.
-            top_k (int): Top-k sampling parameter.
-            top_p (float): Top-p sampling parameter.
-            repeat_penalty (float): Penalty for repeated tokens.
-            repeat_last_n (int): Number of previous tokens to consider for repeat penalty.
-            seed (Optional[int]): Random seed for generation.
-            n_threads (int): Number of threads to use.
+            n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
+            stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
+            temperature (Optional[float]): Sampling temperature. Uses instance default if None.
+            top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
+            top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
+            repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
+            repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
+            seed (Optional[int]): Random seed for generation. Uses instance default if None.
+            n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
+            ctx_size (int | None): Context size override for this generation.
             streaming_callback (Optional[Callable[[str, str], None]]): Callback function for streaming output.
                 - First parameter (str): The chunk of text received.
                 - Second parameter (str): The message type (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
+            split:Optional[bool]: put to true if the prompt is a discussion
+            user_keyword:Optional[str]: when splitting we use this to extract user prompt
+            ai_keyword:Optional[str]": when splitting we use this to extract ai prompt

         Returns:
-            str: Generated text or error dictionary if failed.
+            Union[str, dict]: Generated text or error dictionary if failed.
         """
         count = 0
         output = ""
@@ -101,16 +109,17 @@ class OpenAIBinding(LollmsLLMBinding):
             {
                 "role": "system",
                 "content": system_prompt,
-            }
-
-
-
+            }
+        ]
+        if split:
+            messages += self.split_discussion(prompt,user_keyword=user_keyword, ai_keyword=ai_keyword)
+            if images:
+                messages[-1]["content"] = [
                 {
                     "type": "text",
-                    "text":
+                    "text": messages[-1]["content"]
                 }
-            ]
+            ]+[
                 {
                     "type": "image_url",
                     "image_url": {
@@ -119,8 +128,26 @@ class OpenAIBinding(LollmsLLMBinding):
                 }
                 for image_path in images
             ]
-
-
+        else:
+            messages.append({
+                'role': 'user',
+                'content': [
+                    {
+                        "type": "text",
+                        "text": prompt
+                    }
+                ] + [
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/jpeg;base64,{encode_image(image_path)}"
+                        }
+                    }
+                    for image_path in images
+                ]
+            })
+
         else:
             messages = [{"role": "user", "content": prompt}]
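In the OpenAI-compatible binding, images are delivered by rewrapping the last message's string content into a typed content list: one text part plus one base64 image_url part per image, encoded with the encode_image helper used above. A sketch of the final user message shape (values hypothetical):

{
    "role": "user",
    "content": [
        {"type": "text", "text": "Describe this."},
        {"type": "image_url",
         "image_url": {"url": "data:image/jpeg;base64,..."}},  # one entry per image
    ],
}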
{lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/openllm/__init__.py
RENAMED
@@ -154,7 +154,10 @@ class OpenLLMBinding(LollmsLLMBinding):
                       seed: Optional[int] = None,
                       # n_threads: Optional[int] = None, # Server-side config for OpenLLM
                       # ctx_size: Optional[int] = None, # Server-side config, though some models might allow via llm_config
-                      streaming_callback: Optional[Callable[[str, int], bool]] = None
+                      streaming_callback: Optional[Callable[[str, int], bool]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
                       ) -> Union[str, Dict[str, any]]:

         if not self.openllm_client:
{lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/pythonllamacpp/__init__.py
RENAMED
@@ -216,6 +216,9 @@ class PythonLlamaCppBinding(LollmsLLMBinding):
                       streaming_callback: Optional[Callable[[str, int], bool]] = None,
                       use_chat_format: bool = True,
                       grammar: Optional[Union[str, LlamaGrammar]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
                       **generation_kwargs
                       ) -> Union[str, Dict[str, any]]:
{lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/tensor_rt/__init__.py
RENAMED
@@ -341,7 +341,10 @@ class VLLMBinding(LollmsLLMBinding):
                       repeat_last_n: int = 64, # Note: vLLM applies penalty to full context
                       seed: Optional[int] = None,
                       n_threads: int = 8, # Note: vLLM manages its own threading/parallelism
-                      streaming_callback: Optional[Callable[[str, int], bool]] = None
+                      streaming_callback: Optional[Callable[[str, int], bool]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
                       ) -> Union[str, Dict[str, any]]:
         if not self.llm_engine: return {"status": False, "error": "Engine not loaded."}
{lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/transformers/__init__.py
RENAMED
@@ -312,6 +312,9 @@ class HuggingFaceHubBinding(LollmsLLMBinding):
                       seed: Optional[int] = None,
                       stop_words: Optional[List[str]] = None, # Added custom stop_words
                       streaming_callback: Optional[Callable[[str, int], bool]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
                       use_chat_format_override: Optional[bool] = None,
                       **generation_kwargs
                       ) -> Union[str, Dict[str, Any]]:
{lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/vllm/__init__.py
RENAMED
@@ -341,7 +341,10 @@ class VLLMBinding(LollmsLLMBinding):
                       repeat_last_n: int = 64, # Note: vLLM applies penalty to full context
                       seed: Optional[int] = None,
                       n_threads: int = 8, # Note: vLLM manages its own threading/parallelism
-                      streaming_callback: Optional[Callable[[str, int], bool]] = None
+                      streaming_callback: Optional[Callable[[str, int], bool]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
                       ) -> Union[str, Dict[str, any]]:
         if not self.llm_engine: return {"status": False, "error": "Engine not loaded."}
{lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_core.py
RENAMED
@@ -11,7 +11,7 @@ from lollms_client.lollms_stt_binding import LollmsSTTBinding, LollmsSTTBindingM
 from lollms_client.lollms_ttv_binding import LollmsTTVBinding, LollmsTTVBindingManager
 from lollms_client.lollms_ttm_binding import LollmsTTMBinding, LollmsTTMBindingManager

-import
+import re
 from enum import Enum
 import base64
 import requests
@@ -61,11 +61,12 @@ class LollmsClient():
                  ctx_size: Optional[int] = 8192,
                  n_predict: Optional[int] = 4096,
                  stream: bool = False,
-                 temperature: float = 0.
-                 top_k: int =
-                 top_p: float = 0.
-                 repeat_penalty: float =
-                 repeat_last_n: int =
+                 temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
+                 top_k: int = 40, # Ollama default is 40
+                 top_p: float = 0.9, # Ollama default is 0.9
+                 repeat_penalty: float = 1.1, # Ollama default is 1.1
+                 repeat_last_n: int = 64, # Ollama default is 64
+
                  seed: Optional[int] = None,
                  n_threads: int = 8,
                  streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
@@ -362,7 +363,11 @@ class LollmsClient():
                       seed: Optional[int] = None,
                       n_threads: Optional[int] = None,
                       ctx_size: int | None = None,
-                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
+                      ) -> Union[str, dict]:
         """
         Generate text using the active LLM binding, using instance defaults if parameters are not provided.
@@ -380,6 +385,9 @@ class LollmsClient():
             n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
             ctx_size (int | None): Context size override for this generation.
             streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
+            split:Optional[bool]: put to true if the prompt is a discussion
+            user_keyword:Optional[str]: when splitting we use this to extract user prompt
+            ai_keyword:Optional[str]": when splitting we use this to extract ai prompt

         Returns:
             Union[str, dict]: Generated text or error dictionary if failed.
@@ -399,7 +407,10 @@ class LollmsClient():
                 seed=seed if seed is not None else self.default_seed,
                 n_threads=n_threads if n_threads is not None else self.default_n_threads,
                 ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size,
-                streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback
+                streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback,
+                split= split,
+                user_keyword=user_keyword,
+                ai_keyword=ai_keyword
             )
         raise RuntimeError("LLM binding not initialized.")
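End to end, LollmsClient.generate_text simply forwards the three new parameters to the active binding. A minimal usage sketch (binding and model names are placeholder assumptions, mirroring the examples earlier in this diff):

from lollms_client import LollmsClient

lc = LollmsClient("ollama", model_name="llava:latest")  # placeholder backend/model
response = lc.generate_text(
    prompt="!@>user: Hi there\n!@>assistant: ",
    split=True,                   # parse the prompt into chat turns
    user_keyword="!@>user:",      # defaults, shown for clarity
    ai_keyword="!@>assistant:",
)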
@@ -981,7 +992,6 @@ Do not split the code in multiple tags.
         Ranks answers for a question from best to worst using LLM JSON generation.
         (Implementation requires self.generate_code which uses self.generate_text)
         """
-        # ... (Implementation as provided before, relies on self.generate_code) ...
         if not callback:
             callback = self.sink
@@ -1567,6 +1577,7 @@ Provide the final aggregated answer in {output_format} format, directly addressi
         callback("Deep analysis complete.", MSG_TYPE.MSG_TYPE_STEP_END)
         return final_output

+
 def chunk_text(text, tokenizer, detokenizer, chunk_size, overlap, use_separators=True):
     """
     Chunks text based on token count.
@@ -1646,3 +1657,5 @@ def chunk_text(text, tokenizer, detokenizer, chunk_size, overlap, use_separators
             break

     return chunks
+
+
{lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_llm_binding.py
RENAMED
@@ -2,13 +2,14 @@
 from abc import ABC, abstractmethod
 import importlib
 from pathlib import Path
-from typing import Optional, Callable, List
+from typing import Optional, Callable, List, Union
 from lollms_client.lollms_types import ELF_COMPLETION_FORMAT
 import importlib
 from pathlib import Path
 from typing import Optional
 from ascii_colors import trace_exception
-
+from lollms_client.lollms_types import MSG_TYPE
+import re
 class LollmsLLMBinding(ABC):
     """Abstract base class for all LOLLMS LLM bindings"""
@@ -25,41 +26,50 @@ class LollmsLLMBinding(ABC):
         self.model_name = None #Must be set by the instance

     @abstractmethod
-    def generate_text(self,
+    def generate_text(self,
                       prompt: str,
                       images: Optional[List[str]] = None,
                       system_prompt: str = "",
                       n_predict: Optional[int] = None,
-                      stream: bool =
-                      temperature: float =
-                      top_k: int =
-                      top_p: float =
-                      repeat_penalty: float =
-                      repeat_last_n: int =
+                      stream: Optional[bool] = None,
+                      temperature: Optional[float] = None,
+                      top_k: Optional[int] = None,
+                      top_p: Optional[float] = None,
+                      repeat_penalty: Optional[float] = None,
+                      repeat_last_n: Optional[int] = None,
                       seed: Optional[int] = None,
-                      n_threads: int =
-
+                      n_threads: Optional[int] = None,
+                      ctx_size: int | None = None,
+                      streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+                      split:Optional[bool]=False, # put to true if the prompt is a discussion
+                      user_keyword:Optional[str]="!@>user:",
+                      ai_keyword:Optional[str]="!@>assistant:",
+                      ) -> Union[str, dict]:
         """
-        Generate text
+        Generate text using the active LLM binding, using instance defaults if parameters are not provided.

         Args:
             prompt (str): The input prompt for text generation.
             images (Optional[List[str]]): List of image file paths for multimodal generation.
-            n_predict (Optional[int]): Maximum number of tokens to generate.
-            stream (bool): Whether to stream the output.
-            temperature (float): Sampling temperature.
-            top_k (int): Top-k sampling parameter.
-            top_p (float): Top-p sampling parameter.
-            repeat_penalty (float): Penalty for repeated tokens.
-            repeat_last_n (int): Number of previous tokens to consider for repeat penalty.
-            seed (Optional[int]): Random seed for generation.
-            n_threads (int): Number of threads to use.
+            n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
+            stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
+            temperature (Optional[float]): Sampling temperature. Uses instance default if None.
+            top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
+            top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
+            repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
+            repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
+            seed (Optional[int]): Random seed for generation. Uses instance default if None.
+            n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
+            ctx_size (int | None): Context size override for this generation.
             streaming_callback (Optional[Callable[[str, str], None]]): Callback function for streaming output.
                 - First parameter (str): The chunk of text received.
                 - Second parameter (str): The message type (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
+            split:Optional[bool]: put to true if the prompt is a discussion
+            user_keyword:Optional[str]: when splitting we use this to extract user prompt
+            ai_keyword:Optional[str]": when splitting we use this to extract ai prompt

         Returns:
-            str: Generated text or error dictionary if failed.
+            Union[str, dict]: Generated text or error dictionary if failed.
         """
         pass
@@ -146,6 +156,52 @@ class LollmsLLMBinding(ABC):
         pass

+    def split_discussion(self, lollms_prompt_string: str, system_keyword="!@>system:", user_keyword="!@>user:", ai_keyword="!@>assistant:") -> list:
+        """
+        Splits a LoLLMs prompt into a list of OpenAI-style messages.
+        If the very first chunk has no prefix, it's assigned to "system".
+        """
+        # Regex to split on any of the three prefixes (lookahead)
+        pattern = r"(?={}|{}|{})".format(
+            re.escape(system_keyword),
+            re.escape(user_keyword),
+            re.escape(ai_keyword)
+        )
+        parts = re.split(pattern, lollms_prompt_string)
+        messages = []
+
+        for part in parts:
+            part = part.strip()
+            if not part:
+                continue
+
+            # Determine role and strip prefix if present
+            if part.startswith(system_keyword):
+                role = "system"
+                content = part[len(system_keyword):].strip()
+            elif part.startswith(user_keyword):
+                role = "user"
+                content = part[len(user_keyword):].strip()
+            elif part.startswith(ai_keyword):
+                role = "assistant"
+                content = part[len(ai_keyword):].strip()
+            else:
+                # No prefix: if it's the first valid chunk, treat as system
+                if not messages:
+                    role = "system"
+                    content = part
+                else:
+                    # otherwise skip unrecognized segments
+                    continue
+
+            messages.append({"role": role, "content": content})
+        if messages[-1]["content"]=="":
+            del messages[-1]
+        return messages
+
+
 class LollmsLLMBindingManager:
     """Manages binding discovery and instantiation"""
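A quick sanity check of the new helper on an instantiated binding (a sketch; input and output values are illustrative):

msgs = binding.split_discussion(
    "!@>system: Be concise.\n!@>user: Hello\n!@>assistant: Hi!\n!@>user: 2+2?\n!@>assistant: "
)
# -> [{'role': 'system', 'content': 'Be concise.'},
#     {'role': 'user', 'content': 'Hello'},
#     {'role': 'assistant', 'content': 'Hi!'},
#     {'role': 'user', 'content': '2+2?'}]
# The trailing "!@>assistant: " becomes an empty assistant message that the final check deletes.

Note that the final check indexes messages[-1] unconditionally, so a prompt with no recognizable content would raise an IndexError on the empty list; callers are expected to pass at least one turn.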
{lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_utilities.py
RENAMED
@@ -1,10 +1,12 @@
 import urllib
 import numpy
 from pathlib import Path
-
+import pipmaster as pm
 from PIL import Image
 import io
 import base64
+import re
+import numpy as np
 class PromptReshaper:
     def __init__(self, template:str):
         self.template = template
@@ -122,8 +124,8 @@ def remove_text_from_string(string: str, text_to_find:str):


 def process_ai_output(output, images, output_folder):
-    if not
-
+    if not pm.is_installed("opencv-python"):
+        pm.install("opencv-python")
     import cv2
     images = [cv2.imread(str(img)) for img in images]
     # Find all bounding box entries in the output
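process_ai_output now installs OpenCV on first use through pipmaster instead of assuming it is present. The same guard pattern in isolation (a sketch using the calls shown above):

import pipmaster as pm

# Only touch pip when the package is actually missing, then import lazily.
if not pm.is_installed("opencv-python"):
    pm.install("opencv-python")
import cv2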
All remaining files are unchanged between 0.16.0 and 0.17.1 (the entries listed above with +0 -0).