lollms-client 0.16.0__tar.gz → 0.17.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lollms-client might be problematic.

Files changed (70)
  1. {lollms_client-0.16.0 → lollms_client-0.17.1}/PKG-INFO +1 -1
  2. {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/simple_text_gen_with_image_test.py +8 -8
  3. {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/text_gen.py +1 -1
  4. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/__init__.py +1 -1
  5. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/llamacpp/__init__.py +61 -11
  6. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/lollms/__init__.py +31 -24
  7. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/ollama/__init__.py +47 -27
  8. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/openai/__init__.py +62 -35
  9. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/openllm/__init__.py +4 -1
  10. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/pythonllamacpp/__init__.py +3 -0
  11. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/tensor_rt/__init__.py +4 -1
  12. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/transformers/__init__.py +3 -0
  13. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/vllm/__init__.py +4 -1
  14. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_core.py +22 -9
  15. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_llm_binding.py +78 -22
  16. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_utilities.py +5 -3
  17. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client.egg-info/PKG-INFO +1 -1
  18. {lollms_client-0.16.0 → lollms_client-0.17.1}/LICENSE +0 -0
  19. {lollms_client-0.16.0 → lollms_client-0.17.1}/README.md +0 -0
  20. {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/article_summary/article_summary.py +0 -0
  21. {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/deep_analyze/deep_analyse.py +0 -0
  22. {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/deep_analyze/deep_analyze_multiple_files.py +0 -0
  23. {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/function_call/functions_call_with images.py +0 -0
  24. {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/generate_and_speak/generate_and_speak.py +0 -0
  25. {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/generate_game_sfx/generate_game_fx.py +0 -0
  26. {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/personality_test/chat_test.py +0 -0
  27. {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/personality_test/chat_with_aristotle.py +0 -0
  28. {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/personality_test/tesks_test.py +0 -0
  29. {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/simple_text_gen_test.py +0 -0
  30. {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/test_local_models/local_chat.py +0 -0
  31. {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/text_2_audio.py +0 -0
  32. {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/text_2_image.py +0 -0
  33. {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/text_and_image_2_audio.py +0 -0
  34. {lollms_client-0.16.0 → lollms_client-0.17.1}/examples/text_gen_system_prompt.py +0 -0
  35. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/llm_bindings/__init__.py +0 -0
  36. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_config.py +0 -0
  37. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_discussion.py +0 -0
  38. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_functions.py +0 -0
  39. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_js_analyzer.py +0 -0
  40. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_python_analyzer.py +0 -0
  41. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_stt_binding.py +0 -0
  42. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_tasks.py +0 -0
  43. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_tti_binding.py +0 -0
  44. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_ttm_binding.py +0 -0
  45. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_tts_binding.py +0 -0
  46. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_ttv_binding.py +0 -0
  47. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/lollms_types.py +0 -0
  48. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/stt_bindings/__init__.py +0 -0
  49. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/stt_bindings/lollms/__init__.py +0 -0
  50. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/stt_bindings/whisper/__init__.py +0 -0
  51. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/stt_bindings/whispercpp/__init__.py +0 -0
  52. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/tti_bindings/__init__.py +0 -0
  53. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/tti_bindings/lollms/__init__.py +0 -0
  54. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/ttm_bindings/__init__.py +0 -0
  55. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/ttm_bindings/audiocraft/__init__.py +0 -0
  56. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/ttm_bindings/bark/__init__.py +0 -0
  57. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/ttm_bindings/lollms/__init__.py +0 -0
  58. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/tts_bindings/__init__.py +0 -0
  59. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/tts_bindings/bark/__init__.py +0 -0
  60. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/tts_bindings/lollms/__init__.py +0 -0
  61. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/tts_bindings/piper_tts/__init__.py +0 -0
  62. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/tts_bindings/xtts/__init__.py +0 -0
  63. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/ttv_bindings/__init__.py +0 -0
  64. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client/ttv_bindings/lollms/__init__.py +0 -0
  65. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client.egg-info/SOURCES.txt +0 -0
  66. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client.egg-info/dependency_links.txt +0 -0
  67. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client.egg-info/requires.txt +0 -0
  68. {lollms_client-0.16.0 → lollms_client-0.17.1}/lollms_client.egg-info/top_level.txt +0 -0
  69. {lollms_client-0.16.0 → lollms_client-0.17.1}/pyproject.toml +0 -0
  70. {lollms_client-0.16.0 → lollms_client-0.17.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lollms_client
- Version: 0.16.0
+ Version: 0.17.1
  Summary: A client library for LoLLMs generate endpoint
  Author-email: ParisNeo <parisneoai@gmail.com>
  License: Apache Software License
@@ -10,14 +10,14 @@ from ascii_colors import ASCIIColors, trace_exception
  # MODEL_NAME = None # Server will use its default or last loaded model

  # Option 2: Ollama binding
- # BINDING_NAME = "ollama"
- # HOST_ADDRESS = "http://localhost:11434" # Default Ollama host
- # MODEL_NAME = "llava:latest" # Or "llama3:latest", "phi3:latest", etc. - ensure it's pulled in Ollama
-
- # Option 2: llamacpp binding
- BINDING_NAME = "llamacpp"
- MODELS_PATH = r"E:\drumber" # Change to your own models folder
- MODEL_NAME = "llava-v1.6-mistral-7b.Q3_K_XS.gguf" # Change to your vision capable model (make sure you have a mmprj file with the gguf model with the same name but without the quantization name and with mmproj- prefix (mmproj-llava-v1.6-mistral-7b.gguf))
+ BINDING_NAME = "ollama"
+ HOST_ADDRESS = "http://localhost:11434" # Default Ollama host
+ MODEL_NAME = "llava:latest" # Or "llama3:latest", "phi3:latest", etc. - ensure it's pulled in Ollama
+
+ # Option 3: llamacpp binding
+ # BINDING_NAME = "llamacpp"
+ # MODELS_PATH = r"E:\drumber" # Change to your own models folder
+ # MODEL_NAME = "llava-v1.6-mistral-7b.Q3_K_XS.gguf" # Change to your vision capable model (make sure you have a mmprj file with the gguf model with the same name but without the quantization name and with mmproj- prefix (mmproj-llava-v1.6-mistral-7b.gguf))
  # You can also add a clip_model_path parameter to your lc_params
  img = "E:\\drumber\\1711741182996.jpg"
  # Option 3: OpenAI binding (requires OPENAI_API_KEY environment variable or service_key)
@@ -15,7 +15,7 @@ lc = LollmsClient("llamacpp", models_path=r"E:\drumber", model_name="llava-v1.6-
  def cb(chunk, type):
      print(chunk,end="",flush=True)

- response = lc.generate_text(prompt="One plus one equals ", stream=False, temperature=0.5, streaming_callback=cb)
+ response = lc.generate_text(prompt="!@>user: Hi there\n!@>assistant: Hi there, how can I help you?!@>user: what is 1+1?\n!@>assistant: ", stream=False, temperature=0.5, streaming_callback=cb, split=True)
  print()
  print(response)
  print()
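The updated example passes a whole multi-turn discussion as a single prompt and lets the client split it into chat messages. A minimal sketch of the same pattern, assuming a reachable Ollama server; the binding, host and model names below are placeholders to adapt:

from lollms_client import LollmsClient

# Placeholder binding/host/model; any binding that supports split will do.
lc = LollmsClient("ollama", host_address="http://localhost:11434", model_name="llava:latest")

def cb(chunk, msg_type):
    print(chunk, end="", flush=True)

# With split=True the prompt is parsed on the "!@>user:" / "!@>assistant:" markers
# and sent to the backend as a list of chat messages instead of one raw string.
discussion = (
    "!@>user: Hi there\n"
    "!@>assistant: Hi there, how can I help you?\n"
    "!@>user: what is 1+1?\n"
    "!@>assistant: "
)
response = lc.generate_text(prompt=discussion, temperature=0.5,
                            streaming_callback=cb, split=True)
print(response)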
@@ -6,7 +6,7 @@ from lollms_client.lollms_discussion import LollmsDiscussion, LollmsMessage
  from lollms_client.lollms_utilities import PromptReshaper # Keep general utilities
  from lollms_client.lollms_functions import FunctionCalling_Library

- __version__ = "0.16.0"
+ __version__ = "0.17.1"

  # Optionally, you could define __all__ if you want to be explicit about exports
  __all__ = [
@@ -475,7 +475,12 @@ class LlamaCppServerBinding(LollmsLLMBinding):
  temperature: float = 0.7, top_k: int = 40, top_p: float = 0.9,
  repeat_penalty: float = 1.1, repeat_last_n: Optional[int] = 64,
  seed: Optional[int] = None, stream: bool = False, use_chat_format: bool = True,
- images: Optional[List[str]] = None, **extra_params) -> Dict:
+ images: Optional[List[str]] = None,
+ split:Optional[bool]=False, # put to true if the prompt is a discussion
+ user_keyword:Optional[str]="!@>user:",
+ ai_keyword:Optional[str]="!@>assistant:",
+
+ **extra_params) -> Dict:
  payload_params = {
  "temperature": self.server_args.get("temperature", 0.7), "top_k": self.server_args.get("top_k", 40),
  "top_p": self.server_args.get("top_p", 0.9), "repeat_penalty": self.server_args.get("repeat_penalty", 1.1),
@@ -495,6 +500,10 @@ class LlamaCppServerBinding(LollmsLLMBinding):
  messages = []
  if system_prompt and system_prompt.strip(): messages.append({"role": "system", "content": system_prompt})
  user_content: Union[str, List[Dict[str, Any]]] = prompt
+ if split:
+ messages += self.split_discussion(user_content,user_keyword=user_keyword, ai_keyword=ai_keyword)
+ else:
+ messages.append({"role": "user", "content": user_content})
  if images and self.clip_model_path: # Use the binding's current clip_model_path
  image_parts = []
  for img_path in images:
@@ -503,8 +512,7 @@
  image_type = Path(img_path).suffix[1:].lower() or "png"; image_type = "jpeg" if image_type == "jpg" else image_type
  image_parts.append({"type": "image_url", "image_url": {"url": f"data:image/{image_type};base64,{encoded_string}"}})
  except Exception as ex: trace_exception(ex)
- user_content = [{"type": "text", "text": prompt}] + image_parts # type: ignore
- messages.append({"role": "user", "content": user_content})
+ messages[-1]["content"] =[{"type": "text", "text": messages[-1]["content"]}] + image_parts # type: ignore
  final_payload = {"messages": messages, "stream": stream, **payload_params}
  if 'n_predict' in final_payload: final_payload['max_tokens'] = final_payload.pop('n_predict')
  return final_payload
@@ -521,16 +529,57 @@
  if image_data_list: final_payload["image_data"] = image_data_list
  return final_payload

- def generate_text(self, prompt: str, images: Optional[List[str]] = None, system_prompt: str = "",
- n_predict: Optional[int] = None, stream: bool = False, temperature: float = None,
- top_k: int = None, top_p: float = None, repeat_penalty: float = None,
- repeat_last_n: Optional[int] = None, seed: Optional[int] = None,
- streaming_callback: Optional[Callable[[str, int], bool]] = None,
- use_chat_format_override: Optional[bool] = None, **generation_kwargs) -> Union[str, Dict[str, any]]:
+
+ def generate_text(self,
+ prompt: str,
+ images: Optional[List[str]] = None,
+ system_prompt: str = "",
+ n_predict: Optional[int] = None,
+ stream: Optional[bool] = None,
+ temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
+ top_k: int = 40, # Ollama default is 40
+ top_p: float = 0.9, # Ollama default is 0.9
+ repeat_penalty: float = 1.1, # Ollama default is 1.1
+ repeat_last_n: int = 64, # Ollama default is 64
+ seed: Optional[int] = None,
+ n_threads: Optional[int] = None,
+ ctx_size: int | None = None,
+ streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+ split:Optional[bool]=False, # put to true if the prompt is a discussion
+ user_keyword:Optional[str]="!@>user:",
+ ai_keyword:Optional[str]="!@>assistant:",
+ **generation_kwargs
+ ) -> Union[str, dict]:
+ """
+ Generate text using the active LLM binding, using instance defaults if parameters are not provided.
+
+ Args:
+ prompt (str): The input prompt for text generation.
+ images (Optional[List[str]]): List of image file paths for multimodal generation.
+ n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
+ stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
+ temperature (Optional[float]): Sampling temperature. Uses instance default if None.
+ top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
+ top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
+ repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
+ repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
+ seed (Optional[int]): Random seed for generation. Uses instance default if None.
+ n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
+ ctx_size (int | None): Context size override for this generation.
+ streaming_callback (Optional[Callable[[str, str], None]]): Callback function for streaming output.
+ - First parameter (str): The chunk of text received.
+ - Second parameter (str): The message type (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
+ split:Optional[bool]: put to true if the prompt is a discussion
+ user_keyword:Optional[str]: when splitting we use this to extract user prompt
+ ai_keyword:Optional[str]": when splitting we use this to extract ai prompt
+
+ Returns:
+ Union[str, dict]: Generated text or error dictionary if failed.
+ """
  if not self.server_process or not self.server_process.is_healthy:
  return {"status": False, "error": "Llama.cpp server is not running or not healthy."}

- _use_chat_format = use_chat_format_override if use_chat_format_override is not None else (self.default_completion_format == ELF_COMPLETION_FORMAT.Chat)
+ _use_chat_format = True
  payload = self._prepare_generation_payload(
  prompt=prompt, system_prompt=system_prompt, n_predict=n_predict,
  temperature=temperature if temperature is not None else self.server_args.get("temperature",0.7),
@@ -539,7 +588,8 @@ class LlamaCppServerBinding(LollmsLLMBinding):
  repeat_penalty=repeat_penalty if repeat_penalty is not None else self.server_args.get("repeat_penalty",1.1),
  repeat_last_n=repeat_last_n if repeat_last_n is not None else self.server_args.get("repeat_last_n",64),
  seed=seed if seed is not None else self.server_args.get("seed", -1), stream=stream,
- use_chat_format=_use_chat_format, images=images, **generation_kwargs
+ use_chat_format=_use_chat_format, images=images,
+ split= split, user_keyword=user_keyword, ai_keyword=ai_keyword, **generation_kwargs
  )
  endpoint = "/v1/chat/completions" if _use_chat_format else "/completion"
  request_url = self._get_request_url(endpoint)
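For the chat path, the payload that _prepare_generation_payload builds and posts to the server's /v1/chat/completions endpoint ends up shaped roughly as below when split=True and an image is attached. This is an illustrative sketch derived from the hunks above, not captured traffic; all values are invented:

# Illustrative payload shape (values made up for the sketch).
payload = {
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hi there"},
        {"role": "assistant", "content": "Hi there, how can I help you?"},
        # image parts are appended onto the content of the last message
        {"role": "user", "content": [
            {"type": "text", "text": "what is in this picture?"},
            {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}},
        ]},
    ],
    "stream": False,
    "temperature": 0.7,
    "top_k": 40,
    "top_p": 0.9,
    "repeat_penalty": 1.1,
    "max_tokens": 4096,  # n_predict is renamed to max_tokens for the chat endpoint
}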
@@ -46,43 +46,50 @@ class LollmsLLMBinding(LollmsLLMBinding):
  self.personality = personality
  self.model = None

- def generate_text(self,
+ def generate_text(self,
  prompt: str,
  images: Optional[List[str]] = None,
  system_prompt: str = "",
  n_predict: Optional[int] = None,
- stream: bool = False,
- temperature: float = 0.1,
- top_k: int = 50,
- top_p: float = 0.95,
- repeat_penalty: float = 0.8,
- repeat_last_n: int = 40,
+ stream: Optional[bool] = None,
+ temperature: Optional[float] = None,
+ top_k: Optional[int] = None,
+ top_p: Optional[float] = None,
+ repeat_penalty: Optional[float] = None,
+ repeat_last_n: Optional[int] = None,
  seed: Optional[int] = None,
- n_threads: int = 8,
+ n_threads: Optional[int] = None,
  ctx_size: int | None = None,
- streaming_callback: Optional[Callable[[str, str], None]] = None) -> Union[str, dict]:
+ streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+ split:Optional[bool]=False, # put to true if the prompt is a discussion
+ user_keyword:Optional[str]="!@>user:",
+ ai_keyword:Optional[str]="!@>assistant:",
+ ) -> Union[str, dict]:
  """
- Generate text using the LOLLMS service, with optional image support.
+ Generate text using the active LLM binding, using instance defaults if parameters are not provided.

  Args:
  prompt (str): The input prompt for text generation.
  images (Optional[List[str]]): List of image file paths for multimodal generation.
- If provided, uses the /lollms_generate_with_images endpoint.
- n_predict (Optional[int]): Maximum number of tokens to generate.
- stream (bool): Whether to stream the output. Defaults to False.
- temperature (float): Sampling temperature. Defaults to 0.1.
- top_k (int): Top-k sampling parameter. Defaults to 50.
- top_p (float): Top-p sampling parameter. Defaults to 0.95.
- repeat_penalty (float): Penalty for repeated tokens. Defaults to 0.8.
- repeat_last_n (int): Number of previous tokens to consider for repeat penalty. Defaults to 40.
- seed (Optional[int]): Random seed for generation.
- n_threads (int): Number of threads to use. Defaults to 8.
- streaming_callback (Optional[Callable[[str, str], None]]): Callback for streaming output.
- - First parameter (str): The chunk of text received from the stream.
- - Second parameter (str): The message type (typically MSG_TYPE.MSG_TYPE_CHUNK).
+ n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
+ stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
+ temperature (Optional[float]): Sampling temperature. Uses instance default if None.
+ top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
+ top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
+ repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
+ repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
+ seed (Optional[int]): Random seed for generation. Uses instance default if None.
+ n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
+ ctx_size (int | None): Context size override for this generation.
+ streaming_callback (Optional[Callable[[str, str], None]]): Callback function for streaming output.
+ - First parameter (str): The chunk of text received.
+ - Second parameter (str): The message type (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
+ split:Optional[bool]: put to true if the prompt is a discussion
+ user_keyword:Optional[str]: when splitting we use this to extract user prompt
+ ai_keyword:Optional[str]": when splitting we use this to extract ai prompt

  Returns:
- Union[str, dict]: Generated text if successful, or a dictionary with status and error if failed.
+ Union[str, dict]: Generated text or error dictionary if failed.
  """
  # Determine endpoint based on presence of images
  endpoint = "/lollms_generate_with_images" if images else "/lollms_generate"
@@ -109,47 +109,53 @@ class OllamaBinding(LollmsLLMBinding):
  self.ollama_client = None # Ensure it's None if initialization fails
  # Optionally re-raise or handle so the binding is clearly unusable
  raise ConnectionError(f"Could not connect or initialize Ollama client at {self.host_address}: {e}") from e
-
- def generate_text(self,
+
+ def generate_text(self,
  prompt: str,
- images: Optional[List[str]] = None, # List of image file paths
+ images: Optional[List[str]] = None,
  system_prompt: str = "",
  n_predict: Optional[int] = None,
- stream: bool = False,
+ stream: Optional[bool] = None,
  temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
  top_k: int = 40, # Ollama default is 40
  top_p: float = 0.9, # Ollama default is 0.9
  repeat_penalty: float = 1.1, # Ollama default is 1.1
  repeat_last_n: int = 64, # Ollama default is 64
  seed: Optional[int] = None,
- n_threads: Optional[int] = None, # Ollama calls this num_thread
- ctx_size: Optional[int] = None, # Ollama calls this num_ctx
- streaming_callback: Optional[Callable[[str, int], bool]] = None
- ) -> Union[str, Dict[str, any]]:
+ n_threads: Optional[int] = None,
+ ctx_size: int | None = None,
+ streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+ split:Optional[bool]=False, # put to true if the prompt is a discussion
+ user_keyword:Optional[str]="!@>user:",
+ ai_keyword:Optional[str]="!@>assistant:",
+ ) -> Union[str, dict]:
  """
- Generate text using the Ollama service, with optional image support.
+ Generate text using the active LLM binding, using instance defaults if parameters are not provided.

  Args:
  prompt (str): The input prompt for text generation.
  images (Optional[List[str]]): List of image file paths for multimodal generation.
- n_predict (Optional[int]): Maximum number of tokens to generate (num_predict).
- stream (bool): Whether to stream the output. Defaults to False.
- temperature (float): Sampling temperature.
- top_k (int): Top-k sampling parameter.
- top_p (float): Top-p sampling parameter.
- repeat_penalty (float): Penalty for repeated tokens.
- repeat_last_n (int): Number of previous tokens to consider for repeat penalty.
- seed (Optional[int]): Random seed for generation.
- n_threads (Optional[int]): Number of threads to use (num_thread).
- ctx_size (Optional[int]): Context window size (num_ctx).
- streaming_callback (Optional[Callable[[str, int], bool]]): Callback for streaming output.
- - First parameter (str): The chunk of text received from the stream.
- - Second parameter (int): The message type (typically MSG_TYPE.MSG_TYPE_CHUNK).
- Return False to stop streaming.
+ n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
+ stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
+ temperature (Optional[float]): Sampling temperature. Uses instance default if None.
+ top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
+ top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
+ repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
+ repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
+ seed (Optional[int]): Random seed for generation. Uses instance default if None.
+ n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
+ ctx_size (int | None): Context size override for this generation.
+ streaming_callback (Optional[Callable[[str, str], None]]): Callback function for streaming output.
+ - First parameter (str): The chunk of text received.
+ - Second parameter (str): The message type (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
+ split:Optional[bool]: put to true if the prompt is a discussion
+ user_keyword:Optional[str]: when splitting we use this to extract user prompt
+ ai_keyword:Optional[str]": when splitting we use this to extract ai prompt

  Returns:
- Union[str, Dict[str, any]]: Generated text if successful, or a dictionary with status and error if failed.
+ Union[str, dict]: Generated text or error dictionary if failed.
  """
+
  if not self.ollama_client:
  return {"status": False, "error": "Ollama client not initialized."}

@@ -175,8 +181,15 @@
  # If images were base64 strings, they would need decoding to bytes first.
  processed_images.append(img_path)

- messages = [{'role': 'system', 'content':system_prompt},{'role': 'user', 'content': prompt, 'images': processed_images if processed_images else None}]
-
+ messages = [
+ {'role': 'system', 'content':system_prompt},
+ ]
+ if split:
+ messages += self.split_discussion(prompt,user_keyword=user_keyword, ai_keyword=ai_keyword)
+ if processed_images:
+ messages[-1]["images"]=processed_images
+ else:
+ messages.append({'role': 'user', 'content': prompt, 'images': processed_images if processed_images else None})
  if stream:
  response_stream = self.ollama_client.chat(
  model=self.model_name,
@@ -201,7 +214,14 @@
  )
  return response_dict.get('message', {}).get('content', '')
  else: # Text-only
- messages = [{'role': 'system', 'content':system_prompt},{'role': 'user', 'content': prompt}]
+ messages = [
+ {'role': 'system', 'content':system_prompt},
+ ]
+ if split:
+ messages += self.split_discussion(prompt,user_keyword=user_keyword, ai_keyword=ai_keyword)
+ else:
+ messages.append({'role': 'user', 'content': prompt})
+
  if stream:
  response_stream = self.ollama_client.chat(
  model=self.model_name,
@@ -55,42 +55,50 @@ class OpenAIBinding(LollmsLLMBinding):
  self.completion_format = ELF_COMPLETION_FORMAT.Chat


- def generate_text(self,
- prompt: str,
- images: Optional[List[str]] = None,
- system_prompt: str = "",
- n_predict: Optional[int] = None,
- stream: bool = False,
- temperature: float = 0.1,
- top_k: int = 50,
- top_p: float = 0.95,
- repeat_penalty: float = 0.8,
- repeat_last_n: int = 40,
- seed: Optional[int] = None,
- n_threads: int = 8,
- ctx_size: int | None = None,
- streaming_callback: Optional[Callable[[str, str], None]] = None) -> str:
+ def generate_text(self,
+ prompt: str,
+ images: Optional[List[str]] = None,
+ system_prompt: str = "",
+ n_predict: Optional[int] = None,
+ stream: Optional[bool] = None,
+ temperature: float = 0.7,
+ top_k: int = 40,
+ top_p: float = 0.9,
+ repeat_penalty: float = 1.1,
+ repeat_last_n: int = 64,
+ seed: Optional[int] = None,
+ n_threads: Optional[int] = None,
+ ctx_size: int | None = None,
+ streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+ split:Optional[bool]=False, # put to true if the prompt is a discussion
+ user_keyword:Optional[str]="!@>user:",
+ ai_keyword:Optional[str]="!@>assistant:",
+ ) -> Union[str, dict]:
  """
- Generate text based on the provided prompt and parameters.
+ Generate text using the active LLM binding, using instance defaults if parameters are not provided.

  Args:
  prompt (str): The input prompt for text generation.
  images (Optional[List[str]]): List of image file paths for multimodal generation.
- n_predict (Optional[int]): Maximum number of tokens to generate.
- stream (bool): Whether to stream the output. Defaults to False.
- temperature (float): Sampling temperature. Defaults to 0.1.
- top_k (int): Top-k sampling parameter. Defaults to 50.
- top_p (float): Top-p sampling parameter. Defaults to 0.95.
- repeat_penalty (float): Penalty for repeated tokens. Defaults to 0.8.
- repeat_last_n (int): Number of previous tokens to consider for repeat penalty. Defaults to 40.
- seed (Optional[int]): Random seed for generation.
- n_threads (int): Number of threads to use. Defaults to 8.
+ n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
+ stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
+ temperature (Optional[float]): Sampling temperature. Uses instance default if None.
+ top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
+ top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
+ repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
+ repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
+ seed (Optional[int]): Random seed for generation. Uses instance default if None.
+ n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
+ ctx_size (int | None): Context size override for this generation.
  streaming_callback (Optional[Callable[[str, str], None]]): Callback function for streaming output.
  - First parameter (str): The chunk of text received.
  - Second parameter (str): The message type (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
+ split:Optional[bool]: put to true if the prompt is a discussion
+ user_keyword:Optional[str]: when splitting we use this to extract user prompt
+ ai_keyword:Optional[str]": when splitting we use this to extract ai prompt

  Returns:
- str: Generated text or error dictionary if failed.
+ Union[str, dict]: Generated text or error dictionary if failed.
  """
  count = 0
  output = ""
@@ -101,16 +109,17 @@
  {
  "role": "system",
  "content": system_prompt,
- },
-
- {
- "role": "user",
- "content": [
+ }
+ ]
+ if split:
+ messages += self.split_discussion(prompt,user_keyword=user_keyword, ai_keyword=ai_keyword)
+ if images:
+ messages[-1]["content"] = [
  {
  "type": "text",
- "text": prompt
+ "text": messages[-1]["content"]
  }
- ] + [
+ ]+[
  {
  "type": "image_url",
  "image_url": {
@@ -119,8 +128,26 @@
  }
  for image_path in images
  ]
- }
- ]
+ else:
+ messages.append({
+ 'role': 'user',
+ 'content': [
+ {
+ "type": "text",
+ "text": prompt
+ }
+ ] + [
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": f"data:image/jpeg;base64,{encode_image(image_path)}"
+ }
+ }
+ for image_path in images
+ ]
+ }
+ )
+
  else:
  messages = [{"role": "user", "content": prompt}]
@@ -154,7 +154,10 @@ class OpenLLMBinding(LollmsLLMBinding):
  seed: Optional[int] = None,
  # n_threads: Optional[int] = None, # Server-side config for OpenLLM
  # ctx_size: Optional[int] = None, # Server-side config, though some models might allow via llm_config
- streaming_callback: Optional[Callable[[str, int], bool]] = None
+ streaming_callback: Optional[Callable[[str, int], bool]] = None,
+ split:Optional[bool]=False, # put to true if the prompt is a discussion
+ user_keyword:Optional[str]="!@>user:",
+ ai_keyword:Optional[str]="!@>assistant:",
  ) -> Union[str, Dict[str, any]]:

  if not self.openllm_client:
@@ -216,6 +216,9 @@ class PythonLlamaCppBinding(LollmsLLMBinding):
  streaming_callback: Optional[Callable[[str, int], bool]] = None,
  use_chat_format: bool = True,
  grammar: Optional[Union[str, LlamaGrammar]] = None,
+ split:Optional[bool]=False, # put to true if the prompt is a discussion
+ user_keyword:Optional[str]="!@>user:",
+ ai_keyword:Optional[str]="!@>assistant:",
  **generation_kwargs
  ) -> Union[str, Dict[str, any]]:

@@ -341,7 +341,10 @@ class VLLMBinding(LollmsLLMBinding):
  repeat_last_n: int = 64, # Note: vLLM applies penalty to full context
  seed: Optional[int] = None,
  n_threads: int = 8, # Note: vLLM manages its own threading/parallelism
- streaming_callback: Optional[Callable[[str, int], bool]] = None
+ streaming_callback: Optional[Callable[[str, int], bool]] = None,
+ split:Optional[bool]=False, # put to true if the prompt is a discussion
+ user_keyword:Optional[str]="!@>user:",
+ ai_keyword:Optional[str]="!@>assistant:",
  ) -> Union[str, Dict[str, any]]:
  if not self.llm_engine: return {"status": False, "error": "Engine not loaded."}

@@ -312,6 +312,9 @@ class HuggingFaceHubBinding(LollmsLLMBinding):
  seed: Optional[int] = None,
  stop_words: Optional[List[str]] = None, # Added custom stop_words
  streaming_callback: Optional[Callable[[str, int], bool]] = None,
+ split:Optional[bool]=False, # put to true if the prompt is a discussion
+ user_keyword:Optional[str]="!@>user:",
+ ai_keyword:Optional[str]="!@>assistant:",
  use_chat_format_override: Optional[bool] = None,
  **generation_kwargs
  ) -> Union[str, Dict[str, Any]]:
@@ -341,7 +341,10 @@ class VLLMBinding(LollmsLLMBinding):
  repeat_last_n: int = 64, # Note: vLLM applies penalty to full context
  seed: Optional[int] = None,
  n_threads: int = 8, # Note: vLLM manages its own threading/parallelism
- streaming_callback: Optional[Callable[[str, int], bool]] = None
+ streaming_callback: Optional[Callable[[str, int], bool]] = None,
+ split:Optional[bool]=False, # put to true if the prompt is a discussion
+ user_keyword:Optional[str]="!@>user:",
+ ai_keyword:Optional[str]="!@>assistant:",
  ) -> Union[str, Dict[str, any]]:
  if not self.llm_engine: return {"status": False, "error": "Engine not loaded."}
@@ -11,7 +11,7 @@ from lollms_client.lollms_stt_binding import LollmsSTTBinding, LollmsSTTBindingM
  from lollms_client.lollms_ttv_binding import LollmsTTVBinding, LollmsTTVBindingManager
  from lollms_client.lollms_ttm_binding import LollmsTTMBinding, LollmsTTMBindingManager

- import json
+ import re
  from enum import Enum
  import base64
  import requests
@@ -61,11 +61,12 @@ class LollmsClient():
  ctx_size: Optional[int] = 8192,
  n_predict: Optional[int] = 4096,
  stream: bool = False,
- temperature: float = 0.1,
- top_k: int = 50,
- top_p: float = 0.95,
- repeat_penalty: float = 0.8,
- repeat_last_n: int = 40,
+ temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
+ top_k: int = 40, # Ollama default is 40
+ top_p: float = 0.9, # Ollama default is 0.9
+ repeat_penalty: float = 1.1, # Ollama default is 1.1
+ repeat_last_n: int = 64, # Ollama default is 64
+
  seed: Optional[int] = None,
  n_threads: int = 8,
  streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
@@ -362,7 +363,11 @@
  seed: Optional[int] = None,
  n_threads: Optional[int] = None,
  ctx_size: int | None = None,
- streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None) -> Union[str, dict]:
+ streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+ split:Optional[bool]=False, # put to true if the prompt is a discussion
+ user_keyword:Optional[str]="!@>user:",
+ ai_keyword:Optional[str]="!@>assistant:",
+ ) -> Union[str, dict]:
  """
  Generate text using the active LLM binding, using instance defaults if parameters are not provided.

@@ -380,6 +385,9 @@
  n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
  ctx_size (int | None): Context size override for this generation.
  streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
+ split:Optional[bool]: put to true if the prompt is a discussion
+ user_keyword:Optional[str]: when splitting we use this to extract user prompt
+ ai_keyword:Optional[str]": when splitting we use this to extract ai prompt

  Returns:
  Union[str, dict]: Generated text or error dictionary if failed.
@@ -399,7 +407,10 @@
  seed=seed if seed is not None else self.default_seed,
  n_threads=n_threads if n_threads is not None else self.default_n_threads,
  ctx_size = ctx_size if ctx_size is not None else self.default_ctx_size,
- streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback
+ streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback,
+ split= split,
+ user_keyword=user_keyword,
+ ai_keyword=ai_keyword
  )
  raise RuntimeError("LLM binding not initialized.")
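LollmsClient.generate_text now just forwards split, user_keyword and ai_keyword to the active binding, so the turn markers can be overridden per call. A minimal sketch, assuming an already constructed client lc; the alternative markers are made up for illustration:

# Hypothetical custom markers; any strings work as long as the prompt uses them consistently.
prompt = "### user: Summarize our discussion so far.\n### assistant: "
response = lc.generate_text(
    prompt=prompt,
    split=True,                   # parse the prompt into chat messages
    user_keyword="### user:",     # marker that opens a user turn
    ai_keyword="### assistant:",  # marker that opens an assistant turn
)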
@@ -981,7 +992,6 @@ Do not split the code in multiple tags.
  Ranks answers for a question from best to worst using LLM JSON generation.
  (Implementation requires self.generate_code which uses self.generate_text)
  """
- # ... (Implementation as provided before, relies on self.generate_code) ...
  if not callback:
  callback = self.sink

@@ -1567,6 +1577,7 @@ Provide the final aggregated answer in {output_format} format, directly addressi
  callback("Deep analysis complete.", MSG_TYPE.MSG_TYPE_STEP_END)
  return final_output

+
  def chunk_text(text, tokenizer, detokenizer, chunk_size, overlap, use_separators=True):
  """
  Chunks text based on token count.
@@ -1646,3 +1657,5 @@ def chunk_text(text, tokenizer, detokenizer, chunk_size, overlap, use_separators
  break

  return chunks
+
+
@@ -2,13 +2,14 @@
  from abc import ABC, abstractmethod
  import importlib
  from pathlib import Path
- from typing import Optional, Callable, List
+ from typing import Optional, Callable, List, Union
  from lollms_client.lollms_types import ELF_COMPLETION_FORMAT
  import importlib
  from pathlib import Path
  from typing import Optional
  from ascii_colors import trace_exception
-
+ from lollms_client.lollms_types import MSG_TYPE
+ import re
  class LollmsLLMBinding(ABC):
  """Abstract base class for all LOLLMS LLM bindings"""

@@ -25,41 +26,50 @@ class LollmsLLMBinding(ABC):
  self.model_name = None #Must be set by the instance

  @abstractmethod
- def generate_text(self,
+ def generate_text(self,
  prompt: str,
  images: Optional[List[str]] = None,
  system_prompt: str = "",
  n_predict: Optional[int] = None,
- stream: bool = False,
- temperature: float = 0.1,
- top_k: int = 50,
- top_p: float = 0.95,
- repeat_penalty: float = 0.8,
- repeat_last_n: int = 40,
+ stream: Optional[bool] = None,
+ temperature: Optional[float] = None,
+ top_k: Optional[int] = None,
+ top_p: Optional[float] = None,
+ repeat_penalty: Optional[float] = None,
+ repeat_last_n: Optional[int] = None,
  seed: Optional[int] = None,
- n_threads: int = 8,
- streaming_callback: Optional[Callable[[str, str], None]] = None) -> str:
+ n_threads: Optional[int] = None,
+ ctx_size: int | None = None,
+ streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+ split:Optional[bool]=False, # put to true if the prompt is a discussion
+ user_keyword:Optional[str]="!@>user:",
+ ai_keyword:Optional[str]="!@>assistant:",
+ ) -> Union[str, dict]:
  """
- Generate text based on the provided prompt and parameters.
+ Generate text using the active LLM binding, using instance defaults if parameters are not provided.

  Args:
  prompt (str): The input prompt for text generation.
  images (Optional[List[str]]): List of image file paths for multimodal generation.
- n_predict (Optional[int]): Maximum number of tokens to generate.
- stream (bool): Whether to stream the output. Defaults to False.
- temperature (float): Sampling temperature. Defaults to 0.1.
- top_k (int): Top-k sampling parameter. Defaults to 50.
- top_p (float): Top-p sampling parameter. Defaults to 0.95.
- repeat_penalty (float): Penalty for repeated tokens. Defaults to 0.8.
- repeat_last_n (int): Number of previous tokens to consider for repeat penalty. Defaults to 40.
- seed (Optional[int]): Random seed for generation.
- n_threads (int): Number of threads to use. Defaults to 8.
+ n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
+ stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
+ temperature (Optional[float]): Sampling temperature. Uses instance default if None.
+ top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
+ top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
+ repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
+ repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
+ seed (Optional[int]): Random seed for generation. Uses instance default if None.
+ n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
+ ctx_size (int | None): Context size override for this generation.
  streaming_callback (Optional[Callable[[str, str], None]]): Callback function for streaming output.
  - First parameter (str): The chunk of text received.
  - Second parameter (str): The message type (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
+ split:Optional[bool]: put to true if the prompt is a discussion
+ user_keyword:Optional[str]: when splitting we use this to extract user prompt
+ ai_keyword:Optional[str]": when splitting we use this to extract ai prompt

  Returns:
- str: Generated text or error dictionary if failed.
+ Union[str, dict]: Generated text or error dictionary if failed.
  """
  pass

@@ -146,6 +156,52 @@ class LollmsLLMBinding(ABC):
  pass


+ def split_discussion(self, lollms_prompt_string: str, system_keyword="!@>system:", user_keyword="!@>user:", ai_keyword="!@>assistant:") -> list:
+ """
+ Splits a LoLLMs prompt into a list of OpenAI-style messages.
+ If the very first chunk has no prefix, it's assigned to "system".
+ """
+ # Regex to split on any of the three prefixes (lookahead)
+ pattern = r"(?={}|{}|{})".format(
+ re.escape(system_keyword),
+ re.escape(user_keyword),
+ re.escape(ai_keyword)
+ )
+ parts = re.split(pattern, lollms_prompt_string)
+ messages = []
+
+ for part in parts:
+ part = part.strip()
+ if not part:
+ continue
+
+ # Determine role and strip prefix if present
+ if part.startswith(system_keyword):
+ role = "system"
+ content = part[len(system_keyword):].strip()
+ elif part.startswith(user_keyword):
+ role = "user"
+ content = part[len(user_keyword):].strip()
+ elif part.startswith(ai_keyword):
+ role = "assistant"
+ content = part[len(ai_keyword):].strip()
+ else:
+ # No prefix: if it's the first valid chunk, treat as system
+ if not messages:
+ role = "system"
+ content = part
+ else:
+ # otherwise skip unrecognized segments
+ continue
+
+ messages.append({"role": role, "content": content})
+ if messages[-1]["content"]=="":
+ del messages[-1]
+ return messages
+
+
+
+
  class LollmsLLMBindingManager:
  """Manages binding discovery and instantiation"""
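split_discussion is the helper behind the new split flag: it turns a marker-delimited LoLLMs prompt into OpenAI-style messages and drops a trailing empty turn. A quick illustration of the expected behaviour, assuming a binding instance named binding; the prompt text is invented:

prompt = (
    "!@>system: You are concise.\n"
    "!@>user: Hi there\n"
    "!@>assistant: Hello! How can I help?\n"
    "!@>user: what is 1+1?\n"
    "!@>assistant: "
)
messages = binding.split_discussion(prompt)
# Expected result based on the implementation above (the empty trailing
# assistant turn is removed):
# [{'role': 'system', 'content': 'You are concise.'},
#  {'role': 'user', 'content': 'Hi there'},
#  {'role': 'assistant', 'content': 'Hello! How can I help?'},
#  {'role': 'user', 'content': 'what is 1+1?'}]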
@@ -1,10 +1,12 @@
  import urllib
  import numpy
  from pathlib import Path
- from pipmaster import PackageManager
+ import pipmaster as pm
  from PIL import Image
  import io
  import base64
+ import re
+ import numpy as np
  class PromptReshaper:
  def __init__(self, template:str):
  self.template = template
@@ -122,8 +124,8 @@ def remove_text_from_string(string: str, text_to_find:str):


  def process_ai_output(output, images, output_folder):
- if not PackageManager.is_installed("cv2"):
- PackageManager.install("opencv-python")
+ if not pm.is_installed("opencv-python"):
+ pm.install("opencv-python")
  import cv2
  images = [cv2.imread(str(img)) for img in images]
  # Find all bounding box entries in the output
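The utilities change above swaps the PackageManager class for the module-level pipmaster helpers and checks the pip distribution name ("opencv-python") instead of the import name ("cv2"). The lazy-install pattern it relies on looks like this; a minimal sketch, not taken verbatim from the package:

import pipmaster as pm

# Install the dependency on first use, then import it; checking the pip
# distribution name avoids the "opencv-python" vs "cv2" mismatch.
if not pm.is_installed("opencv-python"):
    pm.install("opencv-python")
import cv2  # safe once the wheel is installed

print(cv2.__version__)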
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lollms_client
- Version: 0.16.0
+ Version: 0.17.1
  Summary: A client library for LoLLMs generate endpoint
  Author-email: ParisNeo <parisneoai@gmail.com>
  License: Apache Software License