lollms-client 0.15.2__py3-none-any.whl → 0.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lollms-client might be problematic. Click here for more details.

Files changed (39) hide show
  1. examples/generate_and_speak/generate_and_speak.py +251 -0
  2. examples/generate_game_sfx/generate_game_fx.py +240 -0
  3. examples/simple_text_gen_with_image_test.py +8 -8
  4. examples/text_2_image.py +0 -1
  5. examples/text_gen.py +1 -1
  6. lollms_client/__init__.py +1 -1
  7. lollms_client/llm_bindings/llamacpp/__init__.py +61 -11
  8. lollms_client/llm_bindings/lollms/__init__.py +31 -24
  9. lollms_client/llm_bindings/ollama/__init__.py +47 -27
  10. lollms_client/llm_bindings/openai/__init__.py +62 -35
  11. lollms_client/llm_bindings/openllm/__init__.py +4 -1
  12. lollms_client/llm_bindings/pythonllamacpp/__init__.py +3 -0
  13. lollms_client/llm_bindings/tensor_rt/__init__.py +4 -1
  14. lollms_client/llm_bindings/transformers/__init__.py +3 -0
  15. lollms_client/llm_bindings/vllm/__init__.py +4 -1
  16. lollms_client/lollms_core.py +65 -33
  17. lollms_client/lollms_llm_binding.py +76 -22
  18. lollms_client/lollms_stt_binding.py +3 -15
  19. lollms_client/lollms_tti_binding.py +5 -29
  20. lollms_client/lollms_ttm_binding.py +5 -28
  21. lollms_client/lollms_tts_binding.py +4 -28
  22. lollms_client/lollms_ttv_binding.py +4 -28
  23. lollms_client/lollms_utilities.py +5 -3
  24. lollms_client/stt_bindings/lollms/__init__.py +5 -4
  25. lollms_client/stt_bindings/whisper/__init__.py +304 -0
  26. lollms_client/stt_bindings/whispercpp/__init__.py +380 -0
  27. lollms_client/tti_bindings/lollms/__init__.py +4 -6
  28. lollms_client/ttm_bindings/audiocraft/__init__.py +281 -0
  29. lollms_client/ttm_bindings/bark/__init__.py +339 -0
  30. lollms_client/tts_bindings/bark/__init__.py +336 -0
  31. lollms_client/tts_bindings/piper_tts/__init__.py +343 -0
  32. lollms_client/tts_bindings/xtts/__init__.py +317 -0
  33. lollms_client-0.17.0.dist-info/METADATA +183 -0
  34. lollms_client-0.17.0.dist-info/RECORD +65 -0
  35. lollms_client-0.15.2.dist-info/METADATA +0 -192
  36. lollms_client-0.15.2.dist-info/RECORD +0 -56
  37. {lollms_client-0.15.2.dist-info → lollms_client-0.17.0.dist-info}/WHEEL +0 -0
  38. {lollms_client-0.15.2.dist-info → lollms_client-0.17.0.dist-info}/licenses/LICENSE +0 -0
  39. {lollms_client-0.15.2.dist-info → lollms_client-0.17.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,251 @@
1
+ # examples/generate_and_speak/generate_and_speak.py
2
+ from pathlib import Path
3
+ import time
4
+ import argparse
5
+
6
+ # Ensure pygame is installed for this example
7
+ try:
8
+ import pipmaster as pm
9
+ pm.ensure_packages(["pygame"])
10
+ import pygame
11
+ PYGAME_AVAILABLE = True
12
+ except ImportError:
13
+ print("Pygame not found or pipmaster failed. Please install it manually: pip install pygame")
14
+ PYGAME_AVAILABLE = False
15
+ except Exception as e:
16
+ print(f"Could not ensure pygame: {e}")
17
+ PYGAME_AVAILABLE = False
18
+
19
+ from lollms_client import LollmsClient, MSG_TYPE
20
+ from ascii_colors import ASCIIColors, trace_exception
21
+
22
+ # --- Configuration ---
23
+ SPEECH_OUTPUT_DIR = Path(__file__).parent / "speech_output"
24
+ SPEECH_OUTPUT_DIR.mkdir(exist_ok=True)
25
+
26
+ # Default path for Piper voices relative to this example script for convenience
27
+ DEFAULT_PIPER_VOICES_SUBDIR = Path(__file__).parent / "piper_voices_for_demo"
28
+ DEFAULT_PIPER_VOICE_FILENAME = "en_US-lessac-medium.onnx" # A common, good quality English voice
29
+
30
def text_stream_callback(chunk: str, message_type: MSG_TYPE, params: dict = None, metadata: list = None) -> bool:
    """Echo streamed LLM output to the console.

    Text chunks are printed inline without a trailing newline; step start/end
    notifications are printed on their own coloured line. Any other message
    type is ignored.

    Args:
        chunk: The streamed text, or the step description for step messages.
        message_type: The kind of message being delivered.
        params: Unused by this callback.
        metadata: Unused by this callback.

    Returns:
        Always ``True``.
        NOTE(review): presumably this tells the binding to keep generating --
        confirm against the LollmsClient streaming contract.
    """
    if message_type == MSG_TYPE.MSG_TYPE_CHUNK:
        # Flush so partial output shows up immediately while streaming.
        print(chunk, end="", flush=True)
        return True

    if message_type == MSG_TYPE.MSG_TYPE_STEP_START:
        ASCIIColors.yellow(f"\n>> Starting step: {chunk}")
        return True

    if message_type == MSG_TYPE.MSG_TYPE_STEP_END:
        ASCIIColors.green(f"\n<< Finished step: {chunk}")

    return True
38
+
39
def _download_to_file(url: str, destination: Path, timeout: float = 30.0) -> None:
    """Stream *url* into *destination*, replacing it only after a complete download.

    The payload is first written to a sibling ``.part`` file so that an
    interrupted transfer never leaves a truncated file under the final name.

    Raises:
        ImportError: If ``requests`` is not installed.
        requests.RequestException: On connection problems, timeouts or HTTP errors.
        OSError: If the file cannot be written or renamed.
    """
    import requests

    partial_path = destination.with_name(destination.name + ".part")
    try:
        # The context manager releases the connection even if writing fails.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        partial_path.replace(destination)
    finally:
        # No-op after a successful replace; removes leftovers after a failure.
        partial_path.unlink(missing_ok=True)


def ensure_default_piper_voice_for_demo(voices_dir: Path, voice_filename: str):
    """Download the demo's default Piper voice into *voices_dir* if it is missing.

    A Piper voice consists of ``<voice_filename>`` (the ``.onnx`` model) and
    ``<voice_filename>.json`` (its config). Whichever of the two is missing is
    fetched from the ``rhasspy/piper-voices`` repository on Hugging Face.

    Args:
        voices_dir: Directory that holds (or will hold) the voice files. It is
            created, including missing parents, if it does not exist.
        voice_filename: Voice file name of the form
            ``<locale>-<name>-<quality>.onnx``, e.g. ``en_US-lessac-medium.onnx``.

    Returns:
        ``True`` if both files are present when the function returns,
        ``False`` if a download was needed and failed (the error is reported
        on the console rather than raised).
    """
    voices_dir.mkdir(parents=True, exist_ok=True)
    onnx_path = voices_dir / voice_filename
    json_path = voices_dir / f"{voice_filename}.json"

    if onnx_path.exists() and json_path.exists():
        return True

    ASCIIColors.info(f"Default Piper test voice '{voice_filename}' not found in {voices_dir}. Attempting to download...")
    try:
        # Construct URLs (assuming en_US/lessac/medium structure)
        voice_parts = voice_filename.split('-')  # e.g., ['en_US', 'lessac', 'medium.onnx']
        if len(voice_parts) < 3:
            raise ValueError(
                f"cannot derive a download URL from '{voice_filename}' "
                f"(expected '<locale>-<name>-<quality>.onnx')"
            )
        lang_code = voice_parts[0].split('_')[0]  # en
        voice_name_path = "/".join(voice_parts[0:2])  # en_US/lessac
        quality_path = voice_parts[2].split('.')[0]  # medium

        # Base URL for Piper voices on Hugging Face
        PIPER_VOICES_HF_BASE_URL = "https://huggingface.co/rhasspy/piper-voices/resolve/main/"

        onnx_url = f"{PIPER_VOICES_HF_BASE_URL}{lang_code}/{voice_name_path}/{quality_path}/{voice_filename}"
        json_url = f"{PIPER_VOICES_HF_BASE_URL}{lang_code}/{voice_name_path}/{quality_path}/{voice_filename}.json"

        if not onnx_path.exists():
            ASCIIColors.info(f"Downloading {onnx_url} to {onnx_path}")
            _download_to_file(onnx_url, onnx_path)

        if not json_path.exists():
            ASCIIColors.info(f"Downloading {json_url} to {json_path}")
            # Stored byte-for-byte as served, so no decode/re-encode round trip.
            _download_to_file(json_url, json_path)

        ASCIIColors.green(f"Default Piper test voice '{voice_filename}' downloaded successfully to {voices_dir}.")
        return True
    except Exception as e_download:
        # Best effort: the demo keeps going and tells the user how to fix it manually.
        ASCIIColors.error(f"Failed to download default Piper test voice '{voice_filename}': {e_download}")
        ASCIIColors.warning(f"Please manually download '{voice_filename}' and '{voice_filename}.json' "
                            f"from rhasspy.github.io/piper-voices/ or Hugging Face "
                            f"and place them in {voices_dir.resolve()}")
        return False
83
+
84
+
85
def main():
    """Run the text-generation + speech-synthesis demo end to end.

    Steps:
      1. Parse command-line options for the LLM and TTS bindings.
      2. Build the binding configuration dictionaries and create a LollmsClient.
      3. Generate a short text with the LLM (streamed to the console).
      4. Synthesize that text with the selected TTS binding and save it as a
         WAV file in ``SPEECH_OUTPUT_DIR``.
      5. Play the file with pygame when pygame is available.

    Every failure is reported on the console and ends the demo early; nothing
    is raised to the caller.
    """
    parser = argparse.ArgumentParser(description="Generate text with an LLM and synthesize it to speech using LOLLMS.")
    # LLM Arguments
    parser.add_argument(
        "--llm_binding", type=str, default="ollama", choices=["ollama", "openai", "lollms", "llamacpp", "pythonllamacpp", "transformers", "vllm"],
        help="The LLM binding to use for text generation."
    )
    parser.add_argument(
        "--llm_model", type=str, default="mistral",
        help="Model name or path for the LLM binding."
    )
    parser.add_argument("--llm_host", type=str, default=None, help="Host address for server-based LLM bindings.")
    parser.add_argument("--models_path", type=str, default=None, help="Path to models directory for local LLM bindings.")
    parser.add_argument("--openai_key", type=str, default=None, help="OpenAI API key.")

    # TTS Arguments
    parser.add_argument(
        "--tts_binding", type=str, default="bark", choices=["bark", "lollms", "xtts", "piper"],
        help="The TTS binding to use for speech synthesis."
    )
    # Bark specific
    parser.add_argument("--bark_model", type=str, default="suno/bark-small", help="Bark model ID for TTS.")
    parser.add_argument("--bark_voice_preset", type=str, default="v2/en_speaker_6", help="Bark voice preset.")
    # XTTS specific
    parser.add_argument("--xtts_model", type=str, default="tts_models/multilingual/multi-dataset/xtts_v2", help="XTTS model identifier for Coqui TTS.")
    parser.add_argument("--xtts_speaker_wav", type=str, default=None, help="Path to speaker WAV for XTTS voice cloning.")
    parser.add_argument("--xtts_language", type=str, default="en", help="Language for XTTS.")
    # Piper specific
    parser.add_argument("--piper_default_voice_model_path", type=str, default=None, help="Path to the default .onnx Piper voice model.")
    parser.add_argument("--piper_voices_dir", type=str, default=str(DEFAULT_PIPER_VOICES_SUBDIR), help="Directory containing Piper voice models.")
    parser.add_argument("--piper_voice_file", type=str, default=DEFAULT_PIPER_VOICE_FILENAME, help="Filename of the Piper voice to use from piper_voices_dir (e.g., en_US-ryan-medium.onnx).")

    # Common TTS/LLM args
    parser.add_argument("--tts_host", type=str, default=None, help="Host address for server-based TTS bindings (e.g., lollms TTS).")
    parser.add_argument("--device", type=str, default=None, choices=["cpu", "cuda", "mps", None], help="Device for local TTS/LLM models.")
    args = parser.parse_args()

    ASCIIColors.red("--- LOLLMS Text Generation and Speech Synthesis Demo ---")
    ASCIIColors.info(f"Using LLM Binding: {args.llm_binding} (Model: {args.llm_model})")
    ASCIIColors.info(f"Using TTS Binding: {args.tts_binding}")
    if args.tts_binding == "bark":
        ASCIIColors.info(f" Bark Model: {args.bark_model}, Voice Preset: {args.bark_voice_preset}")
    elif args.tts_binding == "xtts":
        ASCIIColors.info(f" XTTS Model: {args.xtts_model}, Speaker WAV: {args.xtts_speaker_wav or 'Default in binding'}, Lang: {args.xtts_language}")
    elif args.tts_binding == "piper":
        ASCIIColors.info(f" Piper Voices Dir: {args.piper_voices_dir}, Voice File: {args.piper_voice_file}")
        # Ensure default Piper voice for demo if Piper is selected and no specific default path is given
        if not args.piper_default_voice_model_path:
            ensure_default_piper_voice_for_demo(Path(args.piper_voices_dir), args.piper_voice_file)
            args.piper_default_voice_model_path = str(Path(args.piper_voices_dir) / args.piper_voice_file)

    # --- LLM binding configuration ---
    llm_binding_config = {}
    if args.llm_binding == "openai" and args.openai_key:
        llm_binding_config["service_key"] = args.openai_key
    elif args.llm_binding in ["llamacpp", "pythonllamacpp", "transformers", "vllm"]:
        if args.device:
            llm_binding_config["device"] = args.device
        if args.llm_binding == "pythonllamacpp":
            # -1 is passed for CUDA, 0 otherwise.
            llm_binding_config["n_gpu_layers"] = -1 if args.device == "cuda" else 0

    # --- TTS binding configuration ---
    tts_binding_config = {"device": args.device}
    if args.tts_binding == "bark":
        tts_binding_config["model_name"] = args.bark_model
        tts_binding_config["default_voice"] = args.bark_voice_preset
    elif args.tts_binding == "xtts":
        tts_binding_config["model_name"] = args.xtts_model
        tts_binding_config["default_speaker_wav"] = args.xtts_speaker_wav
        tts_binding_config["default_language"] = args.xtts_language
    elif args.tts_binding == "piper":
        tts_binding_config["default_voice_model_path"] = args.piper_default_voice_model_path
        tts_binding_config["piper_voices_dir"] = args.piper_voices_dir
    elif args.tts_binding == "lollms":
        tts_binding_config["model_name"] = "default_lollms_voice"  # Placeholder, server handles actual voice

    lollms_client = None
    try:
        ASCIIColors.magenta("Initializing LollmsClient...")
        # NOTE(review): certificate verification is disabled here for demo
        # convenience; do not copy this setting into production code.
        lollms_client = LollmsClient(
            binding_name=args.llm_binding, model_name=args.llm_model,
            host_address=args.llm_host, models_path=args.models_path,
            llm_binding_config=llm_binding_config,
            tts_binding_name=args.tts_binding, tts_host_address=args.tts_host,
            tts_binding_config=tts_binding_config,
            verify_ssl_certificate=False
        )
        ASCIIColors.green("LollmsClient initialized.")
    except Exception as e:
        ASCIIColors.error(f"Failed to initialize LollmsClient: {e}")
        trace_exception(e)
        return

    # --- Text generation ---
    generated_text = ""
    text_prompt = "Craft a very short, cheerful message about the joy of discovery."
    ASCIIColors.cyan(f"\n--- Generating Text (Prompt: '{text_prompt[:50]}...') ---")
    if not lollms_client.binding:
        ASCIIColors.error("LLM binding not available.")
        return
    try:
        print(f"{ASCIIColors.YELLOW}AI is thinking: {ASCIIColors.RESET}", end="")
        generated_text = lollms_client.generate_text(
            prompt=text_prompt, n_predict=100, stream=True,
            streaming_callback=text_stream_callback, temperature=0.7
        )
        print("\n")
        ASCIIColors.green("Text generation complete.")
        ASCIIColors.magenta("Generated Text:\n")
        ASCIIColors.yellow(generated_text)
    except Exception as e:
        ASCIIColors.error(f"Text generation failed: {e}")
        trace_exception(e)
        return
    if not generated_text:
        ASCIIColors.warning("LLM did not generate any text.")
        return

    # --- Speech synthesis ---
    speech_file_path = None
    ASCIIColors.cyan(f"\n--- Synthesizing Speech (using {args.tts_binding}) ---")
    if not lollms_client.tts:
        ASCIIColors.error("TTS binding not available.")
        return
    try:
        tts_call_kwargs = {}
        if args.tts_binding == "bark":
            # For Bark, 'voice' in generate_audio is the voice_preset.
            # If not using the default from init, pass it here.
            # tts_call_kwargs['voice'] = args.bark_voice_preset
            pass  # Uses default_voice from init if args.bark_voice_preset not specified to override
        elif args.tts_binding == "xtts":
            tts_call_kwargs['language'] = args.xtts_language
            # 'voice' for XTTS is the speaker_wav path. If not using default from init, pass here.
            # tts_call_kwargs['voice'] = args.xtts_speaker_wav
        elif args.tts_binding == "piper":
            # 'voice' for Piper is the .onnx filename.
            tts_call_kwargs['voice'] = args.piper_voice_file
            # Example Piper specific param:
            # tts_call_kwargs['length_scale'] = 1.0

        audio_bytes = lollms_client.tts.generate_audio(text=generated_text, **tts_call_kwargs)

        if audio_bytes:
            filename_stem = f"speech_output_{args.llm_binding}_{args.tts_binding}"
            speech_file_path = SPEECH_OUTPUT_DIR / f"{filename_stem.replace('/', '_')}.wav"
            with open(speech_file_path, "wb") as f:
                f.write(audio_bytes)
            ASCIIColors.green(f"Speech synthesized and saved to: {speech_file_path}")
        elif args.tts_binding == "lollms":
            ASCIIColors.warning("LOLLMS TTS binding returned empty bytes. Server might have saved file if 'fn' was used.")
            speech_file_path = None
        else:
            ASCIIColors.warning("Speech synthesis returned empty bytes.")
            speech_file_path = None
    except Exception as e:
        ASCIIColors.error(f"Speech synthesis failed: {e}")
        trace_exception(e)
        return

    # --- Playback ---
    if speech_file_path and PYGAME_AVAILABLE:
        ASCIIColors.magenta("\n--- Playing Synthesized Speech ---")
        try:
            pygame.mixer.init()
            speech_sound = pygame.mixer.Sound(str(speech_file_path))
            ASCIIColors.cyan("Playing audio... Press Ctrl+C in console to stop playback early.")
            speech_sound.play()
            # Only the mixer is initialised, so the event queue must not be
            # polled here: pygame.event.get() needs the video subsystem and
            # would raise pygame.error, cutting playback short. Ctrl+C
            # (KeyboardInterrupt) is the way to stop early.
            while pygame.mixer.get_busy():
                pygame.time.wait(100)
            ASCIIColors.green("Playback finished.")
        except pygame.error as e:
            ASCIIColors.warning(f"Could not play audio with pygame: {e}")
        except KeyboardInterrupt:
            pygame.mixer.stop()
            ASCIIColors.yellow("\nPlayback interrupted.")
        finally:
            pygame.quit()
    elif not PYGAME_AVAILABLE:
        ASCIIColors.warning("Pygame is not available for playback.")
        if speech_file_path:
            ASCIIColors.info(f"Generated speech: {speech_file_path.resolve()}")
    elif not speech_file_path:
        ASCIIColors.warning("No speech file generated/path unknown. Skipping playback.")

    ASCIIColors.red("\n--- Demo Finished ---")
249
+
250
+ if __name__ == "__main__":
251
+ main()
@@ -0,0 +1,240 @@
1
+ # examples/generate_game_sfx/generate_game_fx.py
2
+ from pathlib import Path
3
+ import time
4
+ import argparse # For command-line arguments
5
+
6
+ # Ensure pygame is installed for this example
7
+ try:
8
+ import pipmaster as pm
9
+ pm.ensure_packages(["pygame"])
10
+ import pygame
11
+ PYGAME_AVAILABLE = True
12
+ except ImportError:
13
+ print("Pygame not found or pipmaster failed. Please install it manually: pip install pygame")
14
+ PYGAME_AVAILABLE = False
15
+ except Exception as e:
16
+ print(f"Could not ensure pygame: {e}")
17
+ PYGAME_AVAILABLE = False
18
+
19
+
20
+ from lollms_client import LollmsClient # Removed LollmsDiscussion, LollmsMessage as not used
21
+ from ascii_colors import ASCIIColors, trace_exception
22
+
23
+ # --- Configuration ---
24
+ # Output directory for generated sound effects
25
+ SFX_OUTPUT_DIR = Path(__file__).parent / "sfx_output"
26
+ SFX_OUTPUT_DIR.mkdir(exist_ok=True)
27
+
28
+ # Sound effect descriptions. Note: 'duration' is more relevant for audiocraft.
29
+ # Bark's duration is more implicit based on prompt content.
30
+ SOUND_EFFECTS_TO_GENERATE = [
31
+ {
32
+ "filename": "sfx_crunch",
33
+ "prompt": "Sound effect of a single, sharp, dry crunch, like stepping on a crisp autumn leaf or a piece of dry wood breaking.",
34
+ "duration": 1, # audiocraft specific
35
+ "bark_params": {"fine_temperature": 0.4, "coarse_temperature": 0.6} # Example bark specific
36
+ },
37
+ {
38
+ "filename": "sfx_death_electronic",
39
+ "prompt": "Short, impactful electronic death sound effect for a video game character, like a quick digital zap or a brief power-down sound.",
40
+ "duration": 1.5,
41
+ "bark_params": {"voice_preset": None} # Try without preset for more raw SFX
42
+ },
43
+ {
44
+ "filename": "sfx_powerup_positive",
45
+ "prompt": "Bright, positive, short power-up collection sound effect, like a magical chime, a sparkling shimmer, or an uplifting notification. [SFX]",
46
+ "duration": 1.5
47
+ },
48
+ {
49
+ "filename": "sfx_laser_shot",
50
+ "prompt": "Sound effect of a futuristic laser gun firing a single shot, a quick 'pew' sound. [SFX: laser pew]",
51
+ "duration": 0.5
52
+ },
53
+ {
54
+ "filename": "sfx_coin_collect",
55
+ "prompt": "Classic video game coin collection sound effect, a short, metallic, cheerful 'ding' or 'jingle'. [SFX: coin]",
56
+ "duration": 0.7
57
+ }
58
+ ]
59
+
60
def generate_sfx(lollms_client: LollmsClient, sfx_info: dict) -> Path | None:
    """Generate a single sound effect using the LollmsClient's TTM binding.

    Args:
        lollms_client: Client whose ``ttm`` binding performs the generation.
        sfx_info: Description of the effect. Reads the required keys
            ``"filename"`` (output file stem) and ``"prompt"``, plus the
            optional keys ``"duration"``, ``"audiocraft_temperature"`` and
            ``"audiocraft_cfg_coef"`` (audiocraft binding) or ``"bark_params"``
            (bark binding).

    Returns:
        Path of the written WAV file inside ``SFX_OUTPUT_DIR``, or ``None`` if
        no TTM binding is available, the binding returned no data, or
        generation raised (the error is reported on the console).
    """
    filename_stem = sfx_info["filename"]
    prompt = sfx_info["prompt"]

    # Check the binding before touching any of its attributes; reading
    # ``ttm.binding_name`` first would raise AttributeError when ttm is None.
    if not lollms_client.ttm:
        ASCIIColors.error("TTM (Text-to-Music/Sound) binding is not available in LollmsClient.")
        return None

    binding_name = lollms_client.ttm.binding_name

    # Default output format
    output_format = "wav"  # WAV is generally best for SFX in pygame
    # Add binding name to filename
    output_path = SFX_OUTPUT_DIR / f"{filename_stem}_{binding_name}.{output_format}"

    ASCIIColors.cyan(f"\nGenerating SFX using '{binding_name}': '{filename_stem}'")
    ASCIIColors.info(f"Prompt: '{prompt[:60]}...'")

    ttm_params = {"progress": True}  # Common param for both

    if binding_name == "audiocraft":
        ttm_params["duration"] = sfx_info.get("duration", 1.0)
        ttm_params["temperature"] = sfx_info.get("audiocraft_temperature", 1.0)
        ttm_params["cfg_coef"] = sfx_info.get("audiocraft_cfg_coef", 3.0)
        ASCIIColors.info(f"AudioCraft Params: duration={ttm_params['duration']}, temp={ttm_params['temperature']}, cfg={ttm_params['cfg_coef']}")
    elif binding_name == "bark":
        # Bark duration is implicit. Parameters are different.
        bark_specific_params = sfx_info.get("bark_params", {})
        ttm_params["voice_preset"] = bark_specific_params.get("voice_preset", None)  # None might be good for SFX
        ttm_params["fine_temperature"] = bark_specific_params.get("fine_temperature", 0.5)
        ttm_params["coarse_temperature"] = bark_specific_params.get("coarse_temperature", 0.7)
        ASCIIColors.info(f"Bark Params: preset={ttm_params['voice_preset']}, fine_temp={ttm_params['fine_temperature']}, coarse_temp={ttm_params['coarse_temperature']}")
    else:
        ASCIIColors.warning(f"Unknown TTM binding '{binding_name}'. Using generic parameters.")

    try:
        music_bytes = lollms_client.ttm.generate_music(prompt=prompt, **ttm_params)

        if not music_bytes:
            ASCIIColors.warning(f"SFX generation for '{filename_stem}' ({binding_name}) returned empty bytes.")
            return None

        with open(output_path, "wb") as f:
            f.write(music_bytes)
        ASCIIColors.green(f"SFX '{filename_stem}' ({binding_name}) saved to: {output_path}")
        return output_path
    except Exception as e:
        # Best effort: one failed effect must not abort the whole batch.
        ASCIIColors.error(f"Error generating SFX '{filename_stem}' ({binding_name}): {e}")
        trace_exception(e)
        return None
110
+
111
def main():
    """Generate the demo sound effects and offer an interactive pygame player.

    Steps:
      1. Parse command-line options (TTM binding, model IDs, device).
      2. Create a LollmsClient with the selected TTM binding.
      3. Generate every entry of ``SOUND_EFFECTS_TO_GENERATE`` via
         ``generate_sfx``.
      4. If pygame is available, load the generated files and open a small
         window where number keys play the sounds and ``Q`` quits.

    Failures are reported on the console and end the demo early.
    """
    parser = argparse.ArgumentParser(description="Generate game sound effects using LOLLMS TTM bindings.")
    parser.add_argument(
        "--ttm_binding",
        type=str,
        choices=["audiocraft", "bark"],
        default="bark",  # Default to bark
        help="The TTM binding to use for generation."
    )
    parser.add_argument(
        "--audiocraft_model",
        type=str,
        default="facebook/musicgen-small",
        help="Hugging Face model ID for AudioCraft (e.g., facebook/musicgen-small, facebook/musicgen-melody)."
    )
    parser.add_argument(
        "--bark_model",
        type=str,
        default="suno/bark-small",
        help="Hugging Face model ID for Bark (e.g., suno/bark-small, suno/bark)."
    )
    parser.add_argument(
        "--device",
        type=str,
        default=None,  # Auto-detect
        choices=["cpu", "cuda", "mps", None],
        help="Device to run the TTM model on (cpu, cuda, mps, or auto-detect)."
    )
    args = parser.parse_args()

    ASCIIColors.red(f"--- LOLLMS Game SFX Generation Example (Using: {args.ttm_binding}) ---")

    ttm_binding_config = {"device": args.device}  # Common device config
    if args.ttm_binding == "audiocraft":
        ttm_binding_config["model_name"] = args.audiocraft_model
        ttm_binding_config["output_format"] = "wav"  # Audiocraft binding defaults to wav for bytes
    elif args.ttm_binding == "bark":
        ttm_binding_config["model_name"] = args.bark_model
        # Bark binding currently outputs WAV by default for bytes
    else:
        # Defensive only: argparse ``choices`` already rejects other values.
        ASCIIColors.error(f"Unsupported TTM binding: {args.ttm_binding}")
        return

    try:
        ASCIIColors.magenta(f"Initializing LollmsClient with {args.ttm_binding} for TTM...")
        lollms_client = LollmsClient(
            binding_name="lollms",  # Can be a dummy if only using TTM
            ttm_binding_name=args.ttm_binding,
            ttm_binding_config=ttm_binding_config
        )
        ASCIIColors.green("LollmsClient initialized.")
    except Exception as e:
        ASCIIColors.error(f"Failed to initialize LollmsClient: {e}")
        trace_exception(e)
        return

    if not lollms_client.ttm:
        ASCIIColors.error(f"{args.ttm_binding.capitalize()} TTM binding could not be loaded. Exiting.")
        return

    # filename stem -> {"path": Path, "binding": str} for every effect that was written
    generated_sfx_paths = {}
    for sfx_info_item in SOUND_EFFECTS_TO_GENERATE:
        sfx_path = generate_sfx(lollms_client, sfx_info_item)
        if sfx_path:
            generated_sfx_paths[sfx_info_item["filename"]] = {
                "path": sfx_path,
                "binding": args.ttm_binding  # Store which binding generated it
            }
        time.sleep(0.5)  # Small delay

    ASCIIColors.red("\n--- SFX Generation Complete ---")
    if not generated_sfx_paths:
        ASCIIColors.warning("No sound effects were successfully generated.")
        return

    if not PYGAME_AVAILABLE:
        ASCIIColors.warning("Pygame is not available. Skipping sound playback demo.")
        ASCIIColors.info(f"Generated SFX can be found in: {SFX_OUTPUT_DIR.resolve()}")
        return

    ASCIIColors.magenta("\n--- Pygame SFX Playback Demo ---")
    pygame.mixer.init()
    try:
        game_sounds = {}
        sfx_playback_order = []  # To map number keys to sounds

        for filename_stem, sfx_data in generated_sfx_paths.items():
            path = sfx_data["path"]
            binding_used = sfx_data["binding"]
            playback_name = f"{filename_stem} ({binding_used})"
            try:
                sound = pygame.mixer.Sound(str(path))
                game_sounds[playback_name] = sound
                sfx_playback_order.append(playback_name)
                ASCIIColors.green(f"Loaded '{path.name}' into pygame as '{playback_name}'.")
            except pygame.error as e:
                ASCIIColors.warning(f"Could not load sound '{path.name}' into pygame: {e}")

        if not game_sounds:
            ASCIIColors.warning("No sounds loaded into pygame. Exiting demo.")
            return

        # Resolve the key constants once instead of on every key press.
        # Positions without a matching pygame ``K_<n>`` constant get no key.
        key_to_sound = {}
        print("\nInstructions:")
        for i, sfx_name_to_play in enumerate(sfx_playback_order):
            print(f" Press key '{i+1}' to play: {sfx_name_to_play}")
            key_code = getattr(pygame, f"K_{i+1}", None)
            if key_code is not None:
                key_to_sound[key_code] = sfx_name_to_play
        print(" Press 'Q' to quit the demo.")

        pygame.display.set_mode((400, 200))
        pygame.display.set_caption(f"SFX Player ({args.ttm_binding.capitalize()})")

        clock = pygame.time.Clock()  # One clock for the whole loop keeps a steady 30 FPS cap
        running = True
        while running:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False
                if event.type == pygame.KEYDOWN:
                    if event.key == pygame.K_q:
                        running = False
                    sfx_name_to_play = key_to_sound.get(event.key)
                    if sfx_name_to_play is not None:
                        ASCIIColors.cyan(f"Playing: {sfx_name_to_play}")
                        game_sounds[sfx_name_to_play].play()
            clock.tick(30)
    finally:
        # Release the mixer/display even on early return or an unexpected error.
        pygame.quit()

    ASCIIColors.red("--- Demo Finished ---")
    ASCIIColors.info(f"Generated SFX are in: {SFX_OUTPUT_DIR.resolve()}")
238
+
239
+ if __name__ == "__main__":
240
+ main()
@@ -10,14 +10,14 @@ from ascii_colors import ASCIIColors, trace_exception
10
10
  # MODEL_NAME = None # Server will use its default or last loaded model
11
11
 
12
12
  # Option 2: Ollama binding
13
- # BINDING_NAME = "ollama"
14
- # HOST_ADDRESS = "http://localhost:11434" # Default Ollama host
15
- # MODEL_NAME = "llava:latest" # Or "llama3:latest", "phi3:latest", etc. - ensure it's pulled in Ollama
16
-
17
- # Option 2: llamacpp binding
18
- BINDING_NAME = "llamacpp"
19
- MODELS_PATH = r"E:\drumber" # Change to your own models folder
20
- MODEL_NAME = "llava-v1.6-mistral-7b.Q3_K_XS.gguf" # Change to your vision capable model (make sure you have a mmprj file with the gguf model with the same name but without the quantization name and with mmproj- prefix (mmproj-llava-v1.6-mistral-7b.gguf))
13
+ BINDING_NAME = "ollama"
14
+ HOST_ADDRESS = "http://localhost:11434" # Default Ollama host
15
+ MODEL_NAME = "llava:latest" # Or "llama3:latest", "phi3:latest", etc. - ensure it's pulled in Ollama
16
+
17
+ # Option 3: llamacpp binding
18
+ # BINDING_NAME = "llamacpp"
19
+ # MODELS_PATH = r"E:\drumber" # Change to your own models folder
20
+ # MODEL_NAME = "llava-v1.6-mistral-7b.Q3_K_XS.gguf" # Change to your vision capable model (make sure you have a mmprj file with the gguf model with the same name but without the quantization name and with mmproj- prefix (mmproj-llava-v1.6-mistral-7b.gguf))
21
21
  # You can also add a clip_model_path parameter to your lc_params
22
22
  img = "E:\\drumber\\1711741182996.jpg"
23
23
  # Option 3: OpenAI binding (requires OPENAI_API_KEY environment variable or service_key)
examples/text_2_image.py CHANGED
@@ -15,7 +15,6 @@ LOLLMS_CLIENT_ID = "my_lollms_client_id" # Replace with your actual client ID or
15
15
  # Initialize LollmsClient, enabling the TTI 'lollms' binding
16
16
  # The service_key here is used as client_id by the TTI binding for lollms
17
17
  lc = LollmsClient(
18
- host_address="http://localhost:9600",
19
18
  tti_binding_name="lollms"
20
19
  )
21
20
 
examples/text_gen.py CHANGED
@@ -15,7 +15,7 @@ lc = LollmsClient("llamacpp", models_path=r"E:\drumber", model_name="llava-v1.6-
15
15
  def cb(chunk, type):
16
16
  print(chunk,end="",flush=True)
17
17
 
18
- response = lc.generate_text(prompt="One plus one equals ", stream=False, temperature=0.5, streaming_callback=cb)
18
+ response = lc.generate_text(prompt="!@>user: Hi there\n!@>assistant: Hi there, how can I help you?!@>user: what is 1+1?\n!@>assistant: ", stream=False, temperature=0.5, streaming_callback=cb, split=True)
19
19
  print()
20
20
  print(response)
21
21
  print()
lollms_client/__init__.py CHANGED
@@ -6,7 +6,7 @@ from lollms_client.lollms_discussion import LollmsDiscussion, LollmsMessage
6
6
  from lollms_client.lollms_utilities import PromptReshaper # Keep general utilities
7
7
  from lollms_client.lollms_functions import FunctionCalling_Library
8
8
 
9
- __version__ = "0.15.2"
9
+ __version__ = "0.17.0"
10
10
 
11
11
  # Optionally, you could define __all__ if you want to be explicit about exports
12
12
  __all__ = [
@@ -475,7 +475,12 @@ class LlamaCppServerBinding(LollmsLLMBinding):
475
475
  temperature: float = 0.7, top_k: int = 40, top_p: float = 0.9,
476
476
  repeat_penalty: float = 1.1, repeat_last_n: Optional[int] = 64,
477
477
  seed: Optional[int] = None, stream: bool = False, use_chat_format: bool = True,
478
- images: Optional[List[str]] = None, **extra_params) -> Dict:
478
+ images: Optional[List[str]] = None,
479
+ split:Optional[bool]=False, # put to true if the prompt is a discussion
480
+ user_keyword:Optional[str]="!@>user:",
481
+ ai_keyword:Optional[str]="!@>assistant:",
482
+
483
+ **extra_params) -> Dict:
479
484
  payload_params = {
480
485
  "temperature": self.server_args.get("temperature", 0.7), "top_k": self.server_args.get("top_k", 40),
481
486
  "top_p": self.server_args.get("top_p", 0.9), "repeat_penalty": self.server_args.get("repeat_penalty", 1.1),
@@ -495,6 +500,10 @@ class LlamaCppServerBinding(LollmsLLMBinding):
495
500
  messages = []
496
501
  if system_prompt and system_prompt.strip(): messages.append({"role": "system", "content": system_prompt})
497
502
  user_content: Union[str, List[Dict[str, Any]]] = prompt
503
+ if split:
504
+ messages += self.split_discussion(user_content,user_keyword=user_keyword, ai_keyword=ai_keyword)
505
+ else:
506
+ messages.append({"role": "user", "content": user_content})
498
507
  if images and self.clip_model_path: # Use the binding's current clip_model_path
499
508
  image_parts = []
500
509
  for img_path in images:
@@ -503,8 +512,7 @@ class LlamaCppServerBinding(LollmsLLMBinding):
503
512
  image_type = Path(img_path).suffix[1:].lower() or "png"; image_type = "jpeg" if image_type == "jpg" else image_type
504
513
  image_parts.append({"type": "image_url", "image_url": {"url": f"data:image/{image_type};base64,{encoded_string}"}})
505
514
  except Exception as ex: trace_exception(ex)
506
- user_content = [{"type": "text", "text": prompt}] + image_parts # type: ignore
507
- messages.append({"role": "user", "content": user_content})
515
+ messages[-1]["content"] =[{"type": "text", "text": messages[-1]["content"]}] + image_parts # type: ignore
508
516
  final_payload = {"messages": messages, "stream": stream, **payload_params}
509
517
  if 'n_predict' in final_payload: final_payload['max_tokens'] = final_payload.pop('n_predict')
510
518
  return final_payload
@@ -521,16 +529,57 @@ class LlamaCppServerBinding(LollmsLLMBinding):
521
529
  if image_data_list: final_payload["image_data"] = image_data_list
522
530
  return final_payload
523
531
 
524
- def generate_text(self, prompt: str, images: Optional[List[str]] = None, system_prompt: str = "",
525
- n_predict: Optional[int] = None, stream: bool = False, temperature: float = None,
526
- top_k: int = None, top_p: float = None, repeat_penalty: float = None,
527
- repeat_last_n: Optional[int] = None, seed: Optional[int] = None,
528
- streaming_callback: Optional[Callable[[str, int], bool]] = None,
529
- use_chat_format_override: Optional[bool] = None, **generation_kwargs) -> Union[str, Dict[str, any]]:
532
+
533
+ def generate_text(self,
534
+ prompt: str,
535
+ images: Optional[List[str]] = None,
536
+ system_prompt: str = "",
537
+ n_predict: Optional[int] = None,
538
+ stream: Optional[bool] = None,
539
+ temperature: float = 0.7, # Ollama default is 0.8, common default 0.7
540
+ top_k: int = 40, # Ollama default is 40
541
+ top_p: float = 0.9, # Ollama default is 0.9
542
+ repeat_penalty: float = 1.1, # Ollama default is 1.1
543
+ repeat_last_n: int = 64, # Ollama default is 64
544
+ seed: Optional[int] = None,
545
+ n_threads: Optional[int] = None,
546
+ ctx_size: int | None = None,
547
+ streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
548
+ split:Optional[bool]=False, # put to true if the prompt is a discussion
549
+ user_keyword:Optional[str]="!@>user:",
550
+ ai_keyword:Optional[str]="!@>assistant:",
551
+ **generation_kwargs
552
+ ) -> Union[str, dict]:
553
+ """
554
+ Generate text using the active LLM binding, using instance defaults if parameters are not provided.
555
+
556
+ Args:
557
+ prompt (str): The input prompt for text generation.
558
+ images (Optional[List[str]]): List of image file paths for multimodal generation.
559
+ n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
560
+ stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
561
+ temperature (Optional[float]): Sampling temperature. Uses instance default if None.
562
+ top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
563
+ top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
564
+ repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
565
+ repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
566
+ seed (Optional[int]): Random seed for generation. Uses instance default if None.
567
+ n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
568
+ ctx_size (int | None): Context size override for this generation.
569
+ streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback function for streaming output.
570
+ - First parameter (str): The chunk of text received.
571
+ - Second parameter (MSG_TYPE): The message type (e.g., MSG_TYPE.MSG_TYPE_CHUNK).
572
+ split (Optional[bool]): Set to True if the prompt is a discussion.
573
+ user_keyword (Optional[str]): When splitting, the keyword used to extract the user prompt.
574
+ ai_keyword (Optional[str]): When splitting, the keyword used to extract the AI prompt.
575
+
576
+ Returns:
577
+ Union[str, dict]: Generated text or error dictionary if failed.
578
+ """
530
579
  if not self.server_process or not self.server_process.is_healthy:
531
580
  return {"status": False, "error": "Llama.cpp server is not running or not healthy."}
532
581
 
533
- _use_chat_format = use_chat_format_override if use_chat_format_override is not None else (self.default_completion_format == ELF_COMPLETION_FORMAT.Chat)
582
+ _use_chat_format = True
534
583
  payload = self._prepare_generation_payload(
535
584
  prompt=prompt, system_prompt=system_prompt, n_predict=n_predict,
536
585
  temperature=temperature if temperature is not None else self.server_args.get("temperature",0.7),
@@ -539,7 +588,8 @@ class LlamaCppServerBinding(LollmsLLMBinding):
539
588
  repeat_penalty=repeat_penalty if repeat_penalty is not None else self.server_args.get("repeat_penalty",1.1),
540
589
  repeat_last_n=repeat_last_n if repeat_last_n is not None else self.server_args.get("repeat_last_n",64),
541
590
  seed=seed if seed is not None else self.server_args.get("seed", -1), stream=stream,
542
- use_chat_format=_use_chat_format, images=images, **generation_kwargs
591
+ use_chat_format=_use_chat_format, images=images,
592
+ split= split, user_keyword=user_keyword, ai_keyword=ai_keyword, **generation_kwargs
543
593
  )
544
594
  endpoint = "/v1/chat/completions" if _use_chat_format else "/completion"
545
595
  request_url = self._get_request_url(endpoint)