lollms-client 0.15.2__py3-none-any.whl → 0.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lollms-client might be problematic. Click here for more details.
- examples/generate_and_speak/generate_and_speak.py +251 -0
- examples/generate_game_sfx/generate_game_fx.py +240 -0
- examples/text_2_image.py +0 -1
- lollms_client/__init__.py +1 -1
- lollms_client/lollms_core.py +49 -29
- lollms_client/lollms_stt_binding.py +3 -15
- lollms_client/lollms_tti_binding.py +5 -29
- lollms_client/lollms_ttm_binding.py +5 -28
- lollms_client/lollms_tts_binding.py +4 -28
- lollms_client/lollms_ttv_binding.py +4 -28
- lollms_client/stt_bindings/lollms/__init__.py +5 -4
- lollms_client/stt_bindings/whisper/__init__.py +304 -0
- lollms_client/stt_bindings/whispercpp/__init__.py +380 -0
- lollms_client/tti_bindings/lollms/__init__.py +4 -6
- lollms_client/ttm_bindings/audiocraft/__init__.py +281 -0
- lollms_client/ttm_bindings/bark/__init__.py +339 -0
- lollms_client/tts_bindings/bark/__init__.py +336 -0
- lollms_client/tts_bindings/piper_tts/__init__.py +343 -0
- lollms_client/tts_bindings/xtts/__init__.py +317 -0
- lollms_client-0.16.0.dist-info/METADATA +183 -0
- {lollms_client-0.15.2.dist-info → lollms_client-0.16.0.dist-info}/RECORD +24 -15
- lollms_client-0.15.2.dist-info/METADATA +0 -192
- {lollms_client-0.15.2.dist-info → lollms_client-0.16.0.dist-info}/WHEEL +0 -0
- {lollms_client-0.15.2.dist-info → lollms_client-0.16.0.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-0.15.2.dist-info → lollms_client-0.16.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
# examples/generate_and_speak/generate_and_speak.py
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
import time
|
|
4
|
+
import argparse
|
|
5
|
+
|
|
6
|
+
# Ensure pygame is installed for this example
|
|
7
|
+
try:
|
|
8
|
+
import pipmaster as pm
|
|
9
|
+
pm.ensure_packages(["pygame"])
|
|
10
|
+
import pygame
|
|
11
|
+
PYGAME_AVAILABLE = True
|
|
12
|
+
except ImportError:
|
|
13
|
+
print("Pygame not found or pipmaster failed. Please install it manually: pip install pygame")
|
|
14
|
+
PYGAME_AVAILABLE = False
|
|
15
|
+
except Exception as e:
|
|
16
|
+
print(f"Could not ensure pygame: {e}")
|
|
17
|
+
PYGAME_AVAILABLE = False
|
|
18
|
+
|
|
19
|
+
from lollms_client import LollmsClient, MSG_TYPE
|
|
20
|
+
from ascii_colors import ASCIIColors, trace_exception
|
|
21
|
+
|
|
22
|
+
# --- Configuration ---
|
|
23
|
+
SPEECH_OUTPUT_DIR = Path(__file__).parent / "speech_output"
|
|
24
|
+
SPEECH_OUTPUT_DIR.mkdir(exist_ok=True)
|
|
25
|
+
|
|
26
|
+
# Default path for Piper voices relative to this example script for convenience
|
|
27
|
+
DEFAULT_PIPER_VOICES_SUBDIR = Path(__file__).parent / "piper_voices_for_demo"
|
|
28
|
+
DEFAULT_PIPER_VOICE_FILENAME = "en_US-lessac-medium.onnx" # A common, good quality English voice
|
|
29
|
+
|
|
30
|
+
def text_stream_callback(chunk: str, message_type: MSG_TYPE, params: dict = None, metadata: list = None) -> bool:
    """Streaming callback that echoes LLM output to the console.

    Text chunks are printed inline as they arrive; step start/end
    notifications are printed on their own coloured line. Any other
    message type is ignored.

    Args:
        chunk: The text payload of the message (a token chunk or a step name).
        message_type: The kind of message being delivered.
        params: Unused by this callback.
        metadata: Unused by this callback.

    Returns:
        Always True.
    """
    # Plain token chunk: print without newline so the text flows naturally.
    if message_type == MSG_TYPE.MSG_TYPE_CHUNK:
        print(chunk, end="", flush=True)
        return True

    # Step boundaries get their own highlighted line.
    if message_type == MSG_TYPE.MSG_TYPE_STEP_START:
        ASCIIColors.yellow(f"\n>> Starting step: {chunk}")
        return True

    if message_type == MSG_TYPE.MSG_TYPE_STEP_END:
        ASCIIColors.green(f"\n<< Finished step: {chunk}")

    return True
|
|
38
|
+
|
|
39
|
+
def ensure_default_piper_voice_for_demo(voices_dir: Path, voice_filename: str):
    """Make sure a Piper voice model and its JSON config exist in ``voices_dir``.

    If either ``<voice_filename>`` or ``<voice_filename>.json`` is missing, the
    missing file(s) are downloaded from the rhasspy/piper-voices repository on
    Hugging Face. The remote path is derived from the file name, which is
    expected to look like ``<lang>_<REGION>-<voice>-<quality>.onnx``
    (e.g. ``en_US-lessac-medium.onnx``).

    Args:
        voices_dir: Directory that holds (or will hold) the voice files.
        voice_filename: File name of the ``.onnx`` voice model.

    Returns:
        True if both files are present when the function returns, False if a
        download (or the URL construction) failed.
    """
    # parents=True so a nested, not-yet-existing voices directory is created too.
    voices_dir.mkdir(parents=True, exist_ok=True)
    onnx_path = voices_dir / voice_filename
    json_path = voices_dir / f"{voice_filename}.json"

    if onnx_path.exists() and json_path.exists():
        return True

    ASCIIColors.info(f"Default Piper test voice '{voice_filename}' not found in {voices_dir}. Attempting to download...")
    try:
        import requests

        # Construct URLs (assuming en_US/lessac/medium structure)
        voice_parts = voice_filename.split('-')  # e.g., ['en_US', 'lessac', 'medium.onnx']
        lang_code = voice_parts[0].split('_')[0]  # en
        voice_name_path = "/".join(voice_parts[0:2])  # en_US/lessac
        quality_path = voice_parts[2].split('.')[0]  # medium

        # Base URL for Piper voices on Hugging Face
        PIPER_VOICES_HF_BASE_URL = "https://huggingface.co/rhasspy/piper-voices/resolve/main/"

        onnx_url = f"{PIPER_VOICES_HF_BASE_URL}{lang_code}/{voice_name_path}/{quality_path}/{voice_filename}"
        json_url = f"{PIPER_VOICES_HF_BASE_URL}{lang_code}/{voice_name_path}/{quality_path}/{voice_filename}.json"

        for target_path, url in ((onnx_path, onnx_url), (json_path, json_url)):
            if target_path.exists():
                continue
            ASCIIColors.info(f"Downloading {url} to {target_path}")
            # Download to a sibling ".part" file and rename only on success, so
            # an interrupted transfer never leaves a truncated file that a later
            # run would mistake for a complete voice.
            partial_path = target_path.with_name(target_path.name + ".part")
            try:
                # Context manager closes the streamed connection; the timeout
                # (connect, read) keeps the demo from hanging on a dead network.
                with requests.get(url, stream=True, timeout=(10, 60)) as response:
                    response.raise_for_status()
                    with open(partial_path, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            f.write(chunk)
                partial_path.replace(target_path)
            finally:
                if partial_path.exists():
                    partial_path.unlink()

        ASCIIColors.green(f"Default Piper test voice '{voice_filename}' downloaded successfully to {voices_dir}.")
        return True
    except Exception as e_download:
        # Best-effort helper for a demo: report and let the caller decide.
        ASCIIColors.error(f"Failed to download default Piper test voice '{voice_filename}': {e_download}")
        ASCIIColors.warning(f"Please manually download '{voice_filename}' and '{voice_filename}.json' "
                            f"from rhasspy.github.io/piper-voices/ or Hugging Face "
                            f"and place them in {voices_dir.resolve()}")
        return False
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def main():
    """Run the demo: generate a short text with an LLM, then speak it.

    Steps:
      1. Parse command-line options for the LLM binding and the TTS binding.
      2. Build the per-binding configuration dictionaries and create a
         LollmsClient with both an LLM and a TTS binding.
      3. Stream a short generated message to the console.
      4. Synthesize the message to a WAV file under SPEECH_OUTPUT_DIR.
      5. Play the file with pygame when pygame is available.

    Every failure is reported on the console and ends the demo early; the
    function returns None in all cases.
    """
    parser = argparse.ArgumentParser(description="Generate text with an LLM and synthesize it to speech using LOLLMS.")
    # LLM Arguments
    parser.add_argument(
        "--llm_binding", type=str, default="ollama", choices=["ollama", "openai", "lollms", "llamacpp", "pythonllamacpp", "transformers", "vllm"],
        help="The LLM binding to use for text generation."
    )
    parser.add_argument(
        "--llm_model", type=str, default="mistral",
        help="Model name or path for the LLM binding."
    )
    parser.add_argument("--llm_host", type=str, default=None, help="Host address for server-based LLM bindings.")
    parser.add_argument("--models_path", type=str, default=None, help="Path to models directory for local LLM bindings.")
    parser.add_argument("--openai_key", type=str, default=None, help="OpenAI API key.")

    # TTS Arguments
    parser.add_argument(
        "--tts_binding", type=str, default="bark", choices=["bark", "lollms", "xtts", "piper"],
        help="The TTS binding to use for speech synthesis."
    )
    # Bark specific
    parser.add_argument("--bark_model", type=str, default="suno/bark-small", help="Bark model ID for TTS.")
    parser.add_argument("--bark_voice_preset", type=str, default="v2/en_speaker_6", help="Bark voice preset.")
    # XTTS specific
    parser.add_argument("--xtts_model", type=str, default="tts_models/multilingual/multi-dataset/xtts_v2", help="XTTS model identifier for Coqui TTS.")
    parser.add_argument("--xtts_speaker_wav", type=str, default=None, help="Path to speaker WAV for XTTS voice cloning.")
    parser.add_argument("--xtts_language", type=str, default="en", help="Language for XTTS.")
    # Piper specific
    parser.add_argument("--piper_default_voice_model_path", type=str, default=None, help="Path to the default .onnx Piper voice model.")
    parser.add_argument("--piper_voices_dir", type=str, default=str(DEFAULT_PIPER_VOICES_SUBDIR), help="Directory containing Piper voice models.")
    parser.add_argument("--piper_voice_file", type=str, default=DEFAULT_PIPER_VOICE_FILENAME, help="Filename of the Piper voice to use from piper_voices_dir (e.g., en_US-ryan-medium.onnx).")

    # Common TTS/LLM args
    parser.add_argument("--tts_host", type=str, default=None, help="Host address for server-based TTS bindings (e.g., lollms TTS).")
    parser.add_argument("--device", type=str, default=None, choices=["cpu", "cuda", "mps", None], help="Device for local TTS/LLM models.")
    args = parser.parse_args()

    ASCIIColors.red("--- LOLLMS Text Generation and Speech Synthesis Demo ---")
    ASCIIColors.info(f"Using LLM Binding: {args.llm_binding} (Model: {args.llm_model})")
    ASCIIColors.info(f"Using TTS Binding: {args.tts_binding}")
    if args.tts_binding == "bark":
        ASCIIColors.info(f" Bark Model: {args.bark_model}, Voice Preset: {args.bark_voice_preset}")
    elif args.tts_binding == "xtts":
        ASCIIColors.info(f" XTTS Model: {args.xtts_model}, Speaker WAV: {args.xtts_speaker_wav or 'Default in binding'}, Lang: {args.xtts_language}")
    elif args.tts_binding == "piper":
        ASCIIColors.info(f" Piper Voices Dir: {args.piper_voices_dir}, Voice File: {args.piper_voice_file}")
        # Ensure default Piper voice for demo if Piper is selected and no specific default path is given
        if not args.piper_default_voice_model_path:
            ensure_default_piper_voice_for_demo(Path(args.piper_voices_dir), args.piper_voice_file)
            args.piper_default_voice_model_path = str(Path(args.piper_voices_dir) / args.piper_voice_file)

    # --- LLM binding configuration ---
    llm_binding_config = {}
    if args.llm_binding == "openai" and args.openai_key:
        llm_binding_config["service_key"] = args.openai_key
    elif args.llm_binding in ["llamacpp", "pythonllamacpp", "transformers", "vllm"]:
        if args.device:
            llm_binding_config["device"] = args.device
        if args.llm_binding == "pythonllamacpp":
            llm_binding_config["n_gpu_layers"] = -1 if args.device == "cuda" else 0

    # --- TTS binding configuration ---
    tts_binding_config = {"device": args.device}
    if args.tts_binding == "bark":
        tts_binding_config["model_name"] = args.bark_model
        tts_binding_config["default_voice"] = args.bark_voice_preset
    elif args.tts_binding == "xtts":
        tts_binding_config["model_name"] = args.xtts_model
        tts_binding_config["default_speaker_wav"] = args.xtts_speaker_wav
        tts_binding_config["default_language"] = args.xtts_language
    elif args.tts_binding == "piper":
        tts_binding_config["default_voice_model_path"] = args.piper_default_voice_model_path
        tts_binding_config["piper_voices_dir"] = args.piper_voices_dir
    elif args.tts_binding == "lollms":
        tts_binding_config["model_name"] = "default_lollms_voice"  # Placeholder, server handles actual voice

    lollms_client = None
    try:
        ASCIIColors.magenta("Initializing LollmsClient...")
        lollms_client = LollmsClient(
            binding_name=args.llm_binding, model_name=args.llm_model,
            host_address=args.llm_host, models_path=args.models_path,
            llm_binding_config=llm_binding_config,
            tts_binding_name=args.tts_binding, tts_host_address=args.tts_host,
            tts_binding_config=tts_binding_config,
            verify_ssl_certificate=False
        )
        ASCIIColors.green("LollmsClient initialized.")
    except Exception as e:
        ASCIIColors.error(f"Failed to initialize LollmsClient: {e}")
        trace_exception(e)
        return

    # --- Text generation ---
    generated_text = ""
    text_prompt = "Craft a very short, cheerful message about the joy of discovery."
    ASCIIColors.cyan(f"\n--- Generating Text (Prompt: '{text_prompt[:50]}...') ---")
    if not lollms_client.binding:
        ASCIIColors.error("LLM binding not available.")
        return
    try:
        print(f"{ASCIIColors.YELLOW}AI is thinking: {ASCIIColors.RESET}", end="")
        generated_text = lollms_client.generate_text(
            prompt=text_prompt, n_predict=100, stream=True,
            streaming_callback=text_stream_callback, temperature=0.7
        )
        print("\n")
        ASCIIColors.green("Text generation complete.")
        ASCIIColors.magenta("Generated Text:\n")
        ASCIIColors.yellow(generated_text)
    except Exception as e:
        ASCIIColors.error(f"Text generation failed: {e}")
        trace_exception(e)
        return
    if not generated_text:
        ASCIIColors.warning("LLM did not generate any text.")
        return

    # --- Speech synthesis ---
    speech_file_path = None
    ASCIIColors.cyan(f"\n--- Synthesizing Speech (using {args.tts_binding}) ---")
    if not lollms_client.tts:
        ASCIIColors.error("TTS binding not available.")
        return
    try:
        tts_call_kwargs = {}
        if args.tts_binding == "bark":
            # For Bark, 'voice' in generate_audio is the voice_preset.
            # The default_voice passed at init is used; to override per call:
            # tts_call_kwargs['voice'] = args.bark_voice_preset
            pass
        elif args.tts_binding == "xtts":
            tts_call_kwargs['language'] = args.xtts_language
            # 'voice' for XTTS is the speaker_wav path. If not using default from init, pass here.
            # tts_call_kwargs['voice'] = args.xtts_speaker_wav
        elif args.tts_binding == "piper":
            # 'voice' for Piper is the .onnx filename.
            tts_call_kwargs['voice'] = args.piper_voice_file
            # Example Piper specific param:
            # tts_call_kwargs['length_scale'] = 1.0

        audio_bytes = lollms_client.tts.generate_audio(text=generated_text, **tts_call_kwargs)

        if audio_bytes:
            filename_stem = f"speech_output_{args.llm_binding}_{args.tts_binding}"
            speech_file_path = SPEECH_OUTPUT_DIR / f"{filename_stem.replace('/', '_')}.wav"
            with open(speech_file_path, "wb") as f:
                f.write(audio_bytes)
            ASCIIColors.green(f"Speech synthesized and saved to: {speech_file_path}")
        elif args.tts_binding == "lollms":
            ASCIIColors.warning("LOLLMS TTS binding returned empty bytes. Server might have saved file if 'fn' was used.")
            speech_file_path = None
        else:
            ASCIIColors.warning("Speech synthesis returned empty bytes.")
            speech_file_path = None
    except Exception as e:
        ASCIIColors.error(f"Speech synthesis failed: {e}")
        trace_exception(e)
        return

    # --- Playback ---
    if speech_file_path and PYGAME_AVAILABLE:
        ASCIIColors.magenta("\n--- Playing Synthesized Speech ---")
        try:
            pygame.mixer.init()
            speech_sound = pygame.mixer.Sound(str(speech_file_path))
            ASCIIColors.cyan("Playing audio... Press Ctrl+C in console to stop playback early.")
            speech_sound.play()
            # Only the mixer is initialised here (no display/video system), so
            # the pygame event queue must not be polled: doing so raises
            # pygame.error and would abort playback. Just wait for the mixer.
            while pygame.mixer.get_busy():
                time.sleep(0.1)
            ASCIIColors.green("Playback finished.")
        except pygame.error as e:
            ASCIIColors.warning(f"Could not play audio with pygame: {e}")
        except KeyboardInterrupt:
            pygame.mixer.stop()
            ASCIIColors.yellow("\nPlayback interrupted.")
        finally:
            pygame.quit()
    elif not PYGAME_AVAILABLE:
        ASCIIColors.warning("Pygame is not available for playback.")
        if speech_file_path:
            ASCIIColors.info(f"Generated speech: {speech_file_path.resolve()}")
    elif not speech_file_path:
        ASCIIColors.warning("No speech file generated/path unknown. Skipping playback.")

    ASCIIColors.red("\n--- Demo Finished ---")
|
|
249
|
+
|
|
250
|
+
if __name__ == "__main__":
|
|
251
|
+
main()
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
# examples/generate_game_sfx/generate_game_fx.py
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
import time
|
|
4
|
+
import argparse # For command-line arguments
|
|
5
|
+
|
|
6
|
+
# Ensure pygame is installed for this example
|
|
7
|
+
try:
|
|
8
|
+
import pipmaster as pm
|
|
9
|
+
pm.ensure_packages(["pygame"])
|
|
10
|
+
import pygame
|
|
11
|
+
PYGAME_AVAILABLE = True
|
|
12
|
+
except ImportError:
|
|
13
|
+
print("Pygame not found or pipmaster failed. Please install it manually: pip install pygame")
|
|
14
|
+
PYGAME_AVAILABLE = False
|
|
15
|
+
except Exception as e:
|
|
16
|
+
print(f"Could not ensure pygame: {e}")
|
|
17
|
+
PYGAME_AVAILABLE = False
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
from lollms_client import LollmsClient # Removed LollmsDiscussion, LollmsMessage as not used
|
|
21
|
+
from ascii_colors import ASCIIColors, trace_exception
|
|
22
|
+
|
|
23
|
+
# --- Configuration ---
|
|
24
|
+
# Output directory for generated sound effects
|
|
25
|
+
SFX_OUTPUT_DIR = Path(__file__).parent / "sfx_output"
|
|
26
|
+
SFX_OUTPUT_DIR.mkdir(exist_ok=True)
|
|
27
|
+
|
|
28
|
+
# Sound effect descriptions. Note: 'duration' is more relevant for audiocraft.
|
|
29
|
+
# Bark's duration is more implicit based on prompt content.
|
|
30
|
+
SOUND_EFFECTS_TO_GENERATE = [
|
|
31
|
+
{
|
|
32
|
+
"filename": "sfx_crunch",
|
|
33
|
+
"prompt": "Sound effect of a single, sharp, dry crunch, like stepping on a crisp autumn leaf or a piece of dry wood breaking.",
|
|
34
|
+
"duration": 1, # audiocraft specific
|
|
35
|
+
"bark_params": {"fine_temperature": 0.4, "coarse_temperature": 0.6} # Example bark specific
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
"filename": "sfx_death_electronic",
|
|
39
|
+
"prompt": "Short, impactful electronic death sound effect for a video game character, like a quick digital zap or a brief power-down sound.",
|
|
40
|
+
"duration": 1.5,
|
|
41
|
+
"bark_params": {"voice_preset": None} # Try without preset for more raw SFX
|
|
42
|
+
},
|
|
43
|
+
{
|
|
44
|
+
"filename": "sfx_powerup_positive",
|
|
45
|
+
"prompt": "Bright, positive, short power-up collection sound effect, like a magical chime, a sparkling shimmer, or an uplifting notification. [SFX]",
|
|
46
|
+
"duration": 1.5
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
"filename": "sfx_laser_shot",
|
|
50
|
+
"prompt": "Sound effect of a futuristic laser gun firing a single shot, a quick 'pew' sound. [SFX: laser pew]",
|
|
51
|
+
"duration": 0.5
|
|
52
|
+
},
|
|
53
|
+
{
|
|
54
|
+
"filename": "sfx_coin_collect",
|
|
55
|
+
"prompt": "Classic video game coin collection sound effect, a short, metallic, cheerful 'ding' or 'jingle'. [SFX: coin]",
|
|
56
|
+
"duration": 0.7
|
|
57
|
+
}
|
|
58
|
+
]
|
|
59
|
+
|
|
60
|
+
def generate_sfx(lollms_client: LollmsClient, sfx_info: dict) -> Path | None:
    """Generate one sound effect with the client's TTM binding and save it as WAV.

    Args:
        lollms_client: Client whose ``ttm`` binding is used for generation.
        sfx_info: Description of the effect. Reads the keys ``"filename"``
            (output file stem) and ``"prompt"``; optionally ``"duration"``,
            ``"audiocraft_temperature"`` and ``"audiocraft_cfg_coef"`` for the
            audiocraft binding, and ``"bark_params"`` (a dict) for bark.

    Returns:
        The path of the written file under SFX_OUTPUT_DIR, or None when the
        TTM binding is missing, generation returned no data, or an error
        occurred (errors are reported on the console, not raised).
    """
    # Check the binding first: everything below dereferences lollms_client.ttm,
    # so testing it later would fail with AttributeError before the guard ran.
    if not lollms_client.ttm:
        ASCIIColors.error("TTM (Text-to-Music/Sound) binding is not available in LollmsClient.")
        return None

    filename_stem = sfx_info["filename"]
    prompt = sfx_info["prompt"]
    binding_name = lollms_client.ttm.binding_name

    # Default output format
    output_format = "wav"  # WAV is generally best for SFX in pygame
    # The binding name is part of the file name so runs with different
    # bindings do not overwrite each other.
    output_path = SFX_OUTPUT_DIR / f"{filename_stem}_{binding_name}.{output_format}"

    ASCIIColors.cyan(f"\nGenerating SFX using '{binding_name}': '{filename_stem}'")
    ASCIIColors.info(f"Prompt: '{prompt[:60]}...'")

    ttm_params = {"progress": True}  # Common param for both

    if binding_name == "audiocraft":
        ttm_params["duration"] = sfx_info.get("duration", 1.0)
        ttm_params["temperature"] = sfx_info.get("audiocraft_temperature", 1.0)
        ttm_params["cfg_coef"] = sfx_info.get("audiocraft_cfg_coef", 3.0)
        ASCIIColors.info(f"AudioCraft Params: duration={ttm_params['duration']}, temp={ttm_params['temperature']}, cfg={ttm_params['cfg_coef']}")
    elif binding_name == "bark":
        # Bark duration is implicit. Parameters are different.
        bark_specific_params = sfx_info.get("bark_params", {})
        ttm_params["voice_preset"] = bark_specific_params.get("voice_preset", None)  # None might be good for SFX
        ttm_params["fine_temperature"] = bark_specific_params.get("fine_temperature", 0.5)
        ttm_params["coarse_temperature"] = bark_specific_params.get("coarse_temperature", 0.7)
        ASCIIColors.info(f"Bark Params: preset={ttm_params['voice_preset']}, fine_temp={ttm_params['fine_temperature']}, coarse_temp={ttm_params['coarse_temperature']}")
    else:
        ASCIIColors.warning(f"Unknown TTM binding '{binding_name}'. Using generic parameters.")

    try:
        music_bytes = lollms_client.ttm.generate_music(prompt=prompt, **ttm_params)

        if not music_bytes:
            ASCIIColors.warning(f"SFX generation for '{filename_stem}' ({binding_name}) returned empty bytes.")
            return None

        with open(output_path, "wb") as f:
            f.write(music_bytes)
        ASCIIColors.green(f"SFX '{filename_stem}' ({binding_name}) saved to: {output_path}")
        return output_path
    except Exception as e:
        # Best-effort: one failed effect must not stop the remaining ones.
        ASCIIColors.error(f"Error generating SFX '{filename_stem}' ({binding_name}): {e}")
        trace_exception(e)
        return None
|
|
110
|
+
|
|
111
|
+
def main():
    """Generate the configured game sound effects and offer a pygame player.

    Parses the command line, creates a LollmsClient with the selected TTM
    binding, generates every entry of SOUND_EFFECTS_TO_GENERATE, and - when
    pygame is available - opens a small window in which the number keys play
    the generated sounds and 'Q' (or closing the window) quits.

    Failures are reported on the console and end the demo early; the function
    returns None in all cases.
    """
    parser = argparse.ArgumentParser(description="Generate game sound effects using LOLLMS TTM bindings.")
    parser.add_argument(
        "--ttm_binding",
        type=str,
        choices=["audiocraft", "bark"],
        default="bark",  # Default to bark
        help="The TTM binding to use for generation."
    )
    parser.add_argument(
        "--audiocraft_model",
        type=str,
        default="facebook/musicgen-small",
        help="Hugging Face model ID for AudioCraft (e.g., facebook/musicgen-small, facebook/musicgen-melody)."
    )
    parser.add_argument(
        "--bark_model",
        type=str,
        default="suno/bark-small",
        help="Hugging Face model ID for Bark (e.g., suno/bark-small, suno/bark)."
    )
    parser.add_argument(
        "--device",
        type=str,
        default=None,  # Auto-detect
        choices=["cpu", "cuda", "mps", None],
        help="Device to run the TTM model on (cpu, cuda, mps, or auto-detect)."
    )
    args = parser.parse_args()

    ASCIIColors.red(f"--- LOLLMS Game SFX Generation Example (Using: {args.ttm_binding}) ---")

    ttm_binding_config = {"device": args.device}  # Common device config
    if args.ttm_binding == "audiocraft":
        ttm_binding_config["model_name"] = args.audiocraft_model
        ttm_binding_config["output_format"] = "wav"  # Audiocraft binding defaults to wav for bytes
    elif args.ttm_binding == "bark":
        ttm_binding_config["model_name"] = args.bark_model
        # Bark binding currently outputs WAV by default for bytes
    else:
        # Defensive only: argparse 'choices' already rejects other values.
        ASCIIColors.error(f"Unsupported TTM binding: {args.ttm_binding}")
        return

    try:
        ASCIIColors.magenta(f"Initializing LollmsClient with {args.ttm_binding} for TTM...")
        lollms_client = LollmsClient(
            binding_name="lollms",  # Can be a dummy if only using TTM
            ttm_binding_name=args.ttm_binding,
            ttm_binding_config=ttm_binding_config
        )
        ASCIIColors.green("LollmsClient initialized.")
    except Exception as e:
        ASCIIColors.error(f"Failed to initialize LollmsClient: {e}")
        trace_exception(e)
        return

    if not lollms_client.ttm:
        ASCIIColors.error(f"{args.ttm_binding.capitalize()} TTM binding could not be loaded. Exiting.")
        return

    # --- Generation ---
    generated_sfx_paths = {}
    for sfx_info_item in SOUND_EFFECTS_TO_GENERATE:
        sfx_path = generate_sfx(lollms_client, sfx_info_item)
        if sfx_path:
            generated_sfx_paths[sfx_info_item["filename"]] = {
                "path": sfx_path,
                "binding": args.ttm_binding  # Store which binding generated it
            }
        time.sleep(0.5)  # Small delay

    ASCIIColors.red("\n--- SFX Generation Complete ---")
    if not generated_sfx_paths:
        ASCIIColors.warning("No sound effects were successfully generated.")
        return

    if not PYGAME_AVAILABLE:
        ASCIIColors.warning("Pygame is not available. Skipping sound playback demo.")
        ASCIIColors.info(f"Generated SFX can be found in: {SFX_OUTPUT_DIR.resolve()}")
        return

    # --- Playback demo ---
    ASCIIColors.magenta("\n--- Pygame SFX Playback Demo ---")
    try:
        pygame.mixer.init()
    except pygame.error as e:
        # e.g. no audio device: the files are still on disk, so say where.
        ASCIIColors.warning(f"Could not initialize pygame audio: {e}")
        ASCIIColors.info(f"Generated SFX can be found in: {SFX_OUTPUT_DIR.resolve()}")
        return

    try:
        game_sounds = {}
        sfx_playback_order = []  # To map number keys to sounds

        for filename_stem, sfx_data in generated_sfx_paths.items():
            path = sfx_data["path"]
            binding_used = sfx_data["binding"]
            playback_name = f"{filename_stem} ({binding_used})"
            try:
                sound = pygame.mixer.Sound(str(path))
                game_sounds[playback_name] = sound
                sfx_playback_order.append(playback_name)
                ASCIIColors.green(f"Loaded '{path.name}' into pygame as '{playback_name}'.")
            except pygame.error as e:
                ASCIIColors.warning(f"Could not load sound '{path.name}' into pygame: {e}")

        if not game_sounds:
            ASCIIColors.warning("No sounds loaded into pygame. Exiting demo.")
            return

        print("\nInstructions:")
        for i, sfx_name_to_play in enumerate(sfx_playback_order):
            print(f" Press key '{i+1}' to play: {sfx_name_to_play}")
        print(" Press 'Q' to quit the demo.")

        pygame.display.set_mode((400, 200))
        pygame.display.set_caption(f"SFX Player ({args.ttm_binding.capitalize()})")

        # Map pygame key codes (K_1, K_2, ...) to sound names once, up front.
        # Positions without a matching K_<n> constant simply get no key.
        key_to_sound = {}
        for position, sfx_name in enumerate(sfx_playback_order, start=1):
            key_code = getattr(pygame, f"K_{position}", None)
            if key_code is not None:
                key_to_sound[key_code] = sfx_name

        # One clock for the whole loop so tick() really paces the frames.
        clock = pygame.time.Clock()
        running = True
        while running:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False
                if event.type == pygame.KEYDOWN:
                    if event.key == pygame.K_q:
                        running = False
                    sfx_name_to_play = key_to_sound.get(event.key)
                    if sfx_name_to_play is not None:
                        ASCIIColors.cyan(f"Playing: {sfx_name_to_play}")
                        game_sounds[sfx_name_to_play].play()
            clock.tick(30)
    finally:
        # Always release pygame, including on early return or Ctrl+C.
        pygame.quit()

    ASCIIColors.red("--- Demo Finished ---")
    ASCIIColors.info(f"Generated SFX are in: {SFX_OUTPUT_DIR.resolve()}")
|
|
238
|
+
|
|
239
|
+
if __name__ == "__main__":
|
|
240
|
+
main()
|
examples/text_2_image.py
CHANGED
|
@@ -15,7 +15,6 @@ LOLLMS_CLIENT_ID = "my_lollms_client_id" # Replace with your actual client ID or
|
|
|
15
15
|
# Initialize LollmsClient, enabling the TTI 'lollms' binding
|
|
16
16
|
# The service_key here is used as client_id by the TTI binding for lollms
|
|
17
17
|
lc = LollmsClient(
|
|
18
|
-
host_address="http://localhost:9600",
|
|
19
18
|
tti_binding_name="lollms"
|
|
20
19
|
)
|
|
21
20
|
|
lollms_client/__init__.py
CHANGED
|
@@ -6,7 +6,7 @@ from lollms_client.lollms_discussion import LollmsDiscussion, LollmsMessage
|
|
|
6
6
|
from lollms_client.lollms_utilities import PromptReshaper # Keep general utilities
|
|
7
7
|
from lollms_client.lollms_functions import FunctionCalling_Library
|
|
8
8
|
|
|
9
|
-
__version__ = "0.
|
|
9
|
+
__version__ = "0.16.0"
|
|
10
10
|
|
|
11
11
|
# Optionally, you could define __all__ if you want to be explicit about exports
|
|
12
12
|
__all__ = [
|
lollms_client/lollms_core.py
CHANGED
|
@@ -48,6 +48,13 @@ class LollmsClient():
|
|
|
48
48
|
ttv_bindings_dir: Path = Path(__file__).parent / "ttv_bindings",
|
|
49
49
|
ttm_bindings_dir: Path = Path(__file__).parent / "ttm_bindings",
|
|
50
50
|
|
|
51
|
+
# Configurations
|
|
52
|
+
tts_binding_config: Optional[Dict[str, any]] = None, # Renamed for clarity
|
|
53
|
+
tti_binding_config: Optional[Dict[str, any]] = None, # Renamed for clarity
|
|
54
|
+
stt_binding_config: Optional[Dict[str, any]] = None, # Renamed for clarity
|
|
55
|
+
ttv_binding_config: Optional[Dict[str, any]] = None, # Renamed for clarity
|
|
56
|
+
ttm_binding_config: Optional[Dict[str, any]] = None, # Renamed for clarity
|
|
57
|
+
|
|
51
58
|
# General Parameters (mostly defaults for LLM generation)
|
|
52
59
|
service_key: Optional[str] = None, # Shared service key/client_id
|
|
53
60
|
verify_ssl_certificate: bool = True,
|
|
@@ -84,6 +91,11 @@ class LollmsClient():
|
|
|
84
91
|
stt_bindings_dir (Path): Directory for STT bindings.
|
|
85
92
|
ttv_bindings_dir (Path): Directory for TTV bindings.
|
|
86
93
|
ttm_bindings_dir (Path): Directory for TTM bindings.
|
|
94
|
+
tts_binding_config (Optional[Dict]): Additional config for the TTS binding.
|
|
95
|
+
tti_binding_config (Optional[Dict]): Additional config for the TTI binding.
|
|
96
|
+
stt_binding_config (Optional[Dict]): Additional config for the STT binding.
|
|
97
|
+
ttv_binding_config (Optional[Dict]): Additional config for the TTV binding.
|
|
98
|
+
ttm_binding_config (Optional[Dict]): Additional config for the TTM binding.
|
|
87
99
|
service_key (Optional[str]): Shared authentication key or client_id.
|
|
88
100
|
verify_ssl_certificate (bool): Whether to verify SSL certificates.
|
|
89
101
|
ctx_size (Optional[int]): Default context size for LLM.
|
|
@@ -144,54 +156,62 @@ class LollmsClient():
|
|
|
144
156
|
if tts_binding_name:
|
|
145
157
|
self.tts = self.tts_binding_manager.create_binding(
|
|
146
158
|
binding_name=tts_binding_name,
|
|
147
|
-
|
|
148
|
-
service_key=self.service_key,
|
|
149
|
-
verify_ssl_certificate=self.verify_ssl_certificate
|
|
159
|
+
**tts_binding_config
|
|
150
160
|
)
|
|
151
161
|
if self.tts is None:
|
|
152
162
|
ASCIIColors.warning(f"Failed to create TTS binding: {tts_binding_name}. Available: {self.tts_binding_manager.get_available_bindings()}")
|
|
153
163
|
|
|
154
164
|
if tti_binding_name:
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
165
|
+
if tti_binding_config:
|
|
166
|
+
self.tti = self.tti_binding_manager.create_binding(
|
|
167
|
+
binding_name=tti_binding_name,
|
|
168
|
+
**tti_binding_config
|
|
169
|
+
)
|
|
170
|
+
else:
|
|
171
|
+
self.tti = self.tti_binding_manager.create_binding(
|
|
172
|
+
binding_name=tti_binding_name
|
|
173
|
+
)
|
|
161
174
|
if self.tti is None:
|
|
162
175
|
ASCIIColors.warning(f"Failed to create TTI binding: {tti_binding_name}. Available: {self.tti_binding_manager.get_available_bindings()}")
|
|
163
176
|
|
|
164
177
|
if stt_binding_name:
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
178
|
+
if stt_binding_config:
|
|
179
|
+
self.stt = self.stt_binding_manager.create_binding(
|
|
180
|
+
binding_name=stt_binding_name,
|
|
181
|
+
**stt_binding_config
|
|
182
|
+
)
|
|
183
|
+
else:
|
|
184
|
+
self.stt = self.stt_binding_manager.create_binding(
|
|
185
|
+
binding_name=stt_binding_name,
|
|
186
|
+
)
|
|
171
187
|
if self.stt is None:
|
|
172
188
|
ASCIIColors.warning(f"Failed to create STT binding: {stt_binding_name}. Available: {self.stt_binding_manager.get_available_bindings()}")
|
|
173
|
-
|
|
174
189
|
if ttv_binding_name:
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
190
|
+
if ttv_binding_config:
|
|
191
|
+
self.ttv = self.ttv_binding_manager.create_binding(
|
|
192
|
+
binding_name=ttv_binding_name,
|
|
193
|
+
**ttv_binding_config
|
|
194
|
+
)
|
|
195
|
+
else:
|
|
196
|
+
self.ttv = self.ttv_binding_manager.create_binding(
|
|
197
|
+
binding_name=ttv_binding_name
|
|
198
|
+
)
|
|
181
199
|
if self.ttv is None:
|
|
182
200
|
ASCIIColors.warning(f"Failed to create TTV binding: {ttv_binding_name}. Available: {self.ttv_binding_manager.get_available_bindings()}")
|
|
183
201
|
|
|
184
202
|
if ttm_binding_name:
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
203
|
+
if ttm_binding_config:
|
|
204
|
+
self.ttm = self.ttm_binding_manager.create_binding(
|
|
205
|
+
binding_name=ttm_binding_name,
|
|
206
|
+
**ttm_binding_config
|
|
207
|
+
)
|
|
208
|
+
else:
|
|
209
|
+
self.ttm = self.ttm_binding_manager.create_binding(
|
|
210
|
+
binding_name=ttm_binding_name
|
|
211
|
+
)
|
|
191
212
|
if self.ttm is None:
|
|
192
213
|
ASCIIColors.warning(f"Failed to create TTM binding: {ttm_binding_name}. Available: {self.ttm_binding_manager.get_available_bindings()}")
|
|
193
214
|
|
|
194
|
-
|
|
195
215
|
# --- Store Default Generation Parameters ---
|
|
196
216
|
self.default_ctx_size = ctx_size
|
|
197
217
|
self.default_n_predict = n_predict
|
|
@@ -9,26 +9,14 @@ class LollmsSTTBinding(ABC):
|
|
|
9
9
|
"""Abstract base class for all LOLLMS Speech-to-Text bindings."""
|
|
10
10
|
|
|
11
11
|
def __init__(self,
|
|
12
|
-
|
|
13
|
-
model_name: Optional[str] = None, # Can represent a default model
|
|
14
|
-
service_key: Optional[str] = None,
|
|
15
|
-
verify_ssl_certificate: bool = True):
|
|
12
|
+
binding_name:str="unknown"):
|
|
16
13
|
"""
|
|
17
14
|
Initialize the LollmsSTTBinding base class.
|
|
18
15
|
|
|
19
16
|
Args:
|
|
20
|
-
|
|
21
|
-
model_name (Optional[str]): A default identifier for the STT model.
|
|
22
|
-
service_key (Optional[str]): Authentication key for the service.
|
|
23
|
-
verify_ssl_certificate (bool): Whether to verify SSL certificates.
|
|
17
|
+
binding_name (Optional[str]): The binding name
|
|
24
18
|
"""
|
|
25
|
-
|
|
26
|
-
self.host_address = host_address.rstrip('/')
|
|
27
|
-
else:
|
|
28
|
-
self.host_address = None
|
|
29
|
-
self.model_name = model_name
|
|
30
|
-
self.service_key = service_key
|
|
31
|
-
self.verify_ssl_certificate = verify_ssl_certificate
|
|
19
|
+
self.binding_name = binding_name
|
|
32
20
|
|
|
33
21
|
@abstractmethod
|
|
34
22
|
def transcribe_audio(self, audio_path: Union[str, Path], model: Optional[str] = None, **kwargs) -> str:
|