atom-audio-engine: atom_audio_engine-0.1.0-py3-none-any.whl → atom_audio_engine-0.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. {atom_audio_engine-0.1.0.dist-info → atom_audio_engine-0.1.2.dist-info}/METADATA +10 -5
  2. atom_audio_engine-0.1.2.dist-info/RECORD +57 -0
  3. atom_audio_engine-0.1.2.dist-info/top_level.txt +1 -0
  4. audio_engine/__init__.py +80 -0
  5. audio_engine/examples/__init__.py +1 -0
  6. audio_engine/examples/basic_stt_llm_tts.py +200 -0
  7. audio_engine/examples/geneface_animation.py +99 -0
  8. audio_engine/examples/personaplex_pipeline.py +116 -0
  9. audio_engine/examples/websocket_server.py +86 -0
  10. audio_engine/pipelines/personaplex/__init__.py +41 -0
  11. audio_engine/pipelines/personaplex/client.py +259 -0
  12. audio_engine/pipelines/personaplex/config.py +69 -0
  13. audio_engine/pipelines/personaplex/pipeline.py +301 -0
  14. audio_engine/pipelines/personaplex/types.py +173 -0
  15. audio_engine/pipelines/personaplex/utils.py +192 -0
  16. audio_engine/scripts/debug_pipeline.py +79 -0
  17. audio_engine/scripts/debug_tts.py +162 -0
  18. audio_engine/scripts/test_cartesia_connect.py +57 -0
  19. audio_engine/tests/__init__.py +1 -0
  20. audio_engine/tests/test_personaplex/__init__.py +1 -0
  21. audio_engine/tests/test_personaplex/test_personaplex.py +10 -0
  22. audio_engine/tests/test_personaplex/test_personaplex_client.py +259 -0
  23. audio_engine/tests/test_personaplex/test_personaplex_config.py +71 -0
  24. audio_engine/tests/test_personaplex/test_personaplex_message.py +80 -0
  25. audio_engine/tests/test_personaplex/test_personaplex_pipeline.py +226 -0
  26. audio_engine/tests/test_personaplex/test_personaplex_session.py +184 -0
  27. audio_engine/tests/test_personaplex/test_personaplex_transcript.py +184 -0
  28. audio_engine/tests/test_traditional_pipeline/__init__.py +1 -0
  29. audio_engine/tests/test_traditional_pipeline/test_cartesia_asr.py +474 -0
  30. audio_engine/tests/test_traditional_pipeline/test_config_env.py +97 -0
  31. audio_engine/tests/test_traditional_pipeline/test_conversation_context.py +115 -0
  32. audio_engine/tests/test_traditional_pipeline/test_pipeline_creation.py +64 -0
  33. audio_engine/tests/test_traditional_pipeline/test_pipeline_with_mocks.py +173 -0
  34. audio_engine/tests/test_traditional_pipeline/test_provider_factories.py +61 -0
  35. audio_engine/tests/test_traditional_pipeline/test_websocket_server.py +58 -0
  36. atom_audio_engine-0.1.0.dist-info/RECORD +0 -25
  37. atom_audio_engine-0.1.0.dist-info/top_level.txt +0 -8
  38. {atom_audio_engine-0.1.0.dist-info → atom_audio_engine-0.1.2.dist-info}/WHEEL +0 -0
  39. {asr → audio_engine/asr}/__init__.py +0 -0
  40. {asr → audio_engine/asr}/base.py +0 -0
  41. {asr → audio_engine/asr}/cartesia.py +0 -0
  42. {asr → audio_engine/asr}/deepgram.py +0 -0
  43. {core → audio_engine/core}/__init__.py +0 -0
  44. {core → audio_engine/core}/config.py +0 -0
  45. {core → audio_engine/core}/pipeline.py +0 -0
  46. {core → audio_engine/core}/types.py +0 -0
  47. {integrations → audio_engine/integrations}/__init__.py +0 -0
  48. {integrations → audio_engine/integrations}/geneface.py +0 -0
  49. {llm → audio_engine/llm}/__init__.py +0 -0
  50. {llm → audio_engine/llm}/base.py +0 -0
  51. {llm → audio_engine/llm}/groq.py +0 -0
  52. {pipelines → audio_engine/pipelines}/__init__.py +0 -0
  53. {streaming → audio_engine/streaming}/__init__.py +0 -0
  54. {streaming → audio_engine/streaming}/websocket_server.py +0 -0
  55. {tts → audio_engine/tts}/__init__.py +0 -0
  56. {tts → audio_engine/tts}/base.py +0 -0
  57. {tts → audio_engine/tts}/cartesia.py +0 -0
  58. {utils → audio_engine/utils}/__init__.py +0 -0
  59. {utils → audio_engine/utils}/audio.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: atom-audio-engine
- Version: 0.1.0
+ Version: 0.1.2
  Summary: A pluggable, async-first Python framework for real-time audio-to-audio conversational AI
  Author-email: ATOM Group <info@atomgroup.ng>
  License-Expression: MIT
@@ -63,8 +63,13 @@ A pluggable audio-to-audio conversational engine with real-time streaming support
  ## Installation

  ```bash
- cd /Users/mayowaadebanjo/Projects/audio_engine
- pip install -r requirements.txt
+ pip install atom-audio-engine
+ ```
+
+ For development with all optional dependencies:
+
+ ```bash
+ pip install atom-audio-engine[all,dev]
  ```

  ## Quick Start
@@ -79,8 +84,8 @@ from audio_engine.tts import CartesiaTTS

  # Create pipeline with your providers
  pipeline = Pipeline(
-     asr=WhisperASR(api_key="your-openai-key"),
-     llm=AnthropicLLM(api_key="your-anthropic-key", model="claude-sonnet-4-20250514"),
+     asr=CartesiaASR(api_key="your-cartesia-key"),
+     llm=GroqLLM(api_key="your-groq-key", model="mixtral-8x7b-32768"),
      tts=CartesiaTTS(api_key="your-cartesia-key", voice_id="your-voice-id"),
      system_prompt="You are a helpful assistant.",
  )
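
For orientation, the constructed pipeline is driven as an async stream. A minimal sketch, assuming the `stream_text_input()` method and the `AudioChunk.data` field that appear in the bundled `examples/basic_stt_llm_tts.py` later in this diff (illustrative, not separately documented API):

```python
import asyncio


async def demo(pipeline):
    # Text → LLM → TTS: yields AudioChunk objects carrying raw PCM in `.data`
    async for audio_chunk in pipeline.stream_text_input("What is the capital of France?"):
        print(f"received {len(audio_chunk.data)} bytes of audio")


# asyncio.run(demo(pipeline))  # with the pipeline constructed above
```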
@@ -0,0 +1,57 @@
+ audio_engine/__init__.py,sha256=AQ0uto-Jn3cNqW35MMtSyX5mhXJMFv9AQhjcAkqZ7L4,1499
+ audio_engine/asr/__init__.py,sha256=w0t2ahxgApZbZjSc748tN3tmKDeXzasfBh51ZjPF9uc,1203
+ audio_engine/asr/base.py,sha256=MFC_7HmyEDnhDwUn62CWZsiF9_-mBVVsUK-Yppiq4Vk,2378
+ audio_engine/asr/cartesia.py,sha256=BXnvscO9VaR3LsfEGn7lJ66udzUjz44JzZTmSizZqIg,13321
+ audio_engine/asr/deepgram.py,sha256=M59lgrVFMS6-3YQcYaUY7cUdt2-MBptt_VExdfnSXr0,6429
+ audio_engine/core/__init__.py,sha256=7naTEkqDjrPsejviXk662OR86xVCyckU7eMKVpjwYys,301
+ audio_engine/core/config.py,sha256=EF98O2Gt8q29FX3T6UeDwWNIbm77bni99SThiJKl5Tk,5203
+ audio_engine/core/pipeline.py,sha256=jX9jAlIfwU6V8GjqjivyK8Y7P41S-QS8xKYv5c9_qG0,8850
+ audio_engine/core/types.py,sha256=iFQPajgeS1YgMWXJvubA8sWbxLI1Z8nF-z1uucrgNm4,2295
+ audio_engine/examples/__init__.py,sha256=4oFCZaD-vg0o48hnj03ZsktG2JrtwJ7HXUYOwEYSNCY,44
+ audio_engine/examples/basic_stt_llm_tts.py,sha256=tw8IIAL0WSG2M9U5SuLri75AOb7YM-twvAVAspaYVQM,6354
+ audio_engine/examples/geneface_animation.py,sha256=ogjQAqPHT5EW6X3R8hn0tJwj-_QBbPiBFDZDl_olTGo,2945
+ audio_engine/examples/personaplex_pipeline.py,sha256=OcpN8i5qoAS3Nmuc62tESzpRwPxsjxTGTrY_qICLETo,3641
+ audio_engine/examples/websocket_server.py,sha256=HhTlAFnJQXJyOs_prwFJASuh6h-0FKEh2JGeJSChf_c,2398
+ audio_engine/integrations/__init__.py,sha256=1y4CTaqybOwmfk_xxkWANYkc-A7PgH0JFMZCTq33fe4,126
+ audio_engine/integrations/geneface.py,sha256=2oeVZazp2R9gN-YmQhzzrZb87CBpEiAyKA8hHUxUZJk,8788
+ audio_engine/llm/__init__.py,sha256=mwr0C1E1Wf5589fVt7emOFMA2fHoXxQ5t-3dOxkXQEI,997
+ audio_engine/llm/base.py,sha256=C-ZNOab0Ca-vlxWgnPzB8uZXFNYbPgAYfQLNvaal2KU,2873
+ audio_engine/llm/groq.py,sha256=oGSjJBW0TiCmOzzl1HTE8zUhPC78I3ywhAYFq7Te2IA,6694
+ audio_engine/pipelines/__init__.py,sha256=Q1iZjX38TigrZPBaFgv_5AXw21wBN1Z-4nfXPjV-xDI,49
+ audio_engine/pipelines/personaplex/__init__.py,sha256=nX37MS93pYUPKiYwY2aa9G-PEI4x2yKjdLqGeab7wWI,916
+ audio_engine/pipelines/personaplex/client.py,sha256=NAiG6V9nTWh8ozrb5jT-6h8fesTuJZDgh-l7DlHQm6M,8667
+ audio_engine/pipelines/personaplex/config.py,sha256=6fBteI-HjJJl3ZcK5QZCCa9kcKVNDgPptLIkJNZc9kg,2935
+ audio_engine/pipelines/personaplex/pipeline.py,sha256=WUkFalPQ9sxICeFpF-58HJxzfQ30vfZ4WAs-E5aI60s,10411
+ audio_engine/pipelines/personaplex/types.py,sha256=6MvU2hBukBflJxat3MtC6bGQY1b33jaOIiOi2tZJRnU,4727
+ audio_engine/pipelines/personaplex/utils.py,sha256=um_7nGRFH0QaLIIfLwPnBXgFW0fVGU7gkjF8Gm-Hq4U,5000
+ audio_engine/scripts/debug_pipeline.py,sha256=HkrrVzimrmFsbltbEPKoAuJ_5yzBWBCWyrEH0_ZHOQM,2276
+ audio_engine/scripts/debug_tts.py,sha256=Aj-vW8kmcR7lDa2FdTn1_6wrFw1vpP8Kjnh1rLwQ_ag,4479
+ audio_engine/scripts/test_cartesia_connect.py,sha256=KoaBWxmfzdMBqpnDXwT2fFzAJsJlKg3hMsUYvAeU-L8,1529
+ audio_engine/streaming/__init__.py,sha256=Pd_ICcYeW75DXMsFpMrJnn9N-RU5s1_Wb3WZ3YbOTC4,136
+ audio_engine/streaming/websocket_server.py,sha256=miqHoVkUjznpmpQQrgkyaURR6DsDJLzkP_OGrBFOBYk,10994
+ audio_engine/tests/__init__.py,sha256=1JoGYWcW0zfdTZAgxs7NZaK4Zo0zlvq79dXzVwKMP3I,34
+ audio_engine/tests/test_personaplex/__init__.py,sha256=1JoGYWcW0zfdTZAgxs7NZaK4Zo0zlvq79dXzVwKMP3I,34
+ audio_engine/tests/test_personaplex/test_personaplex.py,sha256=BrYWbWmWqlzdK3H5YZtpLr4DxtK5UeLpbdwUabuUTnE,457
+ audio_engine/tests/test_personaplex/test_personaplex_client.py,sha256=RlGNHa-IcKC7CCiTQJDhUYN9HNMun7Q45AsFSu5swZ8,8377
+ audio_engine/tests/test_personaplex/test_personaplex_config.py,sha256=c-86tJ81NSfPOk8tIV_JfDn3IcJnFrgCHVqJGyw14lM,2487
+ audio_engine/tests/test_personaplex/test_personaplex_message.py,sha256=6gAbQUk954x4-PXkFdNb0GadxuJIJ49tRixPteFCiw4,2636
+ audio_engine/tests/test_personaplex/test_personaplex_pipeline.py,sha256=GCvNRgUN72d81RK0klc3z5ecBhBMgf4rJXgq5auXv6M,7424
+ audio_engine/tests/test_personaplex/test_personaplex_session.py,sha256=pF2s649MAh0TlRs4ooQBCExN-VSuc_DntknyfLw8Pxw,5780
+ audio_engine/tests/test_personaplex/test_personaplex_transcript.py,sha256=XdNAghb1Gjg68BBcj6BPt-1K-6rzS9gD3tufnp8vVPo,6400
+ audio_engine/tests/test_traditional_pipeline/__init__.py,sha256=1JoGYWcW0zfdTZAgxs7NZaK4Zo0zlvq79dXzVwKMP3I,34
+ audio_engine/tests/test_traditional_pipeline/test_cartesia_asr.py,sha256=rLM_7s-UQJEJGL98A8ewXrgckruog6ei-lFtpPetIkk,15353
+ audio_engine/tests/test_traditional_pipeline/test_config_env.py,sha256=pZd0doTKzZg7e_ZwEKLe3pfmZTBdXIlrO1-CUU1lPmc,3192
+ audio_engine/tests/test_traditional_pipeline/test_conversation_context.py,sha256=t6lk_5QwGE1CfU1RIAGVIB6d6flfoqVLNgPYs-aE1PA,4049
+ audio_engine/tests/test_traditional_pipeline/test_pipeline_creation.py,sha256=U8s4vc36JU79YTFVyv7HQlFN3Hj2KRfh-gWQKhsjiSA,2278
+ audio_engine/tests/test_traditional_pipeline/test_pipeline_with_mocks.py,sha256=N5ajn2QevssnP0xEBeR87FumT0w1j7BdVAiMmBLqL2A,5583
+ audio_engine/tests/test_traditional_pipeline/test_provider_factories.py,sha256=a9Da5wjhXV6-E_Q7E8AquKxbcTKAhjd2eVKUGBj3zpo,2240
+ audio_engine/tests/test_traditional_pipeline/test_websocket_server.py,sha256=InR8GCRiRW09zJk9Htx6YQE_--_KJhpEJCuCs_lJjKE,1936
+ audio_engine/tts/__init__.py,sha256=85XrpIkxFrRvOn19mWphkeBjTaEcsrFECYK_ZoGv1dQ,987
+ audio_engine/tts/base.py,sha256=vo0MSiep9QJQtpdCmDJWN-okK-ERYRA6Sk_g6IXCYZk,4475
+ audio_engine/tts/cartesia.py,sha256=bxhkNbWpQmlPTZ8RWcVCQzG_Q2mYr3t1aAd9OonSSWQ,17011
+ audio_engine/utils/__init__.py,sha256=WIeVykg3MqyOoCYEWsuzGyVniP8SIl9FE881ieR7WuE,250
+ audio_engine/utils/audio.py,sha256=Z7avyNqhzZ2fnBxZ_d0qUglOCCvHSffBveg5CQWTCM0,5529
+ atom_audio_engine-0.1.2.dist-info/METADATA,sha256=l8ztaq4vAmVNT4qg1mHhJW7R2sjTHs1BJsjTPpM108w,6690
+ atom_audio_engine-0.1.2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ atom_audio_engine-0.1.2.dist-info/top_level.txt,sha256=IyumwgFrsDL7nlZlBijX-0shiSVhhBCFPUNBRNKzWP4,13
+ atom_audio_engine-0.1.2.dist-info/RECORD,,
@@ -0,0 +1 @@
+ audio_engine
@@ -0,0 +1,80 @@
+ """
+ Audio Engine - Pluggable audio-to-audio conversational AI framework.
+
+ Orchestrates ASR → LLM → TTS pipeline with real-time streaming support.
+ """
+
+ __version__ = "0.1.2"
+
+ # Core exports
+ from .core.pipeline import Pipeline
+ from .core.config import (
+     AudioEngineConfig,
+     ASRConfig,
+     LLMConfig,
+     TTSConfig,
+     StreamingConfig,
+ )
+ from .core.types import (
+     AudioChunk,
+     TranscriptChunk,
+     ResponseChunk,
+     ConversationContext,
+ )
+
+ # ASR Providers
+ from .asr.base import BaseASR
+ from .asr.cartesia import CartesiaASR
+
+ try:
+     from .asr.deepgram import DeepgramASR
+ except ImportError:
+     pass
+
+ # LLM Providers
+ from .llm.base import BaseLLM
+ from .llm.groq import GroqLLM
+
+ # TTS Providers
+ from .tts.base import BaseTTS
+ from .tts.cartesia import CartesiaTTS
+
+ # Streaming
+ from .streaming.websocket_server import WebSocketServer
+
+ # Integrations
+ try:
+     from .integrations.geneface import GeneFacePipelineWrapper, GeneFaceConfig
+ except ImportError:
+     pass
+
+ __all__ = [
+     # Version
+     "__version__",
+     # Core
+     "Pipeline",
+     "AudioEngineConfig",
+     "ASRConfig",
+     "LLMConfig",
+     "TTSConfig",
+     "StreamingConfig",
+     "AudioChunk",
+     "TranscriptChunk",
+     "ResponseChunk",
+     "ConversationContext",
+     # ASR
+     "BaseASR",
+     "CartesiaASR",
+     "DeepgramASR",
+     # LLM
+     "BaseLLM",
+     "GroqLLM",
+     # TTS
+     "BaseTTS",
+     "CartesiaTTS",
+     # Streaming
+     "WebSocketServer",
+     # Integrations
+     "GeneFacePipelineWrapper",
+     "GeneFaceConfig",
+ ]
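
Since `__init__.py` re-exports the core classes, consumers can import everything from the package root. A hedged sketch combining the Quick Start snippet from METADATA with the config-driven path used by the bundled examples (`AudioEngineConfig.from_env()` / `create_pipeline()` appear in `examples/basic_stt_llm_tts.py`):

```python
from audio_engine import (
    Pipeline,
    AudioEngineConfig,
    CartesiaASR,
    GroqLLM,
    CartesiaTTS,
)

# Construct providers explicitly, as in the Quick Start...
pipeline = Pipeline(
    asr=CartesiaASR(api_key="your-cartesia-key"),
    llm=GroqLLM(api_key="your-groq-key", model="mixtral-8x7b-32768"),
    tts=CartesiaTTS(api_key="your-cartesia-key", voice_id="your-voice-id"),
    system_prompt="You are a helpful assistant.",
)

# ...or build from environment variables, as the bundled examples do
config = AudioEngineConfig.from_env()
pipeline = config.create_pipeline(system_prompt="You are a helpful assistant.")
```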
@@ -0,0 +1 @@
+ """Example scripts for the audio engine."""
@@ -0,0 +1,200 @@
+ #!/usr/bin/env python3
+ """
+ Example: Basic STT-LLM-TTS Pipeline
+
+ Simple example showing how to use the core audio-to-audio pipeline:
+ Audio Input → ASR → LLM → TTS → Audio Output
+
+ Usage:
+     # Text input only (skips ASR)
+     python examples/basic_stt_llm_tts.py
+
+     # Audio file input (full STT-LLM-TTS)
+     python examples/basic_stt_llm_tts.py examples/audio_clip_1.mp3
+
+ Setup:
+     export CARTESIA_API_KEY="your-cartesia-key"
+     export GROQ_API_KEY="your-groq-key"
+ """
+
+ import asyncio
+ import logging
+ import sys
+ import wave
+ from pathlib import Path
+
+ # Add parent to path
+ sys.path.insert(0, str(Path(__file__).parent.parent))
+
+ from dotenv import load_dotenv
+ from core.config import AudioEngineConfig
+ from core.types import AudioChunk
+
+ # Load environment variables from .env file
+ load_dotenv(Path(__file__).parent.parent.parent / ".env")
+
+ # Setup logging to see debug messages
+ logging.basicConfig(
+     level=logging.DEBUG, format="%(name)s - %(levelname)s - %(message)s"
+ )
+
+
+ async def stream_audio_file(file_path: str, chunk_size: int = 4096):
+     """Stream audio from file (MP3, WAV, etc.) as chunks."""
+     try:
+         from pydub import AudioSegment
+     except ImportError:
+         print("❌ pydub required. Install: pip install pydub")
+         raise
+
+     file_path = Path(file_path)
+     if not file_path.exists():
+         raise FileNotFoundError(f"Audio file not found: {file_path}")
+
+     print(f"📁 Loading: {file_path.name}")
+
+     # Load and convert to 16kHz mono 16-bit PCM
+     audio = AudioSegment.from_file(str(file_path))
+     audio = audio.set_channels(1).set_frame_rate(16000).set_sample_width(2)
+
+     duration_sec = len(audio) / 1000.0
+     print(f"✓ Audio: {duration_sec:.2f}s @ 16kHz mono\n")
+
+     # Stream as chunks
+     audio_bytes = audio.raw_data
+     chunk_index = 0
+     offset = 0
+     total_chunks = (len(audio_bytes) + chunk_size - 1) // chunk_size
+
+     while offset < len(audio_bytes):
+         chunk_data = audio_bytes[offset : offset + chunk_size]
+         chunk_index += 1
+         is_final = offset + chunk_size >= len(audio_bytes)
+
+         # Yield as AudioChunk object
+         yield AudioChunk(
+             data=chunk_data, sample_rate=16000, format="pcm_s16le", is_final=is_final
+         )
+         offset += chunk_size
+
+
+ async def main():
+     """Run basic pipeline example."""
+     print("=" * 60)
+     print("Audio Engine: Basic STT-LLM-TTS Pipeline")
+     print("=" * 60)
+     print()
+
+     # Check if audio file provided as argument
+     audio_file = None
+     if len(sys.argv) > 1:
+         audio_file = sys.argv[1]
+
+     # Load configuration from environment
+     config = AudioEngineConfig.from_env()
+     print("✓ Config loaded:")
+     print(f"  - ASR: {config.asr.provider}")
+     print(f"  - LLM: {config.llm.provider}")
+     print(f"  - TTS: {config.tts.provider}")
+     print()
+
+     # Create pipeline from config
+     pipeline = config.create_pipeline(
+         system_prompt="You are a helpful assistant. Keep responses brief."
+     )
+     print("✓ Pipeline created")
+     print()
+
+     if audio_file:
+         # Full pipeline: Audio → ASR → LLM → TTS
+         print("-" * 60)
+         print("FULL PIPELINE: STT → LLM → TTS")
+         print("-" * 60 + "\n")
+
+         transcript = ""
+         llm_response = ""
+         audio_output = bytearray()
+         chunk_count = 0
+
+         try:
+             audio_generator = stream_audio_file(audio_file)
+
+             async for result in pipeline.stream(audio_generator):
+                 if hasattr(result, "text"):
+                     class_name = result.__class__.__name__
+                     if class_name == "TranscriptChunk":
+                         transcript += result.text
+                         print(f"🎤 STT: {result.text!r}")
+                     elif class_name == "ResponseChunk":
+                         llm_response += result.text
+                         print(f"🧠 LLM: {result.text!r}")
+
+                 elif hasattr(result, "data") and result.data:
+                     audio_output.extend(result.data)
+                     chunk_count += 1
+                     if chunk_count % 5 == 0:
+                         print(f"🔊 TTS: {len(audio_output)} bytes...")
+
+             if chunk_count > 0:
+                 print(f"🔊 TTS: {len(audio_output)} bytes total")
+
+             # Save output
+             if audio_output:
+                 output_dir = Path(__file__).parent / "output_samples"
+                 output_dir.mkdir(exist_ok=True)
+                 output_path = output_dir / "output_audio.wav"
+                 with wave.open(str(output_path), "wb") as wav_file:
+                     wav_file.setnchannels(1)
+                     wav_file.setsampwidth(2)
+                     wav_file.setframerate(16000)
+                     wav_file.writeframes(bytes(audio_output))
+
+                 duration_sec = len(audio_output) / 32000
+                 print(f"\n✓ Audio saved to {output_path} ({duration_sec:.2f}s)")
+
+             # Print results
+             print("\n" + "=" * 60)
+             print("RESULTS")
+             print("=" * 60)
+             if transcript:
+                 print(f"🎤 Transcribed: {transcript!r}")
+             if llm_response:
+                 print(f"🧠 Response: {llm_response!r}")
+             print("=" * 60 + "\n")
+
+         except Exception as e:
+             print(f"❌ Error: {e}")
+             import traceback
+
+             traceback.print_exc()
+             return
+
+     else:
+         # Simplified pipeline: Text → LLM → TTS (no ASR)
+         print("-" * 60)
+         print("SIMPLIFIED PIPELINE: Text → LLM → TTS")
+         print("-" * 60 + "\n")
+
+         user_text = "What is the capital of France?"
+         print(f"User: {user_text}\n")
+
+         chunk_count = 0
+         total_bytes = 0
+         async for audio_chunk in pipeline.stream_text_input(user_text):
+             chunk_count += 1
+             total_bytes += len(audio_chunk.data)
+             print(f"  • Audio chunk {chunk_count}: {len(audio_chunk.data)} bytes")
+
+         print()
+         print("✓ Pipeline complete")
+         print(f"  Total audio: {total_bytes} bytes across {chunk_count} chunks")
+         print()
+         print("Conversation history:")
+         for msg in pipeline.context.messages:
+             role = msg.role.upper()
+             content = msg.content[:60]
+             print(f"  {role}: {content}...")
+
+
+ if __name__ == "__main__":
+     asyncio.run(main())
@@ -0,0 +1,99 @@
+ #!/usr/bin/env python3
+ """
+ Example: GeneFace++ Face Animation Integration
+
+ Combines audio with GeneFace++ to generate an animated face
+ that speaks the assistant's responses.
+
+ GeneFaceIntegration takes a GeneFaceConfig (no pipeline parameter).
+ It generates videos from audio bytes using:
+     - generate_video(audio_bytes, sample_rate) -> video_path
+     - generate_video_stream(audio_chunks) -> video_path
+
+ Setup:
+     1. Clone GeneFace++ repository:
+        git clone https://github.com/yerfor/GeneFace-plusplus.git
+
+     2. Install GeneFace++ (see their docs)
+
+     3. Update geneface_path in this script
+
+ Run:
+     python examples/geneface_animation.py
+ """
+
+ import asyncio
+ import sys
+ import logging
+ from pathlib import Path
+
+ # Add parent to path
+ sys.path.insert(0, str(Path(__file__).parent.parent))
+
+ from dotenv import load_dotenv
+ from integrations.geneface import GeneFaceIntegration, GeneFaceConfig
+
+ # Load environment variables from .env file
+ load_dotenv(Path(__file__).parent.parent / ".env")
+
+ # Setup logging
+ logging.basicConfig(
+     level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+ )
+ logger = logging.getLogger(__name__)
+
+
+ async def main():
+     """Run GeneFace++ animation example."""
+     print("=" * 60)
+     print("Audio Engine: GeneFace++ Face Animation")
+     print("=" * 60)
+     print()
+
+     # Setup GeneFace++ configuration
+     geneface_path = ""  # Set this to your GeneFace-plusplus checkout
+     if not geneface_path or not Path(geneface_path).exists():
+         logger.error(f"GeneFace++ path not found: {geneface_path!r}")
+         logger.info("Clone GeneFace-plusplus:")
+         logger.info("  git clone https://github.com/yerfor/GeneFace-plusplus.git")
+         logger.info("Then update the geneface_path variable in this script")
+         return
+
+     geneface_config = GeneFaceConfig(
+         geneface_path=str(geneface_path),
+         checkpoint_path=None,  # Specify trained model checkpoint if available
+         output_resolution=(512, 512),
+         fps=25,
+         device="cuda",  # or "cpu"
+     )
+
+     # Initialize GeneFace integration
+     try:
+         geneface = GeneFaceIntegration(config=geneface_config)
+         logger.info("GeneFace++ integration initialized")
+         print()
+
+         # Example: Generate video from audio
+         sample_rate = 16000
+         duration_seconds = 2
+         # For demo, create silence (all zeros)
+         audio_bytes = bytes(sample_rate * duration_seconds * 2)  # 16-bit PCM
+
+         logger.info(f"Generating video from {duration_seconds}s audio...")
+         video_path = await geneface.generate_video(
+             audio=audio_bytes,
+             sample_rate=sample_rate,
+             output_path=None,  # Uses temp file
+         )
+
+         logger.info(f"✓ Video saved to: {video_path}")
+         logger.info("GeneFace++ video generation working!")
+
+     except Exception as e:
+         logger.error(f"Error: {e}")
+         logger.info("Make sure GeneFace++ is properly installed and configured")
+
+
+ if __name__ == "__main__":
+     asyncio.run(main())
@@ -0,0 +1,116 @@
+ #!/usr/bin/env python3
+ """
+ Example: PersonaPlex Full-Duplex Pipeline
+
+ PersonaPlex is a full-duplex audio pipeline allowing simultaneous
+ input/output - the user can interrupt while the assistant is speaking.
+
+ PersonaPlexPipeline API:
+     - __init__(config=None, system_prompt="...", save_transcripts=True)
+     - await start() -> connects to server
+     - await stop() -> closes connection, saves transcript
+     - async for (audio_chunk, text_chunk) in stream(audio_stream=None)
+     - PersonaPlexConfig uses voice_prompt (not voice_id)
+
+ Setup:
+     Obtain a PersonaPlex server URL from RunPod or a similar service.
+     Set environment variables (optional; PersonaPlexConfig has defaults):
+         export PERSONAPLEX_SERVER="wss://your-server/"
+         export PERSONAPLEX_VOICE_PROMPT="NATF0.pt"
+
+ Run:
+     python examples/personaplex_pipeline.py
+
+ Note:
+     PersonaPlex is full-duplex (simultaneous I/O), unlike the
+     sequential STT-LLM-TTS pipeline.
+ """
+
+ import asyncio
+ import sys
+ import logging
+ from pathlib import Path
+
+ # Add parent to path
+ sys.path.insert(0, str(Path(__file__).parent.parent))
+
+ from dotenv import load_dotenv
+ from pipelines.personaplex import PersonaPlexPipeline, PersonaPlexConfig
+
+ # Load environment variables from .env file
+ load_dotenv(Path(__file__).parent.parent / ".env")
+
+ # Setup logging
+ logging.basicConfig(
+     level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+ )
+ logger = logging.getLogger(__name__)
+
+
+ async def main():
+     """Run PersonaPlex full-duplex pipeline example."""
+     print("=" * 60)
+     print("Audio Engine: PersonaPlex Full-Duplex Pipeline")
+     print("=" * 60)
+     print()
+
+     # Configure PersonaPlex
+     config = PersonaPlexConfig(
+         # server_url="wss://your-personaplex-server",  # Uses default if not set
+         voice_prompt="NATF0.pt",  # Voice prompt file (voice_prompt, not voice_id)
+         text_temperature=0.7,
+         audio_temperature=0.8,
+         save_transcripts=True,
+     )
+
+     logger.info("PersonaPlex Configuration:")
+     logger.info(f"  Server: {config.server_url[:50]}...")
+     logger.info(f"  Voice Prompt: {config.voice_prompt}")
+     logger.info(f"  Text Temp: {config.text_temperature}")
+     logger.info(f"  Audio Temp: {config.audio_temperature}")
+     print()
+
+     # Create pipeline
+     pipeline = PersonaPlexPipeline(
+         config=config,
+         system_prompt="You are a helpful, friendly AI assistant.",
+         save_transcripts=True,
+     )
+
+     try:
+         # Start connection
+         await pipeline.start()
+         logger.info("✓ Connected to PersonaPlex server")
+         logger.info("✓ Ready for full-duplex streaming")
+         print()
+
+         # Example: Stream bidirectional audio/text
+         logger.info("Streaming audio and text (press Ctrl+C to stop)...")
+         chunk_count = 0
+
+         async for audio_chunk, text_chunk in pipeline.stream():
+             if text_chunk:
+                 logger.info(f"[Assistant] {text_chunk.text}")
+             if audio_chunk:
+                 chunk_count += 1
+                 logger.info(
+                     f"[Audio] Chunk {chunk_count}: {len(audio_chunk.data)} bytes"
+                 )
+
+         # Stop and save transcript
+         transcript_data = await pipeline.stop()
+         logger.info("✓ Pipeline stopped")
+         if transcript_data:
+             logger.info(f"✓ Transcript saved (session: {transcript_data.session_id})")
+
+     except KeyboardInterrupt:
+         logger.info("\n⏹️ Stopped by user")
+         await pipeline.stop()
+     except Exception as e:
+         logger.error(f"Error: {e}")
+         logger.info("Make sure PersonaPlex server is running and accessible")
+         await pipeline.stop()
+
+
+ if __name__ == "__main__":
+     asyncio.run(main())
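
The example above calls `stream()` with no input source, but the docstring advertises an `audio_stream` parameter. A hedged sketch of feeding your own audio source, assuming `stream()` accepts the same `AudioChunk` objects used elsewhere in the package (the published code does not confirm the expected chunk type here):

```python
from audio_engine.core.types import AudioChunk


async def audio_source():
    """Hypothetical audio source: yields 16-bit PCM wrapped in AudioChunk."""
    silence = bytes(3200)  # 100 ms of 16 kHz mono 16-bit silence
    for i in range(10):
        yield AudioChunk(
            data=silence, sample_rate=16000, format="pcm_s16le", is_final=(i == 9)
        )


# Inside main(), instead of pipeline.stream():
# async for audio_chunk, text_chunk in pipeline.stream(audio_stream=audio_source()):
#     ...
```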
@@ -0,0 +1,86 @@
+ #!/usr/bin/env python3
+ """
+ Example: WebSocket Server for Real-Time Audio Streaming
+
+ Runs a WebSocket server that accepts live audio streams and returns
+ responses in real-time. Clients can send binary audio and receive text
+ transcripts + response audio.
+
+ Setup:
+     export DEEPGRAM_API_KEY="your-deepgram-key"
+     export GROQ_API_KEY="your-groq-key"
+     export CARTESIA_API_KEY="your-cartesia-key"
+
+ Run:
+     python examples/websocket_server.py
+
+ Then connect a WebSocket client to ws://localhost:8765
+
+ Example client (JavaScript):
+     const ws = new WebSocket("ws://localhost:8765");
+     ws.binaryType = "arraybuffer";
+     ws.onopen = () => {
+         // Send audio data
+         ws.send(audioBuffer);
+         // Signal end of speech
+         ws.send(JSON.stringify({"type": "end_of_speech"}));
+     };
+     ws.onmessage = (event) => {
+         if (typeof event.data === "string") {
+             console.log("Event:", JSON.parse(event.data));
+         } else {
+             console.log("Audio data:", event.data);
+         }
+     };
+ """
+
+ import asyncio
+ import sys
+ import logging
+ from pathlib import Path
+
+ # Add parent to path
+ sys.path.insert(0, str(Path(__file__).parent.parent))
+
+ from dotenv import load_dotenv
+ from core.config import AudioEngineConfig
+ from streaming.websocket_server import run_server_from_config
+
+ # Load environment variables from .env file
+ load_dotenv(Path(__file__).parent.parent / ".env")
+
+ # Setup logging
+ logging.basicConfig(
+     level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+ )
+ logger = logging.getLogger(__name__)
+
+
+ async def main():
+     """Run WebSocket server."""
+     print("=" * 60)
+     print("Audio Engine: WebSocket Server")
+     print("=" * 60)
+     print()
+
+     # Load configuration from environment
+     config = AudioEngineConfig.from_env()
+     logger.info("Config loaded")
+     logger.info(f"  ASR: {config.asr.provider}")
+     logger.info(f"  LLM: {config.llm.provider}")
+     logger.info(f"  TTS: {config.tts.provider}")
+     logger.info(f"  Host: {config.streaming.host}")
+     logger.info(f"  Port: {config.streaming.port}")
+     print()
+
+     try:
+         await run_server_from_config(
+             config,
+             system_prompt="You are a helpful audio assistant. Keep responses brief and natural.",
+         )
+     except KeyboardInterrupt:
+         logger.info("Server shutting down...")
+
+
+ if __name__ == "__main__":
+     asyncio.run(main())
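
To complement the JavaScript client in the docstring, here is a minimal Python client sketch using the third-party `websockets` package. It mirrors the protocol described above (binary audio frames in, an `end_of_speech` JSON message, then JSON text events and binary audio out); the audio file path is a placeholder, and the loop simply reads until the server closes the connection:

```python
import asyncio
import json
from pathlib import Path

import websockets  # pip install websockets


async def talk(audio_path: str = "examples/audio_clip_1.mp3"):
    async with websockets.connect("ws://localhost:8765") as ws:
        # Send raw audio bytes, then signal end of speech
        await ws.send(Path(audio_path).read_bytes())
        await ws.send(json.dumps({"type": "end_of_speech"}))

        # Text frames are JSON events; binary frames are response audio
        audio_out = bytearray()
        async for message in ws:
            if isinstance(message, str):
                print("event:", json.loads(message))
            else:
                audio_out.extend(message)
        print(f"received {len(audio_out)} bytes of response audio")


if __name__ == "__main__":
    asyncio.run(talk())
```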