dominus-sdk-python 2.4.0__tar.gz → 2.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/PKG-INFO +12 -1
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/__init__.py +14 -1
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/helpers/crypto.py +5 -0
- dominus_sdk_python-2.5.1/dominus/namespaces/oracle/__init__.py +120 -0
- dominus_sdk_python-2.5.1/dominus/namespaces/oracle/audio_capture.py +382 -0
- dominus_sdk_python-2.5.1/dominus/namespaces/oracle/oracle_websocket.py +230 -0
- dominus_sdk_python-2.5.1/dominus/namespaces/oracle/session.py +252 -0
- dominus_sdk_python-2.5.1/dominus/namespaces/oracle/types.py +127 -0
- dominus_sdk_python-2.5.1/dominus/namespaces/oracle/vad_gate.py +276 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/start.py +7 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus_sdk_python.egg-info/PKG-INFO +12 -1
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus_sdk_python.egg-info/SOURCES.txt +6 -4
- dominus_sdk_python-2.5.1/dominus_sdk_python.egg-info/requires.txt +23 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/pyproject.toml +14 -1
- dominus_sdk_python-2.4.0/dominus/namespaces/_deprecated_crossover.py +0 -10
- dominus_sdk_python-2.4.0/dominus/namespaces/_deprecated_sql.py +0 -1341
- dominus_sdk_python-2.4.0/dominus/services/_deprecated_architect.py +0 -323
- dominus_sdk_python-2.4.0/dominus/services/_deprecated_sovereign.py +0 -93
- dominus_sdk_python-2.4.0/dominus_sdk_python.egg-info/requires.txt +0 -10
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/README.md +0 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/config/__init__.py +0 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/config/endpoints.py +0 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/errors.py +0 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/helpers/__init__.py +0 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/helpers/auth.py +0 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/helpers/cache.py +0 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/helpers/core.py +0 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/namespaces/__init__.py +0 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/namespaces/admin.py +0 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/namespaces/auth.py +0 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/namespaces/courier.py +0 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/namespaces/db.py +0 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/namespaces/ddl.py +0 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/namespaces/fastapi.py +0 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/namespaces/files.py +0 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/namespaces/health.py +0 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/namespaces/logs.py +0 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/namespaces/open.py +0 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/namespaces/portal.py +0 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/namespaces/redis.py +0 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/namespaces/secrets.py +0 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/namespaces/secure.py +0 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/services/__init__.py +0 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus_sdk_python.egg-info/dependency_links.txt +0 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus_sdk_python.egg-info/top_level.txt +0 -0
- {dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dominus-sdk-python
|
|
3
|
-
Version: 2.4.0
|
|
3
|
+
Version: 2.5.1
|
|
4
4
|
Summary: Python SDK for the Dominus Orchestrator Platform
|
|
5
5
|
Author-email: CareBridge Systems <dev@carebridge.io>
|
|
6
6
|
License: Proprietary
|
|
@@ -25,9 +25,20 @@ Requires-Dist: bcrypt>=4.0.0
|
|
|
25
25
|
Requires-Dist: cryptography>=41.0.0
|
|
26
26
|
Provides-Extra: jwt
|
|
27
27
|
Requires-Dist: PyJWT>=2.8.0; extra == "jwt"
|
|
28
|
+
Provides-Extra: oracle
|
|
29
|
+
Requires-Dist: websockets>=12.0; extra == "oracle"
|
|
30
|
+
Requires-Dist: sounddevice>=0.4.6; extra == "oracle"
|
|
31
|
+
Requires-Dist: numpy>=1.24.0; extra == "oracle"
|
|
32
|
+
Requires-Dist: webrtcvad>=2.0.10; extra == "oracle"
|
|
28
33
|
Provides-Extra: dev
|
|
29
34
|
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
30
35
|
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
|
36
|
+
Provides-Extra: all
|
|
37
|
+
Requires-Dist: PyJWT>=2.8.0; extra == "all"
|
|
38
|
+
Requires-Dist: websockets>=12.0; extra == "all"
|
|
39
|
+
Requires-Dist: sounddevice>=0.4.6; extra == "all"
|
|
40
|
+
Requires-Dist: numpy>=1.24.0; extra == "all"
|
|
41
|
+
Requires-Dist: webrtcvad>=2.0.10; extra == "all"
|
|
31
42
|
|
|
32
43
|
# CB Dominus SDK for Python
|
|
33
44
|
|
|
@@ -100,6 +100,14 @@ from .namespaces.courier import CourierNamespace
|
|
|
100
100
|
from .namespaces.health import HealthNamespace
|
|
101
101
|
from .namespaces.open import OpenNamespace
|
|
102
102
|
|
|
103
|
+
# Export Oracle namespace for speech-to-text
|
|
104
|
+
from .namespaces.oracle import (
|
|
105
|
+
OracleNamespace,
|
|
106
|
+
OracleSession,
|
|
107
|
+
OracleSessionOptions,
|
|
108
|
+
VADState,
|
|
109
|
+
)
|
|
110
|
+
|
|
103
111
|
# Export cache and resilience utilities
|
|
104
112
|
from .helpers.cache import (
|
|
105
113
|
dominus_cache,
|
|
@@ -123,7 +131,7 @@ from .errors import (
|
|
|
123
131
|
TimeoutError as DominusTimeoutError,
|
|
124
132
|
)
|
|
125
133
|
|
|
126
|
-
__version__ = "2.
|
|
134
|
+
__version__ = "2.5.0"
|
|
127
135
|
__all__ = [
|
|
128
136
|
# Main SDK instance
|
|
129
137
|
"dominus",
|
|
@@ -152,6 +160,11 @@ __all__ = [
|
|
|
152
160
|
"CourierNamespace",
|
|
153
161
|
"HealthNamespace",
|
|
154
162
|
"OpenNamespace",
|
|
163
|
+
# Oracle namespace for speech-to-text
|
|
164
|
+
"OracleNamespace",
|
|
165
|
+
"OracleSession",
|
|
166
|
+
"OracleSessionOptions",
|
|
167
|
+
"VADState",
|
|
155
168
|
# Cache and resilience utilities
|
|
156
169
|
"dominus_cache",
|
|
157
170
|
"CircuitBreaker",
|
|
@@ -116,3 +116,8 @@ def generate_token(length: int = 64) -> str:
|
|
|
116
116
|
Random URL-safe token string
|
|
117
117
|
"""
|
|
118
118
|
return secrets.token_urlsafe(length)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
# Aliases for cleaner imports from dominus.__init__
# Each short name is bound to the same object as its `_local` counterpart.
|
|
122
|
+
verify_password = verify_password_local
|
|
|
123
|
+
verify_psk = verify_psk_local
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Oracle Namespace - Real-time streaming speech-to-text.
|
|
3
|
+
|
|
4
|
+
Provides WebSocket-based streaming transcription via Deepgram,
|
|
5
|
+
with built-in VAD (Voice Activity Detection) for cost optimization.
|
|
6
|
+
|
|
7
|
+
Key features:
|
|
8
|
+
- Automatic microphone capture and 16kHz resampling
|
|
9
|
+
- VAD gating: only sends audio when speech is detected
|
|
10
|
+
- 4-state VAD machine: IDLE -> ARMED -> SPEAKING -> TRAILING
|
|
11
|
+
- Pre-roll buffer captures word onsets
|
|
12
|
+
- Ping/pong keepalive during IDLE
|
|
13
|
+
- NO send_audio() exposed - VAD handles everything
|
|
14
|
+
|
|
15
|
+
Usage:
|
|
16
|
+
session = dominus.oracle.create_session(user_jwt)
|
|
17
|
+
|
|
18
|
+
session.on_ready = lambda: set_listening(True)
|
|
19
|
+
session.on_interim = lambda text: set_live_transcript(text)
|
|
20
|
+
session.on_utterance = lambda text: send_to_curator(text)
|
|
21
|
+
session.on_vad_state_change = lambda state: set_mic_state(state)
|
|
22
|
+
session.on_error = lambda error: show_error(error)
|
|
23
|
+
|
|
24
|
+
await session.start()
|
|
25
|
+
# ... user speaks, transcripts flow back ...
|
|
26
|
+
await session.stop()
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from typing import Optional, TYPE_CHECKING
|
|
30
|
+
|
|
31
|
+
from .types import (
|
|
32
|
+
VADState,
|
|
33
|
+
OracleSessionOptions,
|
|
34
|
+
ResolvedOracleSessionOptions,
|
|
35
|
+
DEFAULT_OPTIONS,
|
|
36
|
+
AUDIO_CONFIG,
|
|
37
|
+
)
|
|
38
|
+
from .session import OracleSession
|
|
39
|
+
|
|
40
|
+
if TYPE_CHECKING:
|
|
41
|
+
from ...start import Dominus
|
|
42
|
+
|
|
43
|
+
# Re-export public types
|
|
44
|
+
__all__ = [
|
|
45
|
+
"OracleNamespace",
|
|
46
|
+
"OracleSession",
|
|
47
|
+
"OracleSessionOptions",
|
|
48
|
+
"VADState",
|
|
49
|
+
]
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class OracleNamespace:
    """
    OracleNamespace - Factory for creating streaming transcription sessions.

    The Oracle namespace provides a simple API for real-time speech-to-text:
    - create_session() creates a new transcription session
    - Sessions handle mic capture, VAD, WebSocket, and transcripts internally
    - NO raw audio access - VAD is mandatory for cost control
    """

    # Names of the tunables copied from OracleSessionOptions / DEFAULT_OPTIONS
    # into ResolvedOracleSessionOptions.
    _OPTION_FIELDS = (
        "preroll_ms",
        "postroll_ms",
        "armed_confirm_ms",
        "vad_threshold",
        "energy_threshold",
        "ping_interval_ms",
    )

    def __init__(self, client: "Dominus"):
        # Imported lazily, at construction time rather than module import time.
        from ...config.endpoints import BASE_URL
        self._base_url = BASE_URL
        self._client = client

    def create_session(
        self,
        user_token: str,
        options: Optional[OracleSessionOptions] = None
    ) -> OracleSession:
        """
        Create a streaming transcription session.

        The session handles everything internally:
        - Microphone access and audio capture
        - Resampling to 16kHz mono PCM
        - VAD gating (only sends speech, not silence)
        - WebSocket connection to Oracle
        - Reconnection on connection loss

        Args:
            user_token: User JWT from portal.login()
            options: Optional configuration overrides. Any field that is
                None (or the whole object being omitted) falls back to the
                corresponding DEFAULT_OPTIONS value.

        Returns:
            OracleSession ready to start()

        Example:
            session = dominus.oracle.create_session(user_jwt, OracleSessionOptions(
                preroll_ms=320,      # Capture 320ms before speech
                postroll_ms=400,     # Continue 400ms after speech
                armed_confirm_ms=80, # Require 80ms to confirm speech
            ))

            session.on_utterance = lambda text: send_to_curator(text)

            await session.start()
        """
        # Merge options with defaults. The previous form
        # (`x if x != default else default`) always evaluated to `x`, so an
        # unset field was passed through instead of being defaulted.
        resolved = {}
        for name in self._OPTION_FIELDS:
            override = getattr(options, name) if options else None
            resolved[name] = (
                getattr(DEFAULT_OPTIONS, name) if override is None else override
            )

        resolved_options = ResolvedOracleSessionOptions(**resolved)
        return OracleSession(self._base_url, user_token, resolved_options)
|
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AudioCapture - Microphone capture and resampling (INTERNAL)
|
|
3
|
+
|
|
4
|
+
Handles:
|
|
5
|
+
- Microphone access via sounddevice or pyaudio
|
|
6
|
+
- Resampling to 16kHz mono if needed
|
|
7
|
+
- Output 20ms frames (640 bytes PCM16)
|
|
8
|
+
|
|
9
|
+
This module is INTERNAL and should NOT be exported publicly.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import asyncio
|
|
13
|
+
import struct
|
|
14
|
+
import math
|
|
15
|
+
from typing import Callable, Optional, List
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
|
|
18
|
+
from .types import AUDIO_CONFIG
|
|
19
|
+
|
|
20
|
+
# Try to import audio libraries
|
|
21
|
+
try:
|
|
22
|
+
import sounddevice as sd
|
|
23
|
+
import numpy as np
|
|
24
|
+
SOUNDDEVICE_AVAILABLE = True
|
|
25
|
+
except ImportError:
|
|
26
|
+
SOUNDDEVICE_AVAILABLE = False
|
|
27
|
+
sd = None
|
|
28
|
+
np = None
|
|
29
|
+
|
|
30
|
+
try:
|
|
31
|
+
import pyaudio
|
|
32
|
+
PYAUDIO_AVAILABLE = True
|
|
33
|
+
except ImportError:
|
|
34
|
+
PYAUDIO_AVAILABLE = False
|
|
35
|
+
pyaudio = None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def resample(input_data: List[float], from_rate: int, to_rate: int) -> List[float]:
    """
    Resample audio data using linear interpolation.

    Args:
        input_data: Input samples as floats
        from_rate: Source sample rate
        to_rate: Target sample rate

    Returns:
        Resampled audio data. When both rates are equal the input list
        itself is returned (no copy is made). An empty input yields an
        empty list.
    """
    if from_rate == to_rate:
        return input_data

    ratio = from_rate / to_rate
    output_length = math.ceil(len(input_data) / ratio)
    last_index = len(input_data) - 1
    output = []

    for i in range(output_length):
        src_index = i * ratio
        # Clamp the lower neighbour: floating-point rounding in
        # ceil(len / ratio) can produce one extra output sample whose source
        # position lands on len(input_data), which would index out of range.
        src_floor = min(int(src_index), last_index)
        src_ceil = min(src_floor + 1, last_index)
        t = src_index - src_floor

        # Linear interpolation between the two neighbouring source samples
        value = input_data[src_floor] * (1 - t) + input_data[src_ceil] * t
        output.append(value)

    return output
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def float_to_pcm16(samples: List[float]) -> bytes:
    """
    Convert float samples [-1, 1] to PCM16 bytes.

    Values outside [-1, 1] are clamped first. Negative samples are scaled by
    0x8000 and non-negative samples by 0x7FFF so both ends of the int16 range
    are reachable; the scaled value is truncated toward zero by int().

    Args:
        samples: Float samples in range [-1, 1]

    Returns:
        PCM16 bytes (little-endian), two bytes per input sample
    """
    clamped = (max(-1.0, min(1.0, s)) for s in samples)
    pcm_values = [
        int(s * 0x8000) if s < 0 else int(s * 0x7FFF)
        for s in clamped
    ]

    return struct.pack(f"<{len(pcm_values)}h", *pcm_values)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class SoundDeviceCapture:
    """
    Audio capture using sounddevice library.
    Preferred for cross-platform compatibility.

    Raw float blocks delivered by the sounddevice callback are queued onto
    the event loop, re-chunked into AUDIO_CONFIG.SAMPLES_PER_FRAME-sample
    frames, converted to PCM16 and passed to ``on_frame``.
    """

    def __init__(self):
        if not SOUNDDEVICE_AVAILABLE:
            raise ImportError(
                "sounddevice package is required for audio capture. "
                "Install with: pip install sounddevice numpy"
            )

        self._stream: Optional[sd.InputStream] = None
        self._buffer: List[float] = []
        self._is_capturing = False
        self._callback_queue: Optional[asyncio.Queue] = None
        self._process_task: Optional[asyncio.Task] = None

        # Called with each complete PCM16 frame (bytes).
        self.on_frame: Optional[Callable[[bytes], None]] = None

    async def start(self) -> None:
        """
        Start audio capture from microphone.

        No-op if capture is already running. If the stream cannot be
        started it is closed again before the exception propagates.
        """
        if self._is_capturing:
            return

        loop = asyncio.get_running_loop()
        queue: asyncio.Queue = asyncio.Queue()
        self._callback_queue = queue

        def audio_callback(indata, frames, time_info, status):
            if status:
                print(f"[OracleSDK] Audio status: {status}")

            # Convert numpy array to list of floats (first channel only)
            mono_data = indata[:, 0].tolist() if indata.ndim > 1 else indata.flatten().tolist()

            # asyncio.Queue is not thread-safe and this callback is invoked
            # by the audio backend rather than by the event loop, so hand the
            # data over via the loop instead of touching the queue directly.
            try:
                loop.call_soon_threadsafe(queue.put_nowait, mono_data)
            except RuntimeError:
                pass  # Event loop already closed during shutdown: drop the block

        # Open stream
        stream = sd.InputStream(
            samplerate=AUDIO_CONFIG.SAMPLE_RATE,
            channels=AUDIO_CONFIG.CHANNELS,
            dtype='float32',
            blocksize=1024,  # Good balance of latency vs efficiency
            callback=audio_callback,
        )
        try:
            stream.start()
        except Exception:
            # Do not leak the device handle when the stream fails to start.
            stream.close()
            self._callback_queue = None
            raise

        self._stream = stream
        self._is_capturing = True

        # Start processing task
        self._process_task = asyncio.create_task(self._process_audio())

    async def stop(self) -> None:
        """
        Stop audio capture.

        No-op if capture is not running. The stream is shut down before the
        processing task is awaited, so the device is released even if that
        task ended with an exception (which is then re-raised here).
        """
        if not self._is_capturing:
            return

        self._is_capturing = False

        stream, self._stream = self._stream, None
        task, self._process_task = self._process_task, None

        try:
            if stream is not None:
                try:
                    stream.stop()
                finally:
                    stream.close()
        finally:
            if task is not None:
                task.cancel()
                try:
                    await task
                except asyncio.CancelledError:
                    pass
            self._buffer = []

    async def _process_audio(self) -> None:
        """Process audio from queue and emit frames."""
        frame_len = AUDIO_CONFIG.SAMPLES_PER_FRAME

        while self._is_capturing:
            try:
                # The timeout only bounds how long we wait before re-checking
                # the _is_capturing flag.
                audio_data = await asyncio.wait_for(
                    self._callback_queue.get(),
                    timeout=0.1
                )

                # Append to buffer
                self._buffer.extend(audio_data)

                # Extract complete frames; a partial tail stays buffered
                while len(self._buffer) >= frame_len:
                    frame_samples = self._buffer[:frame_len]
                    self._buffer = self._buffer[frame_len:]

                    # Convert to PCM16
                    pcm_frame = float_to_pcm16(frame_samples)

                    if self.on_frame:
                        self.on_frame(pcm_frame)

            except asyncio.TimeoutError:
                continue
            except asyncio.CancelledError:
                break
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
class PyAudioCapture:
    """
    Audio capture using PyAudio library.
    Fallback for environments where sounddevice doesn't work.

    Int16 blocks delivered by the PyAudio callback are normalised to floats,
    queued onto the event loop, re-chunked into
    AUDIO_CONFIG.SAMPLES_PER_FRAME-sample frames, converted to PCM16 and
    passed to ``on_frame``.
    """

    def __init__(self):
        if not PYAUDIO_AVAILABLE:
            raise ImportError(
                "pyaudio package is required for audio capture. "
                "Install with: pip install pyaudio"
            )

        self._pa: Optional[pyaudio.PyAudio] = None
        self._stream = None
        self._buffer: List[float] = []
        self._is_capturing = False
        self._callback_queue: Optional[asyncio.Queue] = None
        self._process_task: Optional[asyncio.Task] = None

        # Called with each complete PCM16 frame (bytes).
        self.on_frame: Optional[Callable[[bytes], None]] = None

    async def start(self) -> None:
        """
        Start audio capture from microphone.

        No-op if capture is already running. If the stream cannot be opened
        or started, the stream and the PyAudio instance are released before
        the exception propagates.
        """
        if self._is_capturing:
            return

        loop = asyncio.get_running_loop()
        queue: asyncio.Queue = asyncio.Queue()
        self._callback_queue = queue

        def audio_callback(in_data, frame_count, time_info, status):
            # Convert bytes to float samples
            num_samples = len(in_data) // 2
            samples = struct.unpack(f"<{num_samples}h", in_data)
            # Normalize to [-1, 1]
            float_samples = [s / 32768.0 for s in samples]

            # asyncio.Queue is not thread-safe and this callback is invoked
            # by the audio backend rather than by the event loop, so hand the
            # data over via the loop instead of touching the queue directly.
            try:
                loop.call_soon_threadsafe(queue.put_nowait, float_samples)
            except RuntimeError:
                pass  # Event loop already closed during shutdown: drop the block

            return (None, pyaudio.paContinue)

        pa = pyaudio.PyAudio()
        stream = None
        try:
            # Open stream
            stream = pa.open(
                format=pyaudio.paInt16,
                channels=AUDIO_CONFIG.CHANNELS,
                rate=AUDIO_CONFIG.SAMPLE_RATE,
                input=True,
                frames_per_buffer=1024,
                stream_callback=audio_callback,
            )
            stream.start_stream()
        except Exception:
            # Do not leak the stream or the PyAudio instance on failure.
            if stream is not None:
                stream.close()
            pa.terminate()
            self._callback_queue = None
            raise

        self._pa = pa
        self._stream = stream
        self._is_capturing = True

        # Start processing task
        self._process_task = asyncio.create_task(self._process_audio())

    async def stop(self) -> None:
        """
        Stop audio capture.

        No-op if capture is not running. The stream and PyAudio instance are
        released before the processing task is awaited, so they are freed
        even if that task ended with an exception (which is then re-raised
        here).
        """
        if not self._is_capturing:
            return

        self._is_capturing = False

        stream, self._stream = self._stream, None
        pa, self._pa = self._pa, None
        task, self._process_task = self._process_task, None

        try:
            try:
                if stream is not None:
                    try:
                        stream.stop_stream()
                    finally:
                        stream.close()
            finally:
                if pa is not None:
                    pa.terminate()
        finally:
            if task is not None:
                task.cancel()
                try:
                    await task
                except asyncio.CancelledError:
                    pass
            self._buffer = []

    async def _process_audio(self) -> None:
        """Process audio from queue and emit frames."""
        frame_len = AUDIO_CONFIG.SAMPLES_PER_FRAME

        while self._is_capturing:
            try:
                # The timeout only bounds how long we wait before re-checking
                # the _is_capturing flag.
                audio_data = await asyncio.wait_for(
                    self._callback_queue.get(),
                    timeout=0.1
                )

                self._buffer.extend(audio_data)

                # Extract complete frames; a partial tail stays buffered
                while len(self._buffer) >= frame_len:
                    frame_samples = self._buffer[:frame_len]
                    self._buffer = self._buffer[frame_len:]

                    pcm_frame = float_to_pcm16(frame_samples)

                    if self.on_frame:
                        self.on_frame(pcm_frame)

            except asyncio.TimeoutError:
                continue
            except asyncio.CancelledError:
                break
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
class AudioCapture:
    """
    AudioCapture - Main class for microphone capture.

    Automatically uses sounddevice if available, falls back to PyAudio.
    Outputs 20ms frames (640 bytes PCM16) at 16kHz mono.
    """

    def __init__(self):
        self._capture = None
        self._is_capturing = False
        # Keeps the task scheduled by dispose() referenced until it finishes.
        self._dispose_task: Optional[asyncio.Task] = None

        # Callback for each audio frame
        self.on_frame: Optional[Callable[[bytes], None]] = None

    @property
    def is_capturing(self) -> bool:
        """Check if currently capturing audio."""
        return self._is_capturing

    def _forward_frame(self, pcm_frame: bytes) -> None:
        """Relay a backend frame to the current ``on_frame`` handler, if any."""
        handler = self.on_frame
        if handler:
            handler(pcm_frame)

    async def _start_backend(self, backend) -> None:
        """Wire up and start *backend*; adopt it only once it started."""
        # Wire up frame callback before starting so no frame can be missed
        backend.on_frame = self._forward_frame
        await backend.start()
        self._capture = backend

    async def start(self) -> None:
        """
        Start audio capture from microphone.

        No-op if capture is already running. A backend that fails to start
        is not retained.

        Raises:
            ImportError: If no audio library is available
            Exception: If microphone access fails
        """
        if self._is_capturing:
            return

        # Choose capture method based on available libraries
        if SOUNDDEVICE_AVAILABLE:
            try:
                await self._start_backend(SoundDeviceCapture())
            except Exception as e:
                print(f"[OracleSDK] sounddevice failed, trying PyAudio: {e}")
                if PYAUDIO_AVAILABLE:
                    await self._start_backend(PyAudioCapture())
                else:
                    raise
        elif PYAUDIO_AVAILABLE:
            await self._start_backend(PyAudioCapture())
        else:
            raise ImportError(
                "Audio capture requires either sounddevice or pyaudio. "
                "Install with: pip install sounddevice numpy OR pip install pyaudio"
            )

        self._is_capturing = True

    async def stop(self) -> None:
        """
        Stop audio capture.

        No-op if capture is not running. State is reset even if the backend
        raises while stopping.
        """
        if not self._is_capturing or not self._capture:
            return

        backend = self._capture
        try:
            await backend.stop()
        finally:
            self._capture = None
            self._is_capturing = False

    def dispose(self) -> None:
        """
        Clean up resources.

        Clears ``on_frame`` and, if capture is active, schedules stop() on
        the running event loop. Safe to call on an idle instance without a
        running loop.

        Raises:
            RuntimeError: If capture is active but no event loop is running
                in this thread (await stop() instead).
        """
        self.on_frame = None

        if not self._is_capturing or not self._capture:
            return  # Nothing to stop, so no event loop is needed

        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            raise RuntimeError(
                "AudioCapture.dispose() needs a running event loop while "
                "capture is active; await stop() instead"
            ) from None

        # Hold a reference so the task is not garbage-collected mid-flight.
        self._dispose_task = loop.create_task(self.stop())
|