PyPI - dominus-sdk-python - Versions diffs - 2.4.0__tar.gz → 2.5.1__tar.gz - Mend

dominus-sdk-python 2.4.0__tar.gz → 2.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

{dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dominus-sdk-python
-Version: 2.4.0
+Version: 2.5.1
 Summary: Python SDK for the Dominus Orchestrator Platform
 Author-email: CareBridge Systems <dev@carebridge.io>
 License: Proprietary
@@ -25,9 +25,20 @@ Requires-Dist: bcrypt>=4.0.0
 Requires-Dist: cryptography>=41.0.0
 Provides-Extra: jwt
 Requires-Dist: PyJWT>=2.8.0; extra == "jwt"
+Provides-Extra: oracle
+Requires-Dist: websockets>=12.0; extra == "oracle"
+Requires-Dist: sounddevice>=0.4.6; extra == "oracle"
+Requires-Dist: numpy>=1.24.0; extra == "oracle"
+Requires-Dist: webrtcvad>=2.0.10; extra == "oracle"
 Provides-Extra: dev
 Requires-Dist: pytest>=7.0.0; extra == "dev"
 Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
+Provides-Extra: all
+Requires-Dist: PyJWT>=2.8.0; extra == "all"
+Requires-Dist: websockets>=12.0; extra == "all"
+Requires-Dist: sounddevice>=0.4.6; extra == "all"
+Requires-Dist: numpy>=1.24.0; extra == "all"
+Requires-Dist: webrtcvad>=2.0.10; extra == "all"
 # CB Dominus SDK for Python

{dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/__init__.py RENAMED Viewed

@@ -100,6 +100,14 @@ from .namespaces.courier import CourierNamespace
 from .namespaces.health import HealthNamespace
 from .namespaces.open import OpenNamespace
+# Export Oracle namespace for speech-to-text
+from .namespaces.oracle import (
+    OracleNamespace,
+    OracleSession,
+    OracleSessionOptions,
+    VADState,
+)
 # Export cache and resilience utilities
 from .helpers.cache import (
     dominus_cache,
@@ -123,7 +131,7 @@ from .errors import (
     TimeoutError as DominusTimeoutError,
 )
-__version__ = "2.4.0"
+# Must match the Version field in PKG-INFO for this release (2.5.1)
+__version__ = "2.5.1"
 __all__ = [
     # Main SDK instance
     "dominus",
@@ -152,6 +160,11 @@ __all__ = [
     "CourierNamespace",
     "HealthNamespace",
     "OpenNamespace",
+    # Oracle namespace for speech-to-text
+    "OracleNamespace",
+    "OracleSession",
+    "OracleSessionOptions",
+    "VADState",
     # Cache and resilience utilities
     "dominus_cache",
     "CircuitBreaker",

{dominus_sdk_python-2.4.0 → dominus_sdk_python-2.5.1}/dominus/helpers/crypto.py RENAMED Viewed

@@ -116,3 +116,8 @@ def generate_token(length: int = 64) -> str:
         Random URL-safe token string
     """
     return secrets.token_urlsafe(length)
+# Aliases for cleaner imports from dominus.__init__
+# Each alias is bound to the same object as its *_local name, so both
+# spellings behave identically.
+verify_password = verify_password_local
+verify_psk = verify_psk_local

dominus_sdk_python-2.5.1/dominus/namespaces/oracle/__init__.py ADDED Viewed

@@ -0,0 +1,120 @@
+"""
+Oracle Namespace - Real-time streaming speech-to-text.
+Provides WebSocket-based streaming transcription via Deepgram,
+with built-in VAD (Voice Activity Detection) for cost optimization.
+Key features:
+- Automatic microphone capture and 16kHz resampling
+- VAD gating: only sends audio when speech is detected
+- 4-state VAD machine: IDLE -> ARMED -> SPEAKING -> TRAILING
+- Pre-roll buffer captures word onsets
+- Ping/pong keepalive during IDLE
+- NO send_audio() exposed - VAD handles everything
+Usage:
+    session = dominus.oracle.create_session(user_jwt)
+    session.on_ready = lambda: set_listening(True)
+    session.on_interim = lambda text: set_live_transcript(text)
+    session.on_utterance = lambda text: send_to_curator(text)
+    session.on_vad_state_change = lambda state: set_mic_state(state)
+    session.on_error = lambda error: show_error(error)
+    await session.start()
+    # ... user speaks, transcripts flow back ...
+    await session.stop()
+"""
+from typing import Optional, TYPE_CHECKING
+from .types import (
+    VADState,
+    OracleSessionOptions,
+    ResolvedOracleSessionOptions,
+    DEFAULT_OPTIONS,
+    AUDIO_CONFIG,
+)
+from .session import OracleSession
+if TYPE_CHECKING:
+    from ...start import Dominus
+# Re-export public types
+__all__ = [
+    "OracleNamespace",
+    "OracleSession",
+    "OracleSessionOptions",
+    "VADState",
+]
+class OracleNamespace:
+    """
+    OracleNamespace - Factory for creating streaming transcription sessions.
+    The Oracle namespace provides a simple API for real-time speech-to-text:
+    - create_session() creates a new transcription session
+    - Sessions handle mic capture, VAD, WebSocket, and transcripts internally
+    - NO raw audio access - VAD is mandatory for cost control
+    """
+    # Option fields copied into ResolvedOracleSessionOptions by create_session()
+    _OPTION_FIELDS = (
+        "preroll_ms",
+        "postroll_ms",
+        "armed_confirm_ms",
+        "vad_threshold",
+        "energy_threshold",
+        "ping_interval_ms",
+    )
+    def __init__(self, client: "Dominus"):
+        """
+        Args:
+            client: The Dominus SDK instance this namespace is attached to
+        """
+        # Imported at construction time rather than at module import time
+        from ...config.endpoints import BASE_URL
+        self._base_url = BASE_URL
+        self._client = client
+    def create_session(
+        self,
+        user_token: str,
+        options: Optional[OracleSessionOptions] = None
+    ) -> OracleSession:
+        """
+        Create a streaming transcription session.
+        The session handles everything internally:
+        - Microphone access and audio capture
+        - Resampling to 16kHz mono PCM
+        - VAD gating (only sends speech, not silence)
+        - WebSocket connection to Oracle
+        - Reconnection on connection loss
+        Args:
+            user_token: User JWT from portal.login()
+            options: Optional configuration overrides. Any field that is
+                missing or None falls back to the value in DEFAULT_OPTIONS.
+        Returns:
+            OracleSession ready to start()
+        Example:
+            session = dominus.oracle.create_session(user_jwt, OracleSessionOptions(
+                preroll_ms=320,      # Capture 320ms before speech
+                postroll_ms=400,     # Continue 400ms after speech
+                armed_confirm_ms=80, # Require 80ms to confirm speech
+            ))
+            session.on_utterance = lambda text: send_to_curator(text)
+            await session.start()
+        """
+        # Merge options with defaults, field by field. The previous
+        # "x if x != default else default" form always produced x, so an
+        # unset (None) field was passed through instead of being defaulted.
+        resolved = {}
+        for field_name in self._OPTION_FIELDS:
+            value = None
+            if options is not None:
+                value = getattr(options, field_name, None)
+            if value is None:
+                value = getattr(DEFAULT_OPTIONS, field_name)
+            resolved[field_name] = value
+        resolved_options = ResolvedOracleSessionOptions(**resolved)
+        return OracleSession(self._base_url, user_token, resolved_options)

dominus_sdk_python-2.5.1/dominus/namespaces/oracle/audio_capture.py ADDED Viewed

@@ -0,0 +1,382 @@
+"""
+AudioCapture - Microphone capture and resampling (INTERNAL)
+Handles:
+- Microphone access via sounddevice or pyaudio
+- Resampling to 16kHz mono if needed
+- Output 20ms frames (640 bytes PCM16)
+This module is INTERNAL and should NOT be exported publicly.
+"""
+import asyncio
+import struct
+import math
+from typing import Callable, Optional, List
+from dataclasses import dataclass
+from .types import AUDIO_CONFIG
+# Try to import audio libraries
+try:
+    import sounddevice as sd
+    import numpy as np
+    SOUNDDEVICE_AVAILABLE = True
+except ImportError:
+    SOUNDDEVICE_AVAILABLE = False
+    sd = None
+    np = None
+try:
+    import pyaudio
+    PYAUDIO_AVAILABLE = True
+except ImportError:
+    PYAUDIO_AVAILABLE = False
+    pyaudio = None
+def resample(input_data: List[float], from_rate: int, to_rate: int) -> List[float]:
+    """
+    Convert audio samples between sample rates by linear interpolation.
+    Args:
+        input_data: Input samples as floats
+        from_rate: Source sample rate
+        to_rate: Target sample rate
+    Returns:
+        A new list of resampled samples. When both rates are equal,
+        input_data itself is returned (not a copy).
+    """
+    if from_rate == to_rate:
+        return input_data
+    step = from_rate / to_rate
+    sample_count = math.ceil(len(input_data) / step)
+    last_index = len(input_data) - 1
+    def sample_at(position):
+        # Blend the two input samples surrounding the fractional position;
+        # the upper neighbour is clamped to the last input sample.
+        lower = int(position)
+        upper = min(lower + 1, last_index)
+        weight = position - lower
+        return input_data[lower] * (1 - weight) + input_data[upper] * weight
+    return [sample_at(n * step) for n in range(sample_count)]
+def float_to_pcm16(samples: List[float]) -> bytes:
+    """
+    Encode float samples as signed 16-bit little-endian PCM.
+    Args:
+        samples: Float samples in range [-1, 1]; values outside are clamped
+    Returns:
+        PCM16 bytes (little-endian), two bytes per sample
+    """
+    def quantize(sample):
+        clamped = max(-1.0, min(1.0, sample))
+        # Negative values scale by 0x8000 and non-negative by 0x7FFF, so the
+        # extremes map to -32768 and 32767 (int() truncates toward zero).
+        scale = 0x8000 if clamped < 0 else 0x7FFF
+        return int(clamped * scale)
+    quantized = [quantize(value) for value in samples]
+    return struct.pack(f"<{len(quantized)}h", *quantized)
+class SoundDeviceCapture:
+    """
+    Audio capture using sounddevice library.
+    Preferred for cross-platform compatibility.
+    Sample blocks delivered by the stream callback are queued, cut into frames
+    of AUDIO_CONFIG.SAMPLES_PER_FRAME samples, converted to PCM16 and handed
+    to on_frame.
+    """
+    def __init__(self):
+        if not SOUNDDEVICE_AVAILABLE:
+            raise ImportError(
+                "sounddevice package is required for audio capture. "
+                "Install with: pip install sounddevice numpy"
+            )
+        self._stream: Optional[sd.InputStream] = None
+        # Samples received so far that do not yet fill a complete frame
+        self._buffer: List[float] = []
+        self._is_capturing = False
+        # Created in start(); carries sample blocks from the audio callback
+        self._callback_queue: Optional[asyncio.Queue] = None
+        self._process_task: Optional[asyncio.Task] = None
+        # Called with each complete PCM16 frame
+        self.on_frame: Optional[Callable[[bytes], None]] = None
+    async def start(self) -> None:
+        """
+        Start audio capture from microphone.
+        Must be called from a running event loop. Does nothing if capture is
+        already active.
+        Raises:
+            Exception: Whatever sounddevice raises if the input stream cannot
+                be opened or started
+        """
+        if self._is_capturing:
+            return
+        loop = asyncio.get_running_loop()
+        queue: asyncio.Queue = asyncio.Queue()
+        self._callback_queue = queue
+        def audio_callback(indata, frames, time_info, status):
+            if status:
+                print(f"[OracleSDK] Audio status: {status}")
+            # Convert numpy array to list of floats
+            mono_data = indata[:, 0].tolist() if indata.ndim > 1 else indata.flatten().tolist()
+            # asyncio.Queue is not thread-safe and this callback is expected
+            # to run outside the event loop thread (NOTE(review): confirm
+            # against the sounddevice docs), so hand the block to the loop
+            # instead of touching the queue directly.
+            try:
+                loop.call_soon_threadsafe(queue.put_nowait, mono_data)
+            except RuntimeError:
+                pass  # Event loop already closed: drop the block
+        # Open stream
+        stream = sd.InputStream(
+            samplerate=AUDIO_CONFIG.SAMPLE_RATE,
+            channels=AUDIO_CONFIG.CHANNELS,
+            dtype='float32',
+            blocksize=1024,  # Good balance of latency vs efficiency
+            callback=audio_callback,
+        )
+        try:
+            stream.start()
+        except Exception:
+            # Do not leave a half-opened stream behind
+            stream.close()
+            raise
+        self._stream = stream
+        self._is_capturing = True
+        # Start processing task
+        self._process_task = asyncio.create_task(self._process_audio())
+    async def stop(self) -> None:
+        """Stop audio capture and discard any buffered samples."""
+        if not self._is_capturing:
+            return
+        self._is_capturing = False
+        if self._process_task:
+            self._process_task.cancel()
+            try:
+                await self._process_task
+            except asyncio.CancelledError:
+                pass
+            self._process_task = None
+        if self._stream:
+            self._stream.stop()
+            self._stream.close()
+            self._stream = None
+        self._buffer = []
+    async def _process_audio(self) -> None:
+        """Process audio from queue and emit frames."""
+        while self._is_capturing:
+            try:
+                # The timeout lets the loop re-check _is_capturing regularly
+                audio_data = await asyncio.wait_for(
+                    self._callback_queue.get(),
+                    timeout=0.1
+                )
+                # Append to buffer
+                self._buffer.extend(audio_data)
+                # Extract complete frames
+                while len(self._buffer) >= AUDIO_CONFIG.SAMPLES_PER_FRAME:
+                    frame_samples = self._buffer[:AUDIO_CONFIG.SAMPLES_PER_FRAME]
+                    self._buffer = self._buffer[AUDIO_CONFIG.SAMPLES_PER_FRAME:]
+                    # Convert to PCM16
+                    pcm_frame = float_to_pcm16(frame_samples)
+                    if self.on_frame:
+                        self.on_frame(pcm_frame)
+            except asyncio.TimeoutError:
+                continue
+            except asyncio.CancelledError:
+                break
+class PyAudioCapture:
+    """
+    Audio capture using PyAudio library.
+    Fallback for environments where sounddevice doesn't work.
+    Int16 buffers delivered by the stream callback are normalized to floats,
+    queued, cut into frames of AUDIO_CONFIG.SAMPLES_PER_FRAME samples,
+    converted to PCM16 and handed to on_frame.
+    """
+    def __init__(self):
+        if not PYAUDIO_AVAILABLE:
+            raise ImportError(
+                "pyaudio package is required for audio capture. "
+                "Install with: pip install pyaudio"
+            )
+        self._pa: Optional[pyaudio.PyAudio] = None
+        self._stream = None
+        # Samples received so far that do not yet fill a complete frame
+        self._buffer: List[float] = []
+        self._is_capturing = False
+        # Created in start(); carries sample blocks from the audio callback
+        self._callback_queue: Optional[asyncio.Queue] = None
+        self._process_task: Optional[asyncio.Task] = None
+        # Called with each complete PCM16 frame
+        self.on_frame: Optional[Callable[[bytes], None]] = None
+    async def start(self) -> None:
+        """
+        Start audio capture from microphone.
+        Must be called from a running event loop. Does nothing if capture is
+        already active.
+        Raises:
+            Exception: Whatever PyAudio raises if the input stream cannot be
+                opened or started; PyAudio is terminated before re-raising
+        """
+        if self._is_capturing:
+            return
+        loop = asyncio.get_running_loop()
+        queue: asyncio.Queue = asyncio.Queue()
+        self._callback_queue = queue
+        def audio_callback(in_data, frame_count, time_info, status):
+            # Convert bytes to float samples
+            num_samples = len(in_data) // 2
+            samples = struct.unpack(f"<{num_samples}h", in_data)
+            # Normalize to [-1, 1]
+            float_samples = [s / 32768.0 for s in samples]
+            # asyncio.Queue is not thread-safe and this callback is expected
+            # to run outside the event loop thread (NOTE(review): confirm
+            # against the PyAudio docs), so hand the block to the loop
+            # instead of touching the queue directly.
+            try:
+                loop.call_soon_threadsafe(queue.put_nowait, float_samples)
+            except RuntimeError:
+                pass  # Event loop already closed: drop the block
+            return (None, pyaudio.paContinue)
+        pa = pyaudio.PyAudio()
+        stream = None
+        try:
+            # Open stream
+            stream = pa.open(
+                format=pyaudio.paInt16,
+                channels=AUDIO_CONFIG.CHANNELS,
+                rate=AUDIO_CONFIG.SAMPLE_RATE,
+                input=True,
+                frames_per_buffer=1024,
+                stream_callback=audio_callback,
+            )
+            stream.start_stream()
+        except Exception:
+            # stop() is a no-op while not capturing, so release here
+            if stream is not None:
+                stream.close()
+            pa.terminate()
+            raise
+        self._pa = pa
+        self._stream = stream
+        self._is_capturing = True
+        # Start processing task
+        self._process_task = asyncio.create_task(self._process_audio())
+    async def stop(self) -> None:
+        """Stop audio capture, release PyAudio and discard buffered samples."""
+        if not self._is_capturing:
+            return
+        self._is_capturing = False
+        if self._process_task:
+            self._process_task.cancel()
+            try:
+                await self._process_task
+            except asyncio.CancelledError:
+                pass
+            self._process_task = None
+        if self._stream:
+            self._stream.stop_stream()
+            self._stream.close()
+            self._stream = None
+        if self._pa:
+            self._pa.terminate()
+            self._pa = None
+        self._buffer = []
+    async def _process_audio(self) -> None:
+        """Process audio from queue and emit frames."""
+        while self._is_capturing:
+            try:
+                # The timeout lets the loop re-check _is_capturing regularly
+                audio_data = await asyncio.wait_for(
+                    self._callback_queue.get(),
+                    timeout=0.1
+                )
+                self._buffer.extend(audio_data)
+                # Extract complete frames
+                while len(self._buffer) >= AUDIO_CONFIG.SAMPLES_PER_FRAME:
+                    frame_samples = self._buffer[:AUDIO_CONFIG.SAMPLES_PER_FRAME]
+                    self._buffer = self._buffer[AUDIO_CONFIG.SAMPLES_PER_FRAME:]
+                    pcm_frame = float_to_pcm16(frame_samples)
+                    if self.on_frame:
+                        self.on_frame(pcm_frame)
+            except asyncio.TimeoutError:
+                continue
+            except asyncio.CancelledError:
+                break
+class AudioCapture:
+    """
+    AudioCapture - Main class for microphone capture.
+    Automatically uses sounddevice if available, falls back to PyAudio.
+    Outputs 20ms frames (640 bytes PCM16) at 16kHz mono.
+    """
+    def __init__(self):
+        # Active backend (SoundDeviceCapture or PyAudioCapture) once started
+        self._capture = None
+        self._is_capturing = False
+        # Holds the stop() task scheduled by dispose() so it is not
+        # garbage-collected before it finishes
+        self._dispose_task: Optional[asyncio.Task] = None
+        # Callback for each audio frame
+        self.on_frame: Optional[Callable[[bytes], None]] = None
+    @property
+    def is_capturing(self) -> bool:
+        """Check if currently capturing audio."""
+        return self._is_capturing
+    async def start(self) -> None:
+        """
+        Start audio capture from microphone.
+        Raises:
+            ImportError: If no audio library is available
+            Exception: If microphone access fails
+        """
+        if self._is_capturing:
+            return
+        # Choose capture method based on available libraries
+        if SOUNDDEVICE_AVAILABLE:
+            try:
+                self._capture = SoundDeviceCapture()
+                await self._capture.start()
+            except Exception as e:
+                print(f"[OracleSDK] sounddevice failed, trying PyAudio: {e}")
+                if PYAUDIO_AVAILABLE:
+                    self._capture = PyAudioCapture()
+                    await self._capture.start()
+                else:
+                    raise
+        elif PYAUDIO_AVAILABLE:
+            self._capture = PyAudioCapture()
+            await self._capture.start()
+        else:
+            raise ImportError(
+                "Audio capture requires either sounddevice or pyaudio. "
+                "Install with: pip install sounddevice numpy  OR  pip install pyaudio"
+            )
+        # Wire up frame callback; on_frame is looked up per frame so it can be
+        # reassigned while capturing
+        self._capture.on_frame = lambda pcm_frame: (
+            self.on_frame(pcm_frame) if self.on_frame else None
+        )
+        self._is_capturing = True
+    async def stop(self) -> None:
+        """Stop audio capture."""
+        if not self._is_capturing or not self._capture:
+            return
+        await self._capture.stop()
+        self._capture = None
+        self._is_capturing = False
+    def dispose(self) -> None:
+        """
+        Clean up resources.
+        Clears on_frame and, when called while an event loop is running,
+        schedules stop() on it. Without a running loop stop() cannot be
+        scheduled, so only the callback is cleared; await stop() beforehand
+        in that case.
+        """
+        try:
+            loop = asyncio.get_running_loop()
+        except RuntimeError:
+            loop = None  # No running event loop in this thread
+        if loop is not None and self._is_capturing:
+            self._dispose_task = loop.create_task(self.stop())
+        self.on_frame = None

dominus-sdk-python 2.4.0__tar.gz → 2.5.1__tar.gz

dominus-sdk-python 2.4.0__tar.gz → 2.5.1__tar.gz