atom-audio-engine 0.1.4-py3-none-any.whl → 0.1.6-py3-none-any.whl
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- {atom_audio_engine-0.1.4.dist-info → atom_audio_engine-0.1.6.dist-info}/METADATA +1 -1
- atom_audio_engine-0.1.6.dist-info/RECORD +32 -0
- audio_engine/__init__.py +6 -2
- audio_engine/asr/__init__.py +48 -0
- audio_engine/asr/base.py +89 -0
- audio_engine/asr/cartesia.py +350 -0
- audio_engine/asr/deepgram.py +196 -0
- audio_engine/core/__init__.py +13 -0
- audio_engine/core/config.py +162 -0
- audio_engine/core/pipeline.py +278 -0
- audio_engine/core/types.py +87 -0
- audio_engine/integrations/__init__.py +5 -0
- audio_engine/integrations/geneface.py +297 -0
- audio_engine/llm/__init__.py +40 -0
- audio_engine/llm/base.py +106 -0
- audio_engine/llm/groq.py +208 -0
- audio_engine/pipelines/__init__.py +1 -0
- audio_engine/pipelines/personaplex/__init__.py +41 -0
- audio_engine/pipelines/personaplex/client.py +259 -0
- audio_engine/pipelines/personaplex/config.py +69 -0
- audio_engine/pipelines/personaplex/pipeline.py +301 -0
- audio_engine/pipelines/personaplex/types.py +173 -0
- audio_engine/pipelines/personaplex/utils.py +192 -0
- audio_engine/streaming/__init__.py +5 -0
- audio_engine/streaming/websocket_server.py +333 -0
- audio_engine/tts/__init__.py +35 -0
- audio_engine/tts/base.py +153 -0
- audio_engine/tts/cartesia.py +370 -0
- audio_engine/utils/__init__.py +15 -0
- audio_engine/utils/audio.py +218 -0
- atom_audio_engine-0.1.4.dist-info/RECORD +0 -5
- {atom_audio_engine-0.1.4.dist-info → atom_audio_engine-0.1.6.dist-info}/WHEEL +0 -0
- {atom_audio_engine-0.1.4.dist-info → atom_audio_engine-0.1.6.dist-info}/top_level.txt +0 -0
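
The PersonaPlex subpackage accounts for most of the new code; its three largest modules are reconstructed in the hunks below. As a minimal sketch of picking up the new version (the install command is an assumption; only the module paths above are certain from the diff):

```python
# Assumed upgrade command; the distribution name comes from the diff title:
#   pip install --upgrade "atom-audio-engine==0.1.6"
from audio_engine.pipelines.personaplex.pipeline import PersonaPlexPipeline

# Constructing the pipeline uses only defaults shown in the hunks below.
pipeline = PersonaPlexPipeline(system_prompt="You are a helpful AI.")
print(pipeline.session_id)  # fresh UUID4 per instance (see utils.generate_session_id)
```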
audio_engine/pipelines/personaplex/pipeline.py (new file, +301 lines)

@@ -0,0 +1,301 @@
+"""Main PersonaPlex pipeline orchestrator."""
+
+import asyncio
+import logging
+from typing import AsyncIterator, Optional, Tuple
+
+from .config import PersonaPlexConfig
+from .client import PersonaPlexClient
+from .types import MessageType, AudioChunk, TextChunk, SessionData
+from .utils import generate_session_id, get_timestamp_iso, save_transcript
+
+logger = logging.getLogger(__name__)
+
+
+class PersonaPlexPipeline:
+    """
+    Full-duplex speech-to-speech pipeline using PersonaPlex.
+
+    This pipeline handles real-time bidirectional communication:
+    - Sends user audio to PersonaPlex
+    - Receives assistant audio and text streamed from PersonaPlex
+    - Maintains a conversation transcript
+    - Optionally saves transcripts to disk
+
+    Unlike the audio-engine's sequential ASR→LLM→TTS pipeline, PersonaPlex
+    is truly full-duplex: the user can speak while the assistant responds.
+
+    Approach:
+    1. Create a session with a UUID and timestamp
+    2. Connect the client with the system prompt
+    3. Launch a concurrent receive task to handle server messages
+    4. Caller sends user audio; the pipeline yields received audio/text chunks
+    5. On stop, save the transcript and disconnect
+
+    Example:
+        ```python
+        pipeline = PersonaPlexPipeline(
+            system_prompt="You are a helpful AI.",
+            save_transcripts=True
+        )
+        await pipeline.start()
+
+        # Send user audio, receive assistant response
+        async for audio_chunk, text_chunk in pipeline.stream(user_audio_stream):
+            if audio_chunk:
+                play_audio(audio_chunk)
+            if text_chunk:
+                print(text_chunk.text, end="", flush=True)
+
+        transcript = await pipeline.stop()
+        ```
+    """
+
+    def __init__(
+        self,
+        config: Optional[PersonaPlexConfig] = None,
+        system_prompt: str = "You are a helpful AI assistant.",
+        save_transcripts: bool = True,
+        debug: bool = False,
+    ):
+        """
+        Initialize the PersonaPlex pipeline.
+
+        Args:
+            config: PersonaPlexConfig (uses defaults if None)
+            system_prompt: System prompt for persona control
+            save_transcripts: Whether to save the transcript after the session
+            debug: Enable debug logging
+        """
+        self.config = config or PersonaPlexConfig()
+        self.config.text_prompt = system_prompt
+        self.config.save_transcripts = save_transcripts
+
+        self.system_prompt = system_prompt
+        self.client = PersonaPlexClient(self.config)
+
+        # Session state
+        self.session_id = generate_session_id()
+        self.session_data = SessionData(
+            session_id=self.session_id,
+            timestamp=get_timestamp_iso(),
+            system_prompt=system_prompt,
+            voice_prompt=self.config.voice_prompt,
+        )
+
+        self._is_running = False
+        self._receive_task: Optional[asyncio.Task] = None
+        self._audio_queue: asyncio.Queue[Optional[AudioChunk]] = asyncio.Queue()
+        self._text_queue: asyncio.Queue[Optional[TextChunk]] = asyncio.Queue()
+
+        if debug:
+            logging.basicConfig(level=logging.DEBUG)
+
+        logger.info(f"PersonaPlexPipeline initialized (session: {self.session_id})")
+
+    async def start(self) -> None:
+        """
+        Connect to the PersonaPlex server and start listening for messages.
+
+        Raises:
+            ConnectionError: If the connection fails
+        """
+        if self._is_running:
+            logger.warning("Pipeline already running")
+            return
+
+        try:
+            await self.client.connect(self.system_prompt)
+            self._is_running = True
+
+            # Start background task to receive messages
+            self._receive_task = asyncio.create_task(self._receive_loop())
+            logger.info("PersonaPlex pipeline started")
+
+        except Exception as e:
+            logger.error(f"Failed to start pipeline: {e}")
+            raise
+
+    async def stop(self) -> Optional[SessionData]:
+        """
+        Stop the pipeline, close the connection, and optionally save the transcript.
+
+        Returns:
+            SessionData with the transcript, or None if the pipeline was not running
+        """
+        if not self._is_running:
+            logger.warning("Pipeline not running")
+            return None
+
+        try:
+            self._is_running = False
+
+            # Cancel receive task
+            if self._receive_task:
+                self._receive_task.cancel()
+                try:
+                    await self._receive_task
+                except asyncio.CancelledError:
+                    pass
+
+            # Disconnect from server
+            await self.client.disconnect()
+
+            # Save transcript if enabled
+            if self.config.save_transcripts:
+                transcript_path = save_transcript(
+                    self.session_data,
+                    self.config.transcript_path,
+                )
+                logger.info(f"Transcript saved: {transcript_path}")
+
+            logger.info("PersonaPlex pipeline stopped")
+            return self.session_data
+
+        except Exception as e:
+            logger.error(f"Error stopping pipeline: {e}")
+            raise
+
+    async def _receive_loop(self) -> None:
+        """
+        Background task: continuously receive messages from the server.
+
+        Puts audio/text chunks into their respective queues.
+        """
+        try:
+            async for message in self.client.stream_messages():
+                if not self._is_running:
+                    break
+
+                if message.type == MessageType.AUDIO:
+                    chunk = AudioChunk(
+                        data=message.data,  # type: ignore
+                        sample_rate=self.config.sample_rate,
+                    )
+                    await self._audio_queue.put(chunk)
+
+                elif message.type == MessageType.TEXT:
+                    text = (
+                        message.data.decode("utf-8")
+                        if isinstance(message.data, bytes)
+                        else message.data
+                    )
+                    chunk = TextChunk(text=text)
+                    # Track in transcript
+                    if text and text.strip():
+                        self.session_data.add_message("assistant", text)
+                    await self._text_queue.put(chunk)
+
+                elif message.type == MessageType.ERROR:
+                    error_msg = (
+                        message.data.decode("utf-8")
+                        if isinstance(message.data, bytes)
+                        else str(message.data)
+                    )
+                    logger.error(f"Server error: {error_msg}")
+
+        except asyncio.CancelledError:
+            logger.debug("Receive loop cancelled")
+        except Exception as e:
+            logger.error(f"Error in receive loop: {e}")
+
+    async def send_audio(self, audio_chunk: bytes) -> None:
+        """
+        Send an audio chunk to the PersonaPlex server.
+
+        Args:
+            audio_chunk: Raw Opus-encoded audio bytes
+        """
+        if not self._is_running:
+            raise RuntimeError("Pipeline not running")
+
+        try:
+            await self.client.send_audio(audio_chunk)
+            # User audio is not transcribed locally; PersonaPlex returns the
+            # text that is tracked in the transcript.
+        except Exception as e:
+            logger.error(f"Failed to send audio: {e}")
+            raise
+
+    async def stream(
+        self,
+        audio_stream: Optional[AsyncIterator[bytes]] = None,
+    ) -> AsyncIterator[Tuple[Optional[AudioChunk], Optional[TextChunk]]]:
+        """
+        Stream bidirectional audio/text from PersonaPlex.
+
+        This is an async generator that yields (audio_chunk, text_chunk) tuples.
+        If audio_stream is provided, user audio is sent concurrently.
+
+        Approach:
+        - If audio_stream is provided: spawn a task to continuously send user audio
+        - Concurrently receive audio and text from the server
+        - Yield (audio, text) tuples as they arrive (either can be None)
+
+        Args:
+            audio_stream: Optional async iterator of audio bytes to send
+
+        Yields:
+            Tuple of (AudioChunk or None, TextChunk or None)
+        """
+        if not self._is_running:
+            raise RuntimeError("Pipeline not running")
+
+        # Optional task to send user audio
+        send_task: Optional[asyncio.Task] = None
+
+        if audio_stream:
+
+            async def send_user_audio():
+                """Background task: send audio from the user stream."""
+                try:
+                    async for audio_chunk in audio_stream:
+                        if not self._is_running:
+                            break
+                        await self.send_audio(audio_chunk)
+                except asyncio.CancelledError:
+                    logger.debug("Send task cancelled")
+                except Exception as e:
+                    logger.error(f"Error sending audio: {e}")
+
+            send_task = asyncio.create_task(send_user_audio())
+
+        try:
+            while self._is_running:
+                # Poll both queues for audio or text (non-blocking)
+                try:
+                    # Try to get audio (non-blocking)
+                    audio_chunk = self._audio_queue.get_nowait()
+                except asyncio.QueueEmpty:
+                    audio_chunk = None
+
+                try:
+                    # Try to get text (non-blocking)
+                    text_chunk = self._text_queue.get_nowait()
+                except asyncio.QueueEmpty:
+                    text_chunk = None
+
+                # If we got something, yield it
+                if audio_chunk or text_chunk:
+                    yield (audio_chunk, text_chunk)
+                else:
+                    # Nothing available, wait a bit before polling again
+                    await asyncio.sleep(0.01)
+
+        finally:
+            # Clean up send task
+            if send_task:
+                send_task.cancel()
+                try:
+                    await send_task
+                except asyncio.CancelledError:
+                    pass
+
+    async def __aenter__(self) -> "PersonaPlexPipeline":
+        """Async context manager entry."""
+        await self.start()
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Async context manager exit."""
+        await self.stop()
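
A runnable driver sketch for the class above, using only the API shown in this hunk. It assumes a PersonaPlex server is reachable with the default PersonaPlexConfig; `mic_chunks` is a hypothetical stand-in for a real Opus-encoded microphone capture:

```python
import asyncio

from audio_engine.pipelines.personaplex.pipeline import PersonaPlexPipeline


async def mic_chunks():
    """Hypothetical audio source; a real one would yield Opus-encoded mic frames."""
    for _ in range(250):
        yield b"\x00" * 160  # placeholder payload
        await asyncio.sleep(0.02)


async def main():
    pipeline = PersonaPlexPipeline(system_prompt="You are a helpful AI.")
    await pipeline.start()

    async def consume():
        async for audio_chunk, text_chunk in pipeline.stream(mic_chunks()):
            if text_chunk:
                print(text_chunk.text, end="", flush=True)
            if audio_chunk:
                pass  # hand audio_chunk.data to an Opus decoder / player

    try:
        # Let the conversation run for ten seconds, then shut down.
        await asyncio.wait_for(consume(), timeout=10)
    except asyncio.TimeoutError:
        pass

    session = await pipeline.stop()
    if session:
        print(f"\n{len(session.messages)} assistant messages in transcript")


asyncio.run(main())
```

Cancelling `consume()` unwinds the async generator, whose `finally` block cancels the send task, so `stop()` only has to tear down the receive loop and connection.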
audio_engine/pipelines/personaplex/types.py (new file, +173 lines)

@@ -0,0 +1,173 @@
+"""Data types for PersonaPlex speech-to-speech pipeline."""
+
+from dataclasses import dataclass, field
+from typing import Optional
+from enum import Enum
+from datetime import UTC, datetime
+
+
+class MessageType(Enum):
+    """WebSocket message types for PersonaPlex protocol."""
+
+    HANDSHAKE = 0x00
+    AUDIO = 0x01
+    TEXT = 0x02
+    CONTROL = 0x03
+    METADATA = 0x04
+    ERROR = 0x05
+    PING = 0x06
+
+
+@dataclass
+class PersonaPlexMessage:
+    """
+    A message in the PersonaPlex WebSocket protocol.
+
+    Attributes:
+        type: Message type (audio, text, handshake, etc.)
+        data: Message payload (bytes for audio, str for text)
+        timestamp_ms: Optional timestamp in milliseconds
+    """
+
+    type: MessageType
+    data: bytes | str
+    timestamp_ms: Optional[int] = None
+
+    def encode(self) -> bytes:
+        """Encode message to binary format for transmission."""
+        type_byte = bytes([self.type.value])
+        if isinstance(self.data, bytes):
+            return type_byte + self.data
+        else:
+            return type_byte + self.data.encode("utf-8")
+
+    @classmethod
+    def decode(cls, data: bytes) -> "PersonaPlexMessage":
+        """Decode binary message from WebSocket."""
+        if len(data) < 1:
+            raise ValueError("Message too short")
+
+        msg_type = MessageType(data[0])
+        payload = data[1:]
+
+        # Text messages are UTF-8 decoded
+        if msg_type == MessageType.TEXT:
+            text_data = payload.decode("utf-8")
+            return cls(type=msg_type, data=text_data)
+        else:
+            return cls(type=msg_type, data=payload)
+
+
+@dataclass
+class TranscriptMessage:
+    """
+    A single message in the conversation transcript.
+
+    Attributes:
+        role: "user" or "assistant"
+        text: The message content
+        timestamp: ISO 8601 timestamp when message was generated
+    """
+
+    role: str
+    text: str
+    timestamp: str
+
+
+@dataclass
+class SessionData:
+    """
+    Metadata and transcript for a PersonaPlex session.
+
+    Attributes:
+        session_id: Unique session identifier (UUID)
+        timestamp: Session start time (ISO 8601)
+        system_prompt: System prompt used for the session
+        voice_prompt: Voice preset used (e.g., "NATF0.pt")
+        messages: List of transcript messages (user + assistant)
+    """
+
+    session_id: str
+    timestamp: str
+    system_prompt: str
+    voice_prompt: str
+    messages: list[TranscriptMessage] = field(default_factory=list)
+
+    def add_message(self, role: str, text: str) -> None:
+        """Add a message to the transcript."""
+        msg = TranscriptMessage(
+            role=role,
+            text=text,
+            timestamp=datetime.now(UTC).isoformat().replace("+00:00", "Z"),
+        )
+        self.messages.append(msg)
+
+    def to_dict(self) -> dict:
+        """Convert session data to dictionary for JSON serialization."""
+        return {
+            "session_id": self.session_id,
+            "timestamp": self.timestamp,
+            "system_prompt": self.system_prompt,
+            "voice_prompt": self.voice_prompt,
+            "messages": [
+                {
+                    "role": msg.role,
+                    "text": msg.text,
+                    "timestamp": msg.timestamp,
+                }
+                for msg in self.messages
+            ],
+        }
+
+    @classmethod
+    def from_dict(cls, data: dict) -> "SessionData":
+        """Create SessionData from dictionary."""
+        messages = [
+            TranscriptMessage(
+                role=msg["role"],
+                text=msg["text"],
+                timestamp=msg.get("timestamp", ""),
+            )
+            for msg in data.get("messages", [])
+        ]
+        return cls(
+            session_id=data["session_id"],
+            timestamp=data["timestamp"],
+            system_prompt=data["system_prompt"],
+            voice_prompt=data["voice_prompt"],
+            messages=messages,
+        )
+
+
+@dataclass
+class AudioChunk:
+    """
+    A chunk of audio data from PersonaPlex.
+
+    Attributes:
+        data: Raw Opus-encoded audio bytes
+        sample_rate: Sample rate in Hz (typically 48000)
+        timestamp_ms: When this chunk was generated
+        is_final: Whether this is the last chunk in a sequence
+    """
+
+    data: bytes
+    sample_rate: int = 48000
+    timestamp_ms: Optional[int] = None
+    is_final: bool = False
+
+
+@dataclass
+class TextChunk:
+    """
+    A text token from PersonaPlex LLM output.
+
+    Attributes:
+        text: Text content (partial or complete word)
+        timestamp_ms: When this token was generated
+        is_final: Whether this is the last token in a sequence
+    """
+
+    text: str
+    timestamp_ms: Optional[int] = None
+    is_final: bool = False
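
The one-byte type prefix makes the framing easy to verify. A minimal round trip through `encode`/`decode`, using only the API defined above:

```python
from audio_engine.pipelines.personaplex.types import MessageType, PersonaPlexMessage

# Text payload: encoded as [0x02][utf-8 bytes], decoded back to str.
msg = PersonaPlexMessage(type=MessageType.TEXT, data="hello")
wire = msg.encode()
assert wire[0] == MessageType.TEXT.value and wire[1:] == b"hello"

decoded = PersonaPlexMessage.decode(wire)
assert decoded.type is MessageType.TEXT and decoded.data == "hello"

# Audio payload: stays as raw bytes after the [0x01] prefix.
audio = PersonaPlexMessage(type=MessageType.AUDIO, data=b"\x01\x02\x03")
assert PersonaPlexMessage.decode(audio.encode()).data == b"\x01\x02\x03"
```

Note that `timestamp_ms` is not carried on the wire; only the type byte and payload are transmitted.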
audio_engine/pipelines/personaplex/utils.py (new file, +192 lines)

@@ -0,0 +1,192 @@
+"""Utility functions for the PersonaPlex pipeline."""
+
+import json
+import logging
+import uuid
+from pathlib import Path
+from datetime import datetime, UTC
+from typing import Optional
+
+from .types import SessionData
+
+logger = logging.getLogger(__name__)
+
+
+def generate_session_id() -> str:
+    """
+    Generate a unique session identifier.
+
+    Returns:
+        UUID4 string for this session
+    """
+    return str(uuid.uuid4())
+
+
+def get_timestamp_iso() -> str:
+    """
+    Get the current timestamp in ISO 8601 format with a Z suffix.
+
+    Returns:
+        Timestamp string (e.g., "2026-02-03T10:30:45.123456Z")
+    """
+    return datetime.now(UTC).isoformat().replace("+00:00", "Z")
+
+
+def save_transcript(
+    session_data: SessionData,
+    output_path: Optional[str] = None,
+) -> Path:
+    """
+    Save a session transcript to a JSON file.
+
+    Approach:
+    1. Convert SessionData to a dictionary
+    2. Write as formatted JSON
+    3. Return the path for verification
+
+    Args:
+        session_data: SessionData object with transcript
+        output_path: Directory to save the transcript (default: ./transcripts/)
+
+    Returns:
+        Path to the saved JSON file
+
+    Raises:
+        IOError: If the file write fails
+    """
+    if output_path is None:
+        output_path = "./transcripts/"
+
+    output_dir = Path(output_path)
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # Filename: session_id_YYYY-MM-DD.json
+    timestamp_str = session_data.timestamp.split("T")[0]  # Extract date
+    filename = f"{session_data.session_id}_{timestamp_str}.json"
+    filepath = output_dir / filename
+
+    try:
+        with open(filepath, "w") as f:
+            json.dump(session_data.to_dict(), f, indent=2)
+        logger.info(f"Transcript saved to {filepath}")
+        return filepath
+    except IOError as e:
+        logger.error(f"Failed to save transcript: {e}")
+        raise
+
+
+def load_transcript(filepath: str | Path) -> SessionData:
+    """
+    Load a session transcript from a JSON file.
+
+    Args:
+        filepath: Path to the transcript JSON file
+
+    Returns:
+        SessionData object
+
+    Raises:
+        FileNotFoundError: If the file doesn't exist
+        json.JSONDecodeError: If the JSON is invalid
+    """
+    filepath = Path(filepath)
+
+    try:
+        with open(filepath, "r") as f:
+            data = json.load(f)
+        logger.info(f"Loaded transcript from {filepath}")
+        return SessionData.from_dict(data)
+    except FileNotFoundError:
+        logger.error(f"Transcript file not found: {filepath}")
+        raise
+    except json.JSONDecodeError as e:
+        logger.error(f"Invalid JSON in transcript file: {e}")
+        raise
+
+
+def list_transcripts(directory: str | Path = "./transcripts/") -> list[Path]:
+    """
+    List all transcript files in a directory.
+
+    Args:
+        directory: Path to the transcripts directory
+
+    Returns:
+        List of Path objects for .json files, sorted by modification time (newest first)
+    """
+    dir_path = Path(directory)
+    if not dir_path.exists():
+        logger.warning(f"Transcripts directory does not exist: {directory}")
+        return []
+
+    transcripts = sorted(
+        dir_path.glob("*.json"),
+        key=lambda p: p.stat().st_mtime,
+        reverse=True,
+    )
+    return transcripts
+
+
+def format_transcript_for_display(session_data: SessionData) -> str:
+    """
+    Format a transcript as human-readable text.
+
+    Args:
+        session_data: SessionData object
+
+    Returns:
+        Formatted text with speaker labels and messages
+    """
+    lines = [
+        "=== PersonaPlex Session ===",
+        f"Session ID: {session_data.session_id}",
+        f"Started: {session_data.timestamp}",
+        f"Voice: {session_data.voice_prompt}",
+        f"Prompt: {session_data.system_prompt}",
+        "",
+        "--- Transcript ---",
+    ]
+
+    for msg in session_data.messages:
+        speaker = msg.role.upper()
+        lines.append(f"{speaker}: {msg.text}")
+        lines.append("")
+
+    return "\n".join(lines)
+
+
+def cleanup_old_transcripts(
+    directory: str | Path = "./transcripts/",
+    max_age_days: int = 30,
+) -> int:
+    """
+    Delete transcripts older than the specified number of days.
+
+    Args:
+        directory: Path to the transcripts directory
+        max_age_days: Delete files older than this many days
+
+    Returns:
+        Number of files deleted
+    """
+    import time
+
+    dir_path = Path(directory)
+    if not dir_path.exists():
+        return 0
+
+    cutoff_time = time.time() - (max_age_days * 24 * 60 * 60)
+    deleted_count = 0
+
+    for transcript_file in dir_path.glob("*.json"):
+        if transcript_file.stat().st_mtime < cutoff_time:
+            try:
+                transcript_file.unlink()
+                logger.info(f"Deleted old transcript: {transcript_file}")
+                deleted_count += 1
+            except OSError as e:
+                logger.error(f"Failed to delete {transcript_file}: {e}")
+
+    logger.info(f"Cleaned up {deleted_count} old transcripts")
+    return deleted_count
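
A short sketch exercising the transcript helpers end to end. It builds a SessionData by hand rather than running a live session, and writes into the default ./transcripts/ directory:

```python
from audio_engine.pipelines.personaplex.types import SessionData
from audio_engine.pipelines.personaplex.utils import (
    format_transcript_for_display,
    generate_session_id,
    get_timestamp_iso,
    list_transcripts,
    load_transcript,
    save_transcript,
)

session = SessionData(
    session_id=generate_session_id(),
    timestamp=get_timestamp_iso(),
    system_prompt="You are a helpful AI.",
    voice_prompt="NATF0.pt",
)
session.add_message("user", "Hello there.")
session.add_message("assistant", "Hi! How can I help?")

path = save_transcript(session, "./transcripts/")  # session_id_YYYY-MM-DD.json
restored = load_transcript(path)
assert restored.messages[1].text == "Hi! How can I help?"

print(format_transcript_for_display(restored))
print(list_transcripts("./transcripts/")[0])  # newest first
```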