atom-audio-engine 0.1.4-py3-none-any.whl → 0.1.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. {atom_audio_engine-0.1.4.dist-info → atom_audio_engine-0.1.6.dist-info}/METADATA +1 -1
  2. atom_audio_engine-0.1.6.dist-info/RECORD +32 -0
  3. audio_engine/__init__.py +6 -2
  4. audio_engine/asr/__init__.py +48 -0
  5. audio_engine/asr/base.py +89 -0
  6. audio_engine/asr/cartesia.py +350 -0
  7. audio_engine/asr/deepgram.py +196 -0
  8. audio_engine/core/__init__.py +13 -0
  9. audio_engine/core/config.py +162 -0
  10. audio_engine/core/pipeline.py +278 -0
  11. audio_engine/core/types.py +87 -0
  12. audio_engine/integrations/__init__.py +5 -0
  13. audio_engine/integrations/geneface.py +297 -0
  14. audio_engine/llm/__init__.py +40 -0
  15. audio_engine/llm/base.py +106 -0
  16. audio_engine/llm/groq.py +208 -0
  17. audio_engine/pipelines/__init__.py +1 -0
  18. audio_engine/pipelines/personaplex/__init__.py +41 -0
  19. audio_engine/pipelines/personaplex/client.py +259 -0
  20. audio_engine/pipelines/personaplex/config.py +69 -0
  21. audio_engine/pipelines/personaplex/pipeline.py +301 -0
  22. audio_engine/pipelines/personaplex/types.py +173 -0
  23. audio_engine/pipelines/personaplex/utils.py +192 -0
  24. audio_engine/streaming/__init__.py +5 -0
  25. audio_engine/streaming/websocket_server.py +333 -0
  26. audio_engine/tts/__init__.py +35 -0
  27. audio_engine/tts/base.py +153 -0
  28. audio_engine/tts/cartesia.py +370 -0
  29. audio_engine/utils/__init__.py +15 -0
  30. audio_engine/utils/audio.py +218 -0
  31. atom_audio_engine-0.1.4.dist-info/RECORD +0 -5
  32. {atom_audio_engine-0.1.4.dist-info → atom_audio_engine-0.1.6.dist-info}/WHEEL +0 -0
  33. {atom_audio_engine-0.1.4.dist-info → atom_audio_engine-0.1.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,208 @@
+ """Groq API implementation for LLM (Language Model)."""
+
+ import logging
+ from typing import AsyncIterator, Optional
+
+ from groq import AsyncGroq
+
+ from ..core.types import ResponseChunk, ConversationContext
+ from .base import BaseLLM
+
+ logger = logging.getLogger(__name__)
+
+
+ class GroqLLM(BaseLLM):
+     """
+     Groq API client for language model text generation.
+
+     Supports both batch and streaming text generation with low
+     latency for conversational AI. Uses Groq's optimized inference engine.
+
+     Example:
+         llm = GroqLLM(
+             api_key="gsk_...",
+             model="llama-3.1-8b-instant"
+         )
+
+         # Batch generation
+         response = await llm.generate("Hello", context=conversation)
+
+         # Streaming generation
+         async for chunk in llm.generate_stream("Hello", context=conversation):
+             print(chunk.text, end="", flush=True)
+     """
+
+     def __init__(
+         self,
+         api_key: Optional[str] = None,
+         model: str = "llama-3.1-8b-instant",
+         temperature: float = 0.7,
+         max_tokens: int = 1024,
+         system_prompt: Optional[str] = None,
+         **kwargs,
+     ):
+         """
+         Initialize Groq LLM provider.
+
+         Args:
+             api_key: Groq API key
+             model: Model to use (default "llama-3.1-8b-instant"; alternatives:
+                 "llama-3.3-70b-versatile", "mixtral-8x7b-32768")
+             temperature: Sampling temperature (0.0-2.0)
+             max_tokens: Maximum tokens in response
+             system_prompt: Default system prompt
+             **kwargs: Additional config (stored in self.config)
+         """
+         super().__init__(
+             api_key=api_key,
+             model=model,
+             temperature=temperature,
+             max_tokens=max_tokens,
+             system_prompt=system_prompt,
+             **kwargs,
+         )
+         self.client = None
+
+     @property
+     def name(self) -> str:
+         """Return provider name."""
+         return "groq"
+
+     async def connect(self):
+         """Initialize the async Groq client (the sync client would block the event loop)."""
+         try:
+             self.client = AsyncGroq(api_key=self.api_key)
+             logger.debug("Groq client initialized")
+         except Exception as e:
+             logger.error(f"Failed to initialize Groq client: {e}")
+             raise
+
+     async def disconnect(self):
+         """Close Groq client connection."""
+         if self.client:
+             try:
+                 # Groq client cleanup (if supported)
+                 pass
+             except Exception as e:
+                 logger.error(f"Error disconnecting Groq: {e}")
+
+     async def generate(self, prompt: str, context: Optional[ConversationContext] = None) -> str:
+         """
+         Generate a complete response to a prompt.
+
+         Args:
+             prompt: User's input text
+             context: Optional conversation history
+
+         Returns:
+             Complete response text
+         """
+         if not self.client:
+             await self.connect()
+
+         try:
+             # Build message list from context
+             messages = []
+
+             # Add system prompt; an explicit system_prompt wins, the
+             # context's prompt is only the fallback (parenthesized to avoid
+             # the conditional expression binding over the whole `or`)
+             system = self.system_prompt or (context.system_prompt if context else None)
+             if system:
+                 messages.append({"role": "system", "content": system})
+
+             # Add conversation history
+             if context:
+                 for msg in context.get_messages_for_llm():
+                     if msg["role"] != "system":  # Avoid duplicate system prompt
+                         messages.append(msg)
+
+             # Add current prompt
+             messages.append({"role": "user", "content": prompt})
+
+             logger.debug(f"Generating response with {len(messages)} messages")
+
+             # Call Groq API
+             response = await self.client.chat.completions.create(
+                 model=self.model,
+                 messages=messages,
+                 temperature=self.temperature,
+                 max_tokens=self.max_tokens,
+                 stream=False,
+             )
+
+             # Extract text
+             if response.choices and response.choices[0].message:
+                 text = response.choices[0].message.content or ""
+                 logger.debug(f"Generated response: {text[:100]}...")
+                 return text
+
+             return ""
+
+         except Exception as e:
+             logger.error(f"Groq generation error: {e}")
+             raise
+
+     async def generate_stream(
+         self, prompt: str, context: Optional[ConversationContext] = None
+     ) -> AsyncIterator[ResponseChunk]:
+         """
+         Generate a streaming response to a prompt.
+
+         Yields text chunks as they are generated for real-time display.
+
+         Args:
+             prompt: User's input text
+             context: Optional conversation history
+
+         Yields:
+             ResponseChunk objects with partial and final text
+         """
+         if not self.client:
+             await self.connect()
+
+         try:
+             # Build message list from context
+             messages = []
+
+             # Add system prompt (explicit system_prompt wins; context's is the fallback)
+             system = self.system_prompt or (context.system_prompt if context else None)
+             if system:
+                 messages.append({"role": "system", "content": system})
+
+             # Add conversation history
+             if context:
+                 for msg in context.get_messages_for_llm():
+                     if msg["role"] != "system":  # Avoid duplicate system prompt
+                         messages.append(msg)
+
+             # Add current prompt
+             messages.append({"role": "user", "content": prompt})
+
+             logger.debug(f"Streaming response with {len(messages)} messages")
+
+             # Call Groq API with streaming
+             response = await self.client.chat.completions.create(
+                 model=self.model,
+                 messages=messages,
+                 temperature=self.temperature,
+                 max_tokens=self.max_tokens,
+                 stream=True,
+             )
+
+             full_text = ""
+             async for chunk in response:
+                 choice = chunk.choices[0] if chunk.choices else None
+                 if choice is None:
+                     continue
+
+                 delta = choice.delta.content if choice.delta else None
+                 if delta:
+                     full_text += delta
+                     yield ResponseChunk(text=delta, is_final=False)
+
+                 # finish_reason arrives on the last chunk, which usually carries
+                 # no content, so it must be checked outside the delta branch or
+                 # is_final would never be emitted
+                 if choice.finish_reason is not None:
+                     yield ResponseChunk(text="", is_final=True)
+
+             logger.debug(f"Streaming complete. Total: {full_text[:100]}...")
+
+         except Exception as e:
+             logger.error(f"Groq streaming error: {e}")
+             raise
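The system-prompt precedence fixed above is worth spelling out: a `system_prompt` passed to the constructor always wins, and `context.system_prompt` only applies when none was given. A minimal consumption sketch under that assumption (the API key is a placeholder; `ResponseChunk` fields are as shown in this diff):

    import asyncio

    from audio_engine.llm.groq import GroqLLM


    async def main():
        llm = GroqLLM(api_key="gsk_...", system_prompt="Answer in one short sentence.")

        # Batch: returns the complete reply as a string
        print(await llm.generate("What is the Opus codec?"))

        # Streaming: print partial text as it arrives, stop on the final marker
        async for chunk in llm.generate_stream("And why is it used for speech?"):
            print(chunk.text, end="", flush=True)
            if chunk.is_final:
                break

        await llm.disconnect()


    asyncio.run(main())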
@@ -0,0 +1 @@
+ """Pipeline implementations for audio-engine."""
@@ -0,0 +1,41 @@
+ """PersonaPlex speech-to-speech pipeline integration."""
+
+ from .config import PersonaPlexConfig
+ from .types import (
+     MessageType,
+     PersonaPlexMessage,
+     TranscriptMessage,
+     SessionData,
+     AudioChunk,
+     TextChunk,
+ )
+ from .client import PersonaPlexClient
+ from .pipeline import PersonaPlexPipeline
+ from .utils import (
+     generate_session_id,
+     get_timestamp_iso,
+     save_transcript,
+     load_transcript,
+     list_transcripts,
+     format_transcript_for_display,
+     cleanup_old_transcripts,
+ )
+
+ __all__ = [
+     "PersonaPlexConfig",
+     "PersonaPlexClient",
+     "PersonaPlexPipeline",
+     "MessageType",
+     "PersonaPlexMessage",
+     "TranscriptMessage",
+     "SessionData",
+     "AudioChunk",
+     "TextChunk",
+     "generate_session_id",
+     "get_timestamp_iso",
+     "save_transcript",
+     "load_transcript",
+     "list_transcripts",
+     "format_transcript_for_display",
+     "cleanup_old_transcripts",
+ ]
@@ -0,0 +1,259 @@
+ """Low-level WebSocket client for PersonaPlex."""
+
+ import asyncio
+ import logging
+ from typing import AsyncIterator, Optional
+ from urllib.parse import urlencode
+
+ import websockets
+ from websockets.asyncio.client import ClientConnection
+
+ from .config import PersonaPlexConfig
+ from .types import MessageType, PersonaPlexMessage, AudioChunk, TextChunk
+
+ logger = logging.getLogger(__name__)
+
+
+ class PersonaPlexClient:
+     """
+     WebSocket client for the PersonaPlex speech-to-speech model.
+
+     Handles binary message encoding/decoding, Opus audio streaming,
+     and bidirectional text/audio communication.
+
+     Approach:
+     - Connect to the WebSocket URL with query parameters (text_prompt, voice_prompt, etc.)
+     - Send Opus audio chunks with a 0x01 type prefix
+     - Receive Opus audio and text tokens asynchronously
+     - Handle the connection lifecycle: connect, send, receive, disconnect
+     """
+
+     def __init__(self, config: PersonaPlexConfig):
+         """
+         Initialize PersonaPlex WebSocket client.
+
+         Args:
+             config: PersonaPlexConfig with server URL and model parameters
+         """
+         self.config = config
+         self.connection: Optional[ClientConnection] = None
+         self._is_connected = False
+
+     def _build_url(self, system_prompt: str) -> str:
+         """
+         Build the WebSocket URL with query parameters.
+
+         Args:
+             system_prompt: Text prompt for controlling persona/behavior
+
+         Returns:
+             Full WebSocket URL with encoded parameters
+         """
+         params = {
+             "text_prompt": system_prompt,
+             "voice_prompt": self.config.voice_prompt,
+             "text_temperature": str(self.config.text_temperature),
+             "audio_temperature": str(self.config.audio_temperature),
+             "text_topk": str(self.config.text_topk),
+             "audio_topk": str(self.config.audio_topk),
+         }
+
+         # URL-encode the parameters; prompts may contain spaces and punctuation
+         return f"{self.config.server_url}?{urlencode(params)}"
+
+     async def connect(self, system_prompt: str) -> None:
+         """
+         Connect to the PersonaPlex WebSocket server.
+
+         Args:
+             system_prompt: System prompt for persona control
+
+         Raises:
+             ConnectionError: If the connection fails
+         """
+         if self._is_connected:
+             logger.warning("Already connected, skipping reconnect")
+             return
+
+         try:
+             url = self._build_url(system_prompt)
+             logger.debug(f"Connecting to PersonaPlex at {url}")
+
+             self.connection = await websockets.connect(
+                 url,
+                 ping_interval=30,  # Send a ping every 30s to keep the connection alive
+                 ping_timeout=10,
+             )
+             self._is_connected = True
+             logger.info("Connected to PersonaPlex server")
+
+         except Exception as e:
+             logger.error(f"Failed to connect to PersonaPlex: {e}")
+             raise ConnectionError(f"PersonaPlex connection failed: {e}") from e
+
+     async def disconnect(self) -> None:
+         """Close the WebSocket connection."""
+         if self.connection and self._is_connected:
+             try:
+                 await self.connection.close()
+                 logger.info("Disconnected from PersonaPlex server")
+             except Exception as e:
+                 logger.error(f"Error closing connection: {e}")
+             finally:
+                 self.connection = None
+                 self._is_connected = False
+
+     async def send_audio(self, audio_chunk: bytes) -> None:
+         """
+         Send an Opus-encoded audio chunk to the server.
+
+         Args:
+             audio_chunk: Raw Opus-encoded audio bytes
+
+         Raises:
+             RuntimeError: If not connected
+         """
+         if not self._is_connected or not self.connection:
+             raise RuntimeError("Not connected to PersonaPlex server")
+
+         try:
+             # Message format: 0x01 (audio type) + Opus bytes
+             message = MessageType.AUDIO.value.to_bytes(1, "big") + audio_chunk
+             await self.connection.send(message)
+         except Exception as e:
+             logger.error(f"Failed to send audio: {e}")
+             raise
+
+     async def receive_audio(self) -> Optional[AudioChunk]:
+         """
+         Receive the next audio chunk from the server.
+
+         Returns:
+             AudioChunk with Opus data, or None on timeout or server error
+
+         Raises:
+             RuntimeError: If not connected
+         """
+         if not self._is_connected or not self.connection:
+             raise RuntimeError("Not connected to PersonaPlex server")
+
+         try:
+             message = await asyncio.wait_for(
+                 self.connection.recv(),
+                 timeout=self.config.session_timeout_seconds,
+             )
+
+             if isinstance(message, bytes):
+                 parsed = PersonaPlexMessage.decode(message)
+                 if parsed.type == MessageType.AUDIO:
+                     return AudioChunk(
+                         data=parsed.data,  # type: ignore
+                         sample_rate=self.config.sample_rate,
+                     )
+                 elif parsed.type == MessageType.ERROR:
+                     error_msg = (
+                         parsed.data.decode("utf-8")
+                         if isinstance(parsed.data, bytes)
+                         else parsed.data
+                     )
+                     logger.error(f"Server error: {error_msg}")
+                     return None
+         except asyncio.TimeoutError:
+             logger.warning("Timeout waiting for audio from server")
+             return None
+         except Exception as e:
+             logger.error(f"Error receiving audio: {e}")
+             raise
+
+         return None
+
+     async def receive_text(self) -> Optional[TextChunk]:
+         """
+         Receive the next text token from the server.
+
+         Returns:
+             TextChunk with text data, or None if no text is available
+
+         Raises:
+             RuntimeError: If not connected
+         """
+         if not self._is_connected or not self.connection:
+             raise RuntimeError("Not connected to PersonaPlex server")
+
+         try:
+             message = await asyncio.wait_for(
+                 self.connection.recv(),
+                 timeout=self.config.session_timeout_seconds,
+             )
+
+             if isinstance(message, bytes):
+                 parsed = PersonaPlexMessage.decode(message)
+                 if parsed.type == MessageType.TEXT:
+                     # Decode the UTF-8 payload if it arrives as raw bytes
+                     text = (
+                         parsed.data.decode("utf-8")
+                         if isinstance(parsed.data, bytes)
+                         else parsed.data
+                     )
+                     return TextChunk(text=text)
+         except asyncio.TimeoutError:
+             logger.warning("Timeout waiting for text from server")
+             return None
+         except Exception as e:
+             logger.error(f"Error receiving text: {e}")
+             raise
+
+         return None
+
+     async def receive_any(self) -> Optional[PersonaPlexMessage]:
+         """
+         Receive the next message of any type from the server.
+
+         Returns:
+             PersonaPlexMessage, or None on timeout
+
+         Raises:
+             RuntimeError: If not connected
+         """
+         if not self._is_connected or not self.connection:
+             raise RuntimeError("Not connected to PersonaPlex server")
+
+         try:
+             message = await asyncio.wait_for(
+                 self.connection.recv(),
+                 timeout=self.config.session_timeout_seconds,
+             )
+
+             if isinstance(message, bytes):
+                 return PersonaPlexMessage.decode(message)
+         except asyncio.TimeoutError:
+             return None
+         except Exception as e:
+             logger.error(f"Error receiving message: {e}")
+             raise
+
+         return None
+
+     async def stream_messages(self) -> AsyncIterator[PersonaPlexMessage]:
+         """
+         Stream all messages from the server until disconnection.
+
+         Yields:
+             PersonaPlexMessage objects as they arrive
+         """
+         if not self._is_connected or not self.connection:
+             raise RuntimeError("Not connected to PersonaPlex server")
+
+         try:
+             async for message in self.connection:
+                 if isinstance(message, bytes):
+                     yield PersonaPlexMessage.decode(message)
+         except Exception as e:
+             logger.error(f"Error in message stream: {e}")
+             raise
+
+     @property
+     def is_connected(self) -> bool:
+         """Check if currently connected."""
+         return self._is_connected and self.connection is not None
+
+     async def __aenter__(self) -> "PersonaPlexClient":
+         """Async context manager entry."""
+         return self
+
+     async def __aexit__(self, exc_type, exc_val, exc_tb):
+         """Async context manager exit."""
+         await self.disconnect()
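Putting the client pieces together, a hedged end-to-end sketch (the audio payload below is a placeholder, not valid Opus; a real caller would feed frames from a microphone encoder, and `MessageType`/`PersonaPlexConfig` are the types added elsewhere in this diff):

    import asyncio

    from audio_engine.pipelines.personaplex import (
        MessageType,
        PersonaPlexClient,
        PersonaPlexConfig,
    )


    async def main():
        client = PersonaPlexClient(PersonaPlexConfig())
        await client.connect("You are a concise assistant.")
        try:
            # Placeholder bytes; the server expects real Opus-encoded frames
            await client.send_audio(b"\x00" * 120)

            async for msg in client.stream_messages():
                if msg.type == MessageType.AUDIO:
                    pass  # msg.data holds Opus audio to play or forward
                elif msg.type == MessageType.TEXT:
                    print(msg.data)
        finally:
            await client.disconnect()


    asyncio.run(main())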
@@ -0,0 +1,69 @@
+ """Configuration for PersonaPlex speech-to-speech pipeline."""
+
+ from dataclasses import dataclass, field
+ from pathlib import Path
+
+
+ @dataclass
+ class PersonaPlexConfig:
+     """
+     Configuration for the PersonaPlex full-duplex speech-to-speech model.
+
+     PersonaPlex is a real-time, full-duplex conversational speech model
+     that handles audio input/output simultaneously with optional text streaming.
+
+     Attributes:
+         server_url: WebSocket URL of the PersonaPlex server (default: official RunPod deployment)
+         voice_prompt: Voice preset name (e.g., "NATF0.pt", "NATM1.pt")
+             See: https://github.com/NVIDIA/personaplex#voices
+         text_prompt: System prompt for controlling persona/behavior
+         text_temperature: LLM temperature for text generation (0.0-2.0)
+         audio_temperature: Audio codec temperature for naturalness (0.0-2.0)
+         text_topk: Top-K sampling for text tokens
+         audio_topk: Top-K sampling for audio tokens
+         sample_rate: Audio sample rate in Hz (Opus default: 48000)
+         save_transcripts: Whether to save session transcripts to disk
+         transcript_path: Directory to save transcripts in
+         session_timeout_seconds: Max seconds to wait before closing an idle connection
+         extra: Additional implementation-specific options
+     """
+
+     server_url: str = "wss://cl9unux255nnzf-8998.proxy.runpod.net"
+     voice_prompt: str = "NATF0.pt"
+     text_prompt: str = "You are a helpful AI assistant. Have a natural conversation."
+     text_temperature: float = 0.7
+     audio_temperature: float = 0.8
+     text_topk: int = 25
+     audio_topk: int = 250
+     sample_rate: int = 48000
+     save_transcripts: bool = True
+     transcript_path: str = "./transcripts/"
+     session_timeout_seconds: float = 300.0
+     extra: dict = field(default_factory=dict)
+
+     def __post_init__(self):
+         """Validate configuration after initialization."""
+         if self.text_temperature < 0.0 or self.text_temperature > 2.0:
+             raise ValueError("text_temperature must be between 0.0 and 2.0")
+         if self.audio_temperature < 0.0 or self.audio_temperature > 2.0:
+             raise ValueError("audio_temperature must be between 0.0 and 2.0")
+         if self.text_topk < 1:
+             raise ValueError("text_topk must be >= 1")
+         if self.audio_topk < 1:
+             raise ValueError("audio_topk must be >= 1")
+         if self.sample_rate not in (48000, 24000, 16000):
+             raise ValueError("sample_rate must be 48000, 24000, or 16000")
+
+         # Create the transcript directory if save_transcripts is enabled
+         if self.save_transcripts:
+             Path(self.transcript_path).mkdir(parents=True, exist_ok=True)
+
+     @classmethod
+     def default(cls) -> "PersonaPlexConfig":
+         """Get default configuration."""
+         return cls()
+
+     @classmethod
+     def from_dict(cls, data: dict) -> "PersonaPlexConfig":
+         """Create config from dictionary."""
+         return cls(**data)
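Because `__post_init__` validates eagerly, a bad value fails at construction time rather than mid-session. A short sketch of both paths, using only fields defined above:

    from audio_engine.pipelines.personaplex import PersonaPlexConfig

    # Override only the fields that differ from the defaults
    config = PersonaPlexConfig.from_dict({
        "voice_prompt": "NATM1.pt",
        "text_temperature": 0.5,
        "save_transcripts": False,
    })

    # Out-of-range values raise immediately
    try:
        PersonaPlexConfig(audio_temperature=3.0)
    except ValueError as err:
        print(err)  # audio_temperature must be between 0.0 and 2.0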