finchvox-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
finchvox/__init__.py ADDED
File without changes
finchvox/__main__.py ADDED
@@ -0,0 +1,81 @@
+ """
+ FinchVox entry point - starts unified server with collector and UI.
+
+ Usage:
+     python -m finchvox                 # Start with default port 3000
+     python -m finchvox --port 8000     # Start with custom port
+     python -m finchvox --help          # Show options
+ """
+
+ import argparse
+ from pathlib import Path
+ from finchvox.server import UnifiedServer
+ from finchvox.collector.config import GRPC_PORT, get_default_data_dir
+
+
+ def main():
+     """Main entry point."""
+     parser = argparse.ArgumentParser(
+         description="FinchVox unified server for voice AI observability",
+         formatter_class=argparse.RawDescriptionHelpFormatter,
+         epilog="""
+ Examples:
+   python -m finchvox                      # Start with default ports
+   python -m finchvox --port 8000          # Use custom HTTP port
+   python -m finchvox --grpc-port 4318     # Use custom gRPC port
+   python -m finchvox --data-dir ./my-data # Use custom data directory
+         """
+     )
+     parser.add_argument(
+         "--port",
+         type=int,
+         default=3000,
+         help="HTTP server port (default: 3000)"
+     )
+     parser.add_argument(
+         "--grpc-port",
+         type=int,
+         default=GRPC_PORT,
+         help=f"gRPC server port (default: {GRPC_PORT})"
+     )
+     parser.add_argument(
+         "--host",
+         type=str,
+         default="0.0.0.0",
+         help="Host to bind to (default: 0.0.0.0)"
+     )
+     parser.add_argument(
+         "--data-dir",
+         type=str,
+         default=None,
+         help="Data directory for traces/logs/audio/exceptions (default: ~/.finchvox)"
+     )
+
+     args = parser.parse_args()
+
+     # Resolve data directory
+     if args.data_dir:
+         data_dir = Path(args.data_dir).expanduser().resolve()
+     else:
+         data_dir = get_default_data_dir()
+
+     print("Starting FinchVox Unified Server...")
+     print("=" * 50)
+     print(f"HTTP Server: http://{args.host}:{args.port}")
+     print(f"  - UI:        http://{args.host}:{args.port}")
+     print(f"  - Collector: http://{args.host}:{args.port}/collector")
+     print(f"gRPC Server: {args.host}:{args.grpc_port}")
+     print(f"Data Directory: {data_dir}")
+     print("=" * 50)
+
+     server = UnifiedServer(
+         port=args.port,
+         grpc_port=args.grpc_port,
+         host=args.host,
+         data_dir=data_dir
+     )
+     server.run()
+
+
+ if __name__ == "__main__":
+     main()
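The unified server started here is also the target for OpenTelemetry export from a running voice app. As a rough, package-external illustration, a client can point a standard OTLP gRPC exporter at the collector; the port below is an assumption (check GRPC_PORT in finchvox.collector.config; 4317 is the conventional OTLP gRPC default):

# Hypothetical client-side tracing setup, assuming the FinchVox collector
# accepts OTLP over gRPC on localhost:4317 (verify against GRPC_PORT).
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter

provider = TracerProvider()
provider.add_span_processor(
    BatchSpanProcessor(OTLPSpanExporter(endpoint="http://localhost:4317", insecure=True))
)
trace.set_tracer_provider(provider)

tracer = trace.get_tracer("my-voice-app")
with tracer.start_as_current_span("conversation"):
    pass  # spans recorded here are exported to the FinchVox collector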
@@ -0,0 +1,278 @@
+ """
+ Audio recording with chunked stereo capture and timing metadata.
+
+ Records conversation audio with:
+ - Stereo format: user audio on left channel, bot audio on right channel
+ - Chunked recording every 5-10 seconds for continuous streaming
+ - Timing events for latency calculation
+ - Association with OpenTelemetry trace IDs
+ - Direct upload to configured endpoint (no local file storage)
+ """
+
+ import asyncio
+ import io
+ import json
+ import wave
+ from datetime import datetime
+ from typing import Optional
+
+ import aiohttp
+ from loguru import logger
+ from opentelemetry import trace
+ from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
+ # Pipecat's conversation context provider
+ from pipecat.utils.tracing.conversation_context_provider import (
+     ConversationContextProvider
+ )
+
+
+ class ConversationAudioRecorder:
+     """
+     Records conversation audio with timing metadata for latency analysis.
+
+     Audio chunks are uploaded directly to a configured endpoint with no local file storage.
+     """
+
+     def __init__(
+         self,
+         chunk_duration_seconds: int = 5,
+         sample_rate: int = 16000,
+         endpoint: Optional[str] = "http://localhost:3000",
+     ):
+         """
+         Initialize audio recorder.
+
+         Args:
+             chunk_duration_seconds: Duration of each audio chunk (5-10 seconds recommended)
+             sample_rate: Audio sample rate (default 16kHz)
+             endpoint: URL of the finchvox HTTP server (default: "http://localhost:3000").
+                 If None, recording will fail when started.
+         """
+         self.chunk_duration = chunk_duration_seconds
+         self.sample_rate = sample_rate
+         self.endpoint = endpoint
+
+         # Create AudioBufferProcessor with stereo configuration
+         self.audio_buffer = AudioBufferProcessor(
+             sample_rate=self.sample_rate,  # Explicit sample rate (16000 Hz)
+             num_channels=2,                # Stereo: user left, bot right
+             buffer_size=320000,            # ~10 seconds at 16kHz, 16-bit
+             enable_turn_audio=False,       # Continuous recording, not per-turn
+         )
+
+         # Timing events for latency calculation
+         self.timing_events = []
+         self.current_trace_id: Optional[str] = None
+         self.conversation_start_time: Optional[datetime] = None
+         self.chunk_counter = 0
+
+         self._setup_event_handlers()
+
+     def _setup_event_handlers(self):
+         """Set up audio buffer event handlers for chunked recording."""
+
+         @self.audio_buffer.event_handler("on_audio_data")
+         async def on_audio_data(buffer, audio, sample_rate, num_channels):
+             """Handle audio data chunks (called every chunk_duration seconds)."""
+             try:
+                 # Get trace ID from Pipecat's conversation context provider
+                 trace_id = None
+
+                 # First, try to get trace_id from the active conversation span
+                 context_provider = ConversationContextProvider.get_instance()
+                 conversation_context = context_provider.get_current_conversation_context()
+
+                 if conversation_context:
+                     # Extract span context from the conversation context
+                     span = trace.get_current_span(conversation_context)
+                     span_context = span.get_span_context()
+                     if span_context.trace_id != 0:
+                         trace_id = format(span_context.trace_id, "032x")
+
+                 # Fall back to a manually set trace_id (for backwards compatibility)
+                 if not trace_id:
+                     trace_id = self.current_trace_id or "no_trace"
+
+                 # Prepare metadata
+                 timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+                 metadata = {
+                     "trace_id": trace_id,
+                     "chunk_number": self.chunk_counter,
+                     "timestamp": timestamp,
+                     "sample_rate": sample_rate,
+                     "num_channels": num_channels,
+                     "channels": {
+                         "0": "user",
+                         "1": "bot"
+                     },
+                     "timing_events": self.timing_events,
+                     "conversation_start": (
+                         self.conversation_start_time.isoformat()
+                         if self.conversation_start_time
+                         else None
+                     ),
+                 }
+
+                 # Upload to endpoint
+                 upload_success = await self.upload_chunk(
+                     trace_id=trace_id,
+                     chunk_number=self.chunk_counter,
+                     audio_data=audio,
+                     metadata=metadata
+                 )
+
+                 if upload_success:
+                     logger.info(
+                         f"Uploaded audio chunk {self.chunk_counter} for trace {trace_id[:8]}... "
+                         f"({len(self.timing_events)} timing events)"
+                     )
+                 else:
+                     logger.error(
+                         f"Failed to upload chunk {self.chunk_counter} for trace {trace_id[:8]}..."
+                     )
+
+                 self.chunk_counter += 1
+
+                 # Clear old timing events from previous chunks (keep recent ones for context)
+                 if len(self.timing_events) > 100:
+                     self.timing_events = self.timing_events[-50:]
+
+             except Exception as e:
+                 logger.error(f"Failed to process audio chunk: {e}", exc_info=True)
+
+     async def upload_chunk(
+         self,
+         trace_id: str,
+         chunk_number: int,
+         audio_data: bytes,
+         metadata: dict
+     ) -> bool:
+         """
+         Upload audio chunk to the endpoint via HTTP POST.
+
+         Args:
+             trace_id: OpenTelemetry trace ID
+             chunk_number: Sequential chunk number
+             audio_data: Raw PCM audio bytes (wrapped in a WAV container before upload)
+             metadata: Metadata dictionary
+
+         Returns:
+             True if the upload succeeded, False otherwise
+         """
+         try:
+             url = f"{self.endpoint}/collector/audio/{trace_id}/chunk"
+
+             # Create WAV file in memory
+             wav_buffer = io.BytesIO()
+             with wave.open(wav_buffer, 'wb') as wav_file:
+                 wav_file.setnchannels(metadata['num_channels'])
+                 wav_file.setsampwidth(2)  # 16-bit audio
+                 wav_file.setframerate(metadata['sample_rate'])
+                 wav_file.writeframes(audio_data)
+
+             wav_buffer.seek(0)
+
+             async with aiohttp.ClientSession() as session:
+                 form = aiohttp.FormData()
+                 form.add_field(
+                     'audio',
+                     wav_buffer,
+                     filename=f"chunk_{chunk_number:04d}.wav",
+                     content_type='audio/wav'
+                 )
+                 form.add_field(
+                     'metadata',
+                     json.dumps(metadata),
+                     content_type='application/json'
+                 )
+
+                 # Upload with timeout
+                 async with session.post(
+                     url,
+                     data=form,
+                     timeout=aiohttp.ClientTimeout(total=30)
+                 ) as response:
+                     if response.status == 201:
+                         result = await response.json()
+                         logger.debug(
+                             f"Uploaded chunk {chunk_number} for trace {trace_id[:8]}... "
+                             f"to endpoint: {result.get('file_path')}"
+                         )
+                         return True
+                     else:
+                         error_text = await response.text()
+                         logger.error(
+                             f"Failed to upload chunk {chunk_number}: "
+                             f"HTTP {response.status}: {error_text}"
+                         )
+                         return False
+
+         except asyncio.TimeoutError:
+             logger.error(f"Timeout uploading chunk {chunk_number} to endpoint")
+             return False
+         except Exception as e:
+             logger.error(
+                 f"Error uploading chunk {chunk_number} to endpoint: {e}",
+                 exc_info=True
+             )
+             return False
+
+     async def start_recording(self, trace_id: Optional[str] = None):
+         """
+         Start recording audio for a conversation.
+
+         Args:
+             trace_id: Optional trace ID hint. If not provided or unavailable,
+                 the recorder will automatically extract the trace_id from
+                 the active conversation span during chunk capture.
+
+         Raises:
+             ValueError: If the endpoint is not configured
+         """
+         if not self.endpoint:
+             raise ValueError(
+                 "Cannot start recording: endpoint is not configured. "
+                 "Provide an endpoint URL when initializing ConversationAudioRecorder."
+             )
+
+         self.current_trace_id = trace_id
+         self.conversation_start_time = datetime.now()
+         self.chunk_counter = 0
+         self.timing_events = []
+
+         await self.audio_buffer.start_recording()
+         logger.info(f"Started audio recording for trace {trace_id}")
+
+     async def stop_recording(self):
+         """Stop recording audio."""
+         await self.audio_buffer.stop_recording()
+         logger.info(
+             f"Stopped audio recording. Captured {self.chunk_counter} chunks "
+             f"with {len(self.timing_events)} timing events"
+         )
+
+     def add_timing_event(self, event_type: str, metadata: dict = None):
+         """
+         Add a timing event for latency calculation.
+
+         Args:
+             event_type: Type of event (e.g., 'user_stopped', 'bot_started', 'bot_stopped')
+             metadata: Additional metadata for the event
+         """
+         event = {
+             "type": event_type,
+             "timestamp": datetime.now().isoformat(),
+             "relative_time": (
+                 (datetime.now() - self.conversation_start_time).total_seconds()
+                 if self.conversation_start_time
+                 else 0
+             ),
+             "metadata": metadata or {},
+         }
+         self.timing_events.append(event)
+         logger.debug(f"Timing event: {event_type}")
+
+     def get_processor(self) -> AudioBufferProcessor:
+         """Get the AudioBufferProcessor to add to the pipeline."""
+         return self.audio_buffer
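For orientation, a hedged usage sketch of the recorder above. Its import path inside the package is not shown in this diff, so the class is referenced directly, and the endpoint assumes a FinchVox server on localhost:3000:

# Sketch only: ConversationAudioRecorder is the class defined above; its module
# path is not visible in this diff, so no import for it is shown here.
import asyncio

async def demo() -> None:
    recorder = ConversationAudioRecorder(
        chunk_duration_seconds=5,
        endpoint="http://localhost:3000",  # assumed FinchVox server address
    )
    # get_processor() returns the underlying AudioBufferProcessor, which would
    # be placed inside a Pipecat Pipeline([...]) so it sees mixed user/bot audio.
    processor = recorder.get_processor()  # noqa: F841 (added to a pipeline in real use)

    await recorder.start_recording()
    recorder.add_timing_event("user_stopped")
    recorder.add_timing_event("bot_started")
    await recorder.stop_recording()

asyncio.run(demo())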
@@ -0,0 +1,123 @@
+ """
+ Audio utilities for FinchVox trace viewer.
+
+ This module provides functions for finding and combining audio chunks
+ from voice conversation traces.
+ """
+
+ import wave
+ from pathlib import Path
+ from typing import List, Tuple
+
+ from loguru import logger
+
+
+ def find_chunks(audio_dir: Path, trace_id: str) -> List[Tuple[int, Path]]:
+     """
+     Find all audio chunks for a given trace_id.
+
+     Args:
+         audio_dir: Directory containing audio chunks (can be old or new structure)
+         trace_id: Trace ID to search for
+
+     Returns:
+         List of (chunk_number, chunk_path) tuples, sorted by chunk number
+     """
+     chunks = []
+
+     # New structure: traces/{trace_id}/audio/chunk_XXXX.wav
+     new_structure_dir = audio_dir / trace_id / "audio"
+     if new_structure_dir.exists():
+         for chunk_file in new_structure_dir.glob("chunk_*.wav"):
+             # Extract chunk number from filename: chunk_0001.wav -> 1
+             try:
+                 chunk_num = int(chunk_file.stem.split("_")[1])
+                 chunks.append((chunk_num, chunk_file))
+             except (IndexError, ValueError) as e:
+                 logger.warning(f"Could not parse chunk number from {chunk_file}: {e}")
+
+     # Old structure (for backward compatibility): audio/{trace_id}/chunk_XXXX.wav
+     old_structure_dir = audio_dir / trace_id
+     if old_structure_dir.exists() and old_structure_dir != new_structure_dir.parent:
+         for chunk_file in old_structure_dir.glob("chunk_*.wav"):
+             # Extract chunk number from filename: chunk_0001.wav -> 1
+             try:
+                 chunk_num = int(chunk_file.stem.split("_")[1])
+                 chunks.append((chunk_num, chunk_file))
+             except (IndexError, ValueError) as e:
+                 logger.warning(f"Could not parse chunk number from {chunk_file}: {e}")
+
+     # Also check local fallback format: audio_{trace_id}_..._chunkXXXX.wav
+     if audio_dir.exists():
+         for chunk_file in audio_dir.glob(f"audio_{trace_id}*_chunk*.wav"):
+             try:
+                 # Extract chunk number from filename
+                 chunk_part = chunk_file.stem.split("_chunk")[1]
+                 chunk_num = int(chunk_part)
+                 chunks.append((chunk_num, chunk_file))
+             except (IndexError, ValueError) as e:
+                 logger.warning(f"Could not parse chunk number from {chunk_file}: {e}")
+
+     # Sort by chunk number and remove duplicates
+     chunks = list(set(chunks))
+     chunks.sort(key=lambda x: x[0])
+     return chunks
+
+
+ def combine_chunks(chunks: List[Tuple[int, Path]], output_file: Path) -> None:
+     """
+     Combine audio chunks into a single WAV file.
+
+     Args:
+         chunks: List of (chunk_number, chunk_path) tuples
+         output_file: Path to write combined WAV file
+     """
+     if not chunks:
+         logger.error("No chunks to combine")
+         return
+
+     # Get audio parameters from first chunk
+     first_chunk = chunks[0][1]
+     with wave.open(str(first_chunk), "rb") as wf:
+         sample_rate = wf.getframerate()
+         num_channels = wf.getnchannels()
+         sample_width = wf.getsampwidth()
+
+     logger.info(
+         f"Combining {len(chunks)} chunks: "
+         f"{sample_rate}Hz, {num_channels} channels, {sample_width*8}-bit"
+     )
+
+     # Open output file
+     with wave.open(str(output_file), "wb") as out_wf:
+         out_wf.setnchannels(num_channels)
+         out_wf.setsampwidth(sample_width)
+         out_wf.setframerate(sample_rate)
+
+         # Append each chunk
+         total_frames = 0
+         for chunk_num, chunk_path in chunks:
+             logger.debug(f"Adding chunk {chunk_num}: {chunk_path.name}")
+
+             with wave.open(str(chunk_path), "rb") as in_wf:
+                 # Verify parameters match
+                 if (
+                     in_wf.getframerate() != sample_rate
+                     or in_wf.getnchannels() != num_channels
+                     or in_wf.getsampwidth() != sample_width
+                 ):
+                     logger.warning(
+                         f"Chunk {chunk_num} has different audio parameters, skipping"
+                     )
+                     continue
+
+                 # Read and write all frames
+                 frames = in_wf.readframes(in_wf.getnframes())
+                 out_wf.writeframes(frames)
+                 total_frames += in_wf.getnframes()
+
+     duration_seconds = total_frames / sample_rate
+     logger.info(
+         f"Combined {len(chunks)} chunks into {output_file.name} "
+         f"({duration_seconds:.1f} seconds)"
+     )
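A short usage sketch for these helpers. The module's import path and the exact on-disk layout are not shown in this diff; the directory below is an assumption based on the structure comments in find_chunks and the ~/.finchvox default mentioned elsewhere:

# Sketch only: find_chunks/combine_chunks are the functions defined above; the
# audio_dir and trace_id values are illustrative assumptions.
from pathlib import Path

audio_dir = Path.home() / ".finchvox" / "traces"   # assumed data layout
trace_id = "0123456789abcdef0123456789abcdef"      # example trace ID

chunks = find_chunks(audio_dir, trace_id)
if chunks:
    combine_chunks(chunks, Path(f"{trace_id}_combined.wav"))
else:
    print(f"No audio chunks found for trace {trace_id}")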
finchvox/cli.py ADDED
@@ -0,0 +1,127 @@
+ """
+ FinchVox CLI - Command-line interface for the finchvox package.
+
+ Provides subcommands:
+ - finchvox start: Start the unified server
+ - finchvox version: Display version information
+ """
+
+ import argparse
+ import sys
+ from pathlib import Path
+ from finchvox.server import UnifiedServer
+ from finchvox.collector.config import GRPC_PORT
+
+
+ def get_version() -> str:
+     """Get the package version."""
+     # Read the version from the installed package metadata
+     try:
+         from importlib.metadata import version
+         return version("finchvox")
+     except Exception:
+         return "0.0.1"  # Fallback version
+
+
+ def cmd_version(args):
+     """Handle the 'version' subcommand."""
+     print(f"finchvox version {get_version()}")
+     print(f"Python {sys.version}")
+
+
+ def cmd_start(args):
+     """Handle the 'start' subcommand."""
+     # Resolve data directory
+     if args.data_dir:
+         data_dir = Path(args.data_dir).expanduser().resolve()
+     else:
+         data_dir = Path.home() / ".finchvox"
+
+     print("Starting FinchVox Unified Server...")
+     print("=" * 50)
+     print(f"HTTP Server: http://{args.host}:{args.port}")
+     print(f"  - UI:        http://{args.host}:{args.port}")
+     print(f"  - Collector: http://{args.host}:{args.port}/collector")
+     print(f"gRPC Server: {args.host}:{args.grpc_port}")
+     print(f"Data Directory: {data_dir}")
+     print("=" * 50)
+
+     server = UnifiedServer(
+         port=args.port,
+         grpc_port=args.grpc_port,
+         host=args.host,
+         data_dir=data_dir
+     )
+     server.run()
+
+
+ def main():
+     """Main CLI entry point."""
+     parser = argparse.ArgumentParser(
+         prog="finchvox",
+         description="FinchVox - Voice AI observability dev tool for Pipecat",
+         formatter_class=argparse.RawDescriptionHelpFormatter,
+     )
+
+     subparsers = parser.add_subparsers(
+         title="commands",
+         description="Available commands",
+         dest="command",
+         required=True
+     )
+
+     # 'start' subcommand
+     start_parser = subparsers.add_parser(
+         "start",
+         help="Start the unified server",
+         description="Start the FinchVox unified server (gRPC + HTTP)",
+         formatter_class=argparse.RawDescriptionHelpFormatter,
+         epilog="""
+ Examples:
+   finchvox start                      # Start with defaults
+   finchvox start --port 8000          # Custom HTTP port
+   finchvox start --grpc-port 4318     # Custom gRPC port
+   finchvox start --data-dir ./my-data # Custom data directory
+         """
+     )
+     start_parser.add_argument(
+         "--port",
+         type=int,
+         default=3000,
+         help="HTTP server port (default: 3000)"
+     )
+     start_parser.add_argument(
+         "--grpc-port",
+         type=int,
+         default=GRPC_PORT,
+         help=f"gRPC server port (default: {GRPC_PORT})"
+     )
+     start_parser.add_argument(
+         "--host",
+         type=str,
+         default="0.0.0.0",
+         help="Host to bind to (default: 0.0.0.0)"
+     )
+     start_parser.add_argument(
+         "--data-dir",
+         type=str,
+         default=None,
+         help="Data directory for traces/logs/audio/exceptions (default: ~/.finchvox)"
+     )
+     start_parser.set_defaults(func=cmd_start)
+
+     # 'version' subcommand
+     version_parser = subparsers.add_parser(
+         "version",
+         help="Display version information",
+         description="Display FinchVox version and Python version"
+     )
+     version_parser.set_defaults(func=cmd_version)
+
+     # Parse arguments and dispatch to handler
+     args = parser.parse_args()
+     args.func(args)
+
+
+ if __name__ == "__main__":
+     main()
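Since this file is installed as finchvox/cli.py, the parser can also be exercised programmatically as a quick smoke test; the `finchvox` console command presumably maps to this main() via an entry point that is not shown in this diff:

# Smoke-test the CLI parser without going through a console-script entry point.
import sys
from finchvox.cli import main

sys.argv = ["finchvox", "version"]   # equivalent to running: finchvox version
main()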
File without changes
@@ -0,0 +1,22 @@
+ import sys
+ from loguru import logger
+ from .server import run_server
+ from .config import LOG_LEVEL
+
+
+ def main():
+     """Main entry point for the OTLP collector."""
+     # Configure loguru
+     logger.remove()  # Remove default handler
+     logger.add(
+         sink=sys.stderr,
+         level=LOG_LEVEL,
+         format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan> - <level>{message}</level>"
+     )
+
+     logger.info("Starting FinchVox OTLP Collector")
+     run_server()
+
+
+ if __name__ == "__main__":
+     main()