finchvox-0.0.1-py3-none-any.whl
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
- finchvox/__init__.py +0 -0
- finchvox/__main__.py +81 -0
- finchvox/audio_recorder.py +278 -0
- finchvox/audio_utils.py +123 -0
- finchvox/cli.py +127 -0
- finchvox/collector/__init__.py +0 -0
- finchvox/collector/__main__.py +22 -0
- finchvox/collector/audio_handler.py +146 -0
- finchvox/collector/collector_routes.py +186 -0
- finchvox/collector/config.py +64 -0
- finchvox/collector/server.py +126 -0
- finchvox/collector/service.py +43 -0
- finchvox/collector/writer.py +86 -0
- finchvox/server.py +201 -0
- finchvox/trace.py +115 -0
- finchvox/ui/css/app.css +774 -0
- finchvox/ui/images/favicon.ico +0 -0
- finchvox/ui/images/finchvox-logo.png +0 -0
- finchvox/ui/js/time-utils.js +97 -0
- finchvox/ui/js/trace_detail.js +1228 -0
- finchvox/ui/js/traces_list.js +26 -0
- finchvox/ui/lib/alpine.min.js +5 -0
- finchvox/ui/lib/wavesurfer.min.js +1 -0
- finchvox/ui/trace_detail.html +313 -0
- finchvox/ui/traces_list.html +63 -0
- finchvox/ui_routes.py +362 -0
- finchvox-0.0.1.dist-info/METADATA +189 -0
- finchvox-0.0.1.dist-info/RECORD +31 -0
- finchvox-0.0.1.dist-info/WHEEL +4 -0
- finchvox-0.0.1.dist-info/entry_points.txt +2 -0
- finchvox-0.0.1.dist-info/licenses/LICENSE +24 -0
finchvox/collector/audio_handler.py
@@ -0,0 +1,146 @@
"""
Audio handler for storing conversation audio chunks.

This module provides the AudioHandler class which manages the storage of
audio chunks and their associated metadata, organized by trace ID.
"""

import json
from pathlib import Path
from typing import Optional

import aiofiles
from loguru import logger
from finchvox.collector.config import get_trace_audio_dir


class AudioHandler:
    """Handles writing audio chunks to organized directory structure."""

    def __init__(self, data_dir: Path):
        """
        Initialize audio handler.

        Args:
            data_dir: Base data directory (e.g., ~/.finchvox)
        """
        self.data_dir = data_dir
        logger.info(f"AudioHandler initialized with data_dir: {self.data_dir}")

    async def save_audio_chunk(
        self,
        trace_id: str,
        chunk_number: int,
        audio_data: bytes,
        metadata: dict,
    ) -> Optional[Path]:
        """
        Save audio chunk with metadata.

        Directory structure:
            data_dir/
                traces/
                    {trace_id}/
                        audio/
                            chunk_0000.wav
                            chunk_0000.json  # metadata
                            chunk_0001.wav
                            chunk_0001.json

        Args:
            trace_id: OpenTelemetry trace ID (32 hex chars)
            chunk_number: Sequential chunk number
            audio_data: Raw audio file bytes
            metadata: Dictionary containing chunk metadata

        Returns:
            Path to saved audio file, or None if save failed
        """
        try:
            # Validate trace_id format
            if not self._is_valid_trace_id(trace_id):
                logger.error(f"Invalid trace_id format: {trace_id}")
                return None

            # Create trace-specific audio directory
            trace_audio_dir = get_trace_audio_dir(self.data_dir, trace_id)
            trace_audio_dir.mkdir(exist_ok=True, parents=True)

            # Generate filenames with zero-padded chunk number
            audio_file = trace_audio_dir / f"chunk_{chunk_number:04d}.wav"
            metadata_file = trace_audio_dir / f"chunk_{chunk_number:04d}.json"

            # Save audio file (async to avoid blocking)
            async with aiofiles.open(audio_file, "wb") as f:
                await f.write(audio_data)

            # Save metadata
            async with aiofiles.open(metadata_file, "w") as f:
                await f.write(json.dumps(metadata, indent=2))

            logger.info(
                f"Saved audio chunk {chunk_number} for trace {trace_id[:8]}... "
                f"({len(audio_data)} bytes)"
            )

            return audio_file

        except OSError as e:
            # Disk full, permission denied, etc.
            logger.error(
                f"Failed to save audio chunk {chunk_number} for trace {trace_id[:8]}...: {e}",
                exc_info=True,
            )
            return None
        except Exception as e:
            logger.error(
                f"Unexpected error saving audio chunk {chunk_number} "
                f"for trace {trace_id[:8]}...: {e}",
                exc_info=True,
            )
            return None

    def _is_valid_trace_id(self, trace_id: str) -> bool:
        """
        Validate trace_id format.

        Args:
            trace_id: Trace ID to validate

        Returns:
            True if valid (32 hex chars), False otherwise
        """
        if len(trace_id) != 32:
            return False
        try:
            int(trace_id, 16)  # Validate it's hexadecimal
            return True
        except ValueError:
            return False

    def get_trace_audio_dir(self, trace_id: str) -> Path:
        """
        Get directory path for a trace's audio files.

        Args:
            trace_id: OpenTelemetry trace ID

        Returns:
            Path to trace-specific audio directory
        """
        return get_trace_audio_dir(self.data_dir, trace_id)

    def list_chunks(self, trace_id: str) -> list[Path]:
        """
        List all audio chunks for a trace.

        Args:
            trace_id: OpenTelemetry trace ID

        Returns:
            Sorted list of audio file paths for the trace
        """
        trace_dir = self.get_trace_audio_dir(trace_id)
        if not trace_dir.exists():
            return []
        return sorted(trace_dir.glob("chunk_*.wav"))
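
A minimal usage sketch (not part of the package): driving AudioHandler directly from asyncio. The trace ID, byte payload, and metadata values below are made-up placeholders.

import asyncio
from pathlib import Path

from finchvox.collector.audio_handler import AudioHandler


async def main():
    handler = AudioHandler(Path.home() / ".finchvox")
    trace_id = "0af7651916cd43dd8448eb211c80319c"  # hypothetical 32-hex-char ID
    saved = await handler.save_audio_chunk(
        trace_id=trace_id,
        chunk_number=0,
        audio_data=b"\x00" * 1024,  # placeholder bytes, not a real WAV payload
        metadata={
            "chunk_number": 0,
            "timestamp": "2024-01-01T00:00:00Z",
            "sample_rate": 16000,
            "num_channels": 1,
        },
    )
    print(saved)                           # .../traces/<trace_id>/audio/chunk_0000.wav
    print(handler.list_chunks(trace_id))   # [.../chunk_0000.wav]


asyncio.run(main())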
finchvox/collector/collector_routes.py
@@ -0,0 +1,186 @@
"""
Collector routes for audio chunk uploads, logs, and exceptions.

This module provides route registration functions for the collector endpoints,
which handle data ingestion from Pipecat applications.
"""

import json
from pathlib import Path

from fastapi import FastAPI, File, Form, HTTPException, UploadFile, status
from fastapi.responses import JSONResponse
from loguru import logger

from .audio_handler import AudioHandler
from .config import ALLOWED_AUDIO_FORMATS, MAX_AUDIO_FILE_SIZE


def register_collector_routes(
    app: FastAPI,
    audio_handler: AudioHandler,
    prefix: str = "/collector"
):
    """
    Register collector routes on an existing FastAPI app with URL prefix.

    Args:
        app: Existing FastAPI application to register routes on
        audio_handler: AudioHandler instance for managing audio storage
        prefix: URL prefix for all collector routes (default: "/collector")
    """

    @app.post(f"{prefix}/audio/{{trace_id}}/chunk")
    async def upload_audio_chunk(
        trace_id: str,
        audio: UploadFile = File(..., description="Audio file (WAV format)"),
        metadata: str = Form(..., description="JSON metadata string"),
    ):
        """
        Upload audio chunk for a trace.

        Args:
            trace_id: Hex string trace ID (32 chars)
            audio: Audio file (WAV format)
            metadata: JSON string with:
                - chunk_number: int
                - timestamp: ISO format string
                - sample_rate: int
                - num_channels: int
                - timing_events: list[dict] (optional)

        Returns:
            JSON response with storage path and status

        Raises:
            HTTPException: For validation errors or server errors
        """
        try:
            # Validate trace_id format
            if len(trace_id) != 32 or not all(
                c in "0123456789abcdef" for c in trace_id
            ):
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail=f"Invalid trace_id format: must be 32 hex chars, got {trace_id}",
                )

            # Parse metadata
            try:
                metadata_dict = json.loads(metadata)
            except json.JSONDecodeError as e:
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail=f"Invalid JSON metadata: {e}",
                )

            # Validate required metadata fields
            required_fields = ["chunk_number", "timestamp", "sample_rate", "num_channels"]
            missing = [f for f in required_fields if f not in metadata_dict]
            if missing:
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail=f"Missing required metadata fields: {missing}",
                )

            # Read audio data
            audio_data = await audio.read()

            # Validate file size
            if len(audio_data) > MAX_AUDIO_FILE_SIZE:
                raise HTTPException(
                    status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
                    detail=f"Audio file too large: {len(audio_data)} bytes (max {MAX_AUDIO_FILE_SIZE})",
                )

            # Validate file format (basic check)
            file_ext = Path(audio.filename or "unknown.wav").suffix.lower()
            if file_ext not in ALLOWED_AUDIO_FORMATS:
                raise HTTPException(
                    status_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE,
                    detail=f"Unsupported audio format: {file_ext} (allowed: {ALLOWED_AUDIO_FORMATS})",
                )

            # Check if this is a new trace or existing trace (for logging)
            existing_chunks = audio_handler.list_chunks(trace_id)
            is_new_trace = len(existing_chunks) == 0

            if is_new_trace:
                logger.info(f"New audio trace {trace_id[:8]}... - receiving chunk #{metadata_dict['chunk_number']}")
            else:
                logger.info(f"Audio trace {trace_id[:8]}... - receiving chunk #{metadata_dict['chunk_number']} (total: {len(existing_chunks) + 1})")

            # Save audio chunk
            saved_path = await audio_handler.save_audio_chunk(
                trace_id=trace_id,
                chunk_number=metadata_dict["chunk_number"],
                audio_data=audio_data,
                metadata=metadata_dict,
            )

            if saved_path is None:
                raise HTTPException(
                    status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                    detail="Failed to save audio chunk",
                )

            return JSONResponse(
                status_code=status.HTTP_201_CREATED,
                content={
                    "status": "success",
                    "trace_id": trace_id,
                    "chunk_number": metadata_dict["chunk_number"],
                    "file_path": str(saved_path),
                    "size_bytes": len(audio_data),
                },
            )

        except HTTPException:
            raise
        except Exception as e:
            logger.error(
                f"Failed to process audio upload for trace {trace_id}: {e}",
                exc_info=True,
            )
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Internal server error processing audio upload",
            )

    @app.get(f"{prefix}/health")
    async def health_check():
        """
        Health check endpoint for monitoring.

        Returns:
            Status information
        """
        return {"status": "healthy", "service": "finchvox-collector"}

    @app.get(f"{prefix}/audio/{{trace_id}}/chunks")
    async def list_audio_chunks(trace_id: str):
        """
        List all audio chunks for a trace.

        Args:
            trace_id: OpenTelemetry trace ID

        Returns:
            JSON with list of chunks

        Raises:
            HTTPException: If listing fails
        """
        try:
            chunks = audio_handler.list_chunks(trace_id)
            return {
                "trace_id": trace_id,
                "chunk_count": len(chunks),
                "chunks": [{"path": str(p), "name": p.name} for p in chunks],
            }
        except Exception as e:
            logger.error(f"Failed to list chunks for trace {trace_id}: {e}")
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Failed to list audio chunks",
            )
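
A hypothetical client-side upload against the route above, assuming the collector is reachable on localhost:3000 (HTTP_PORT in config.py) with the default "/collector" prefix. The metadata fields mirror the required_fields list in the handler; requests is used here for illustration and is not a finchvox dependency.

import json

import requests

trace_id = "0af7651916cd43dd8448eb211c80319c"  # hypothetical 32-hex-char ID
metadata = {
    "chunk_number": 0,
    "timestamp": "2024-01-01T00:00:00Z",
    "sample_rate": 16000,
    "num_channels": 1,
}

with open("chunk_0000.wav", "rb") as f:
    resp = requests.post(
        f"http://localhost:3000/collector/audio/{trace_id}/chunk",
        files={"audio": ("chunk_0000.wav", f, "audio/wav")},
        data={"metadata": json.dumps(metadata)},
    )

resp.raise_for_status()
print(resp.json())  # {"status": "success", "chunk_number": 0, ...}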
finchvox/collector/config.py
@@ -0,0 +1,64 @@
from pathlib import Path

# Server configuration
GRPC_PORT = 4317  # Standard OTLP gRPC port
HTTP_PORT = 3000  # Unified HTTP server (collector + UI)
MAX_WORKERS = 10  # Thread pool size for concurrent requests

# Logging configuration
LOG_LEVEL = "INFO"  # Can be overridden via LOGURU_LEVEL env var

# Audio upload configuration
MAX_AUDIO_FILE_SIZE = 10 * 1024 * 1024  # 10MB max per chunk
ALLOWED_AUDIO_FORMATS = {".wav", ".mp3", ".ogg", ".flac"}

# Log batching configuration
MAX_LOG_BATCH_SIZE = 100  # Max logs per HTTP request
LOG_FLUSH_INTERVAL = 5.0  # Seconds between batched uploads


def get_default_data_dir() -> Path:
    """Get the default data directory (~/.finchvox)."""
    return Path.home() / ".finchvox"


def get_traces_base_dir(data_dir: Path) -> Path:
    """
    Get the base traces directory.

    Args:
        data_dir: Base data directory (e.g., ~/.finchvox)

    Returns:
        Path to traces directory (e.g., ~/.finchvox/traces)
    """
    return data_dir / "traces"


def get_trace_dir(data_dir: Path, trace_id: str) -> Path:
    """
    Get the directory for a specific trace.

    Args:
        data_dir: Base data directory
        trace_id: Hex string trace ID

    Returns:
        Path to trace-specific directory (e.g., ~/.finchvox/traces/<trace_id>)
    """
    return get_traces_base_dir(data_dir) / trace_id


def get_trace_logs_dir(data_dir: Path, trace_id: str) -> Path:
    """Get the logs directory for a specific trace."""
    return get_trace_dir(data_dir, trace_id) / "logs"


def get_trace_audio_dir(data_dir: Path, trace_id: str) -> Path:
    """Get the audio directory for a specific trace."""
    return get_trace_dir(data_dir, trace_id) / "audio"


def get_trace_exceptions_dir(data_dir: Path, trace_id: str) -> Path:
    """Get the exceptions directory for a specific trace."""
    return get_trace_dir(data_dir, trace_id) / "exceptions"
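
The path helpers compose into a fixed per-trace layout; a quick sketch (the trace ID is hypothetical):

from finchvox.collector.config import get_default_data_dir, get_trace_audio_dir

data_dir = get_default_data_dir()  # ~/.finchvox
trace_id = "0af7651916cd43dd8448eb211c80319c"
print(get_trace_audio_dir(data_dir, trace_id))
# -> ~/.finchvox/traces/0af7651916cd43dd8448eb211c80319c/audio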
finchvox/collector/server.py
@@ -0,0 +1,126 @@
import asyncio
import signal
import grpc
import uvicorn
from concurrent import futures
from loguru import logger
from opentelemetry.proto.collector.trace.v1.trace_service_pb2_grpc import (
    add_TraceServiceServicer_to_server
)
from .service import TraceCollectorServicer
from .writer import SpanWriter
from .logs_writer import LogWriter
from .exceptions_writer import ExceptionsWriter
from .audio_handler import AudioHandler
from .http_server import create_app
from .config import GRPC_PORT, HTTP_PORT, MAX_WORKERS, TRACES_DIR, AUDIO_DIR, LOGS_DIR, EXCEPTIONS_DIR


class CollectorServer:
    """Manages both gRPC and HTTP server lifecycle."""

    def __init__(self):
        self.grpc_server = None
        self.http_server = None
        self.span_writer = SpanWriter(TRACES_DIR)
        self.log_writer = LogWriter(LOGS_DIR)
        self.exceptions_writer = ExceptionsWriter(EXCEPTIONS_DIR)
        self.audio_handler = AudioHandler(AUDIO_DIR)
        self.shutdown_event = asyncio.Event()

    async def start_grpc(self):
        """Start the gRPC server."""
        logger.info(f"Starting OTLP gRPC collector on port {GRPC_PORT}")

        # Create gRPC server with thread pool
        self.grpc_server = grpc.server(
            futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)
        )

        # Register our service implementation
        servicer = TraceCollectorServicer(self.span_writer)
        add_TraceServiceServicer_to_server(servicer, self.grpc_server)

        # Bind to port (insecure for PoC - no TLS)
        self.grpc_server.add_insecure_port(f'[::]:{GRPC_PORT}')

        # Start serving
        self.grpc_server.start()
        logger.info(f"OTLP collector listening on port {GRPC_PORT}")
        logger.info(f"Writing traces to: {TRACES_DIR.absolute()}")

    async def start_http(self):
        """Start the HTTP server using uvicorn."""
        logger.info(f"Starting HTTP collector on port {HTTP_PORT}")

        # Create FastAPI app with injected dependencies
        app = create_app(self.audio_handler, self.log_writer, self.exceptions_writer)

        # Configure uvicorn server
        config = uvicorn.Config(
            app,
            host="0.0.0.0",
            port=HTTP_PORT,
            log_level="info",
            access_log=True,
        )
        self.http_server = uvicorn.Server(config)

        logger.info(f"HTTP collector listening on port {HTTP_PORT}")
        logger.info(f"Writing audio to: {AUDIO_DIR.absolute()}")
        logger.info(f"Writing logs to: {LOGS_DIR.absolute()}")
        logger.info(f"Writing exceptions to: {EXCEPTIONS_DIR.absolute()}")

        # Run server until shutdown event
        await self.http_server.serve()

    async def start(self):
        """Start both servers concurrently."""
        # Start gRPC server
        await self.start_grpc()

        # Start HTTP server (this blocks until shutdown)
        await self.start_http()

    async def stop(self, grace_period=5):
        """Gracefully stop both servers."""
        logger.info(f"Shutting down servers (grace period: {grace_period}s)")

        # Stop HTTP server
        if self.http_server:
            logger.info("Stopping HTTP server...")
            self.http_server.should_exit = True
            await asyncio.sleep(0.1)  # Give it time to process shutdown

        # Stop gRPC server
        if self.grpc_server:
            logger.info("Stopping gRPC server...")
            self.grpc_server.stop(grace_period)

        logger.info("All servers stopped")


async def run_server_async():
    """Async entry point for running the collector server."""
    server = CollectorServer()

    # Setup signal handlers
    loop = asyncio.get_running_loop()

    def handle_shutdown(signum):
        logger.info(f"Received signal {signum}, shutting down...")
        asyncio.create_task(server.stop())

    # Register signal handlers
    for sig in (signal.SIGINT, signal.SIGTERM):
        loop.add_signal_handler(sig, lambda s=sig: handle_shutdown(s))

    try:
        await server.start()
    except KeyboardInterrupt:
        await server.stop()


def run_server():
    """Entry point for running the collector server (blocks until shutdown)."""
    asyncio.run(run_server_async())
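
A minimal launcher sketch. run_server() blocks until SIGINT/SIGTERM. Note the assumption: server.py imports names (TRACES_DIR, AUDIO_DIR, LogWriter, ExceptionsWriter, create_app) whose definitions are not shown elsewhere in this diff, so this only runs if those modules and constants exist in the installed package.

from finchvox.collector.server import run_server

if __name__ == "__main__":
    run_server()  # OTLP gRPC on :4317, HTTP collector + UI on :3000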
finchvox/collector/service.py
@@ -0,0 +1,43 @@
from loguru import logger
from opentelemetry.proto.collector.trace.v1.trace_service_pb2_grpc import TraceServiceServicer
from opentelemetry.proto.collector.trace.v1.trace_service_pb2 import (
    ExportTraceServiceResponse,
    ExportTracePartialSuccess,
)
from .writer import SpanWriter


class TraceCollectorServicer(TraceServiceServicer):
    """Implements the OTLP TraceService gRPC interface."""

    def __init__(self, span_writer: SpanWriter):
        self.span_writer = span_writer

    def Export(self, request, context):
        """Handle incoming trace export requests."""
        try:
            span_count = 0
            span_names = []
            for resource_spans in request.resource_spans:
                for scope_spans in resource_spans.scope_spans:
                    for span in scope_spans.spans:
                        self.span_writer.write_span(span, resource_spans, scope_spans)
                        span_names.append(span.name)
                        span_count += 1

            logger.info(f"Successfully processed {span_count} spans={span_names}")
            return ExportTraceServiceResponse(
                partial_success=ExportTracePartialSuccess(
                    rejected_spans=0,
                    error_message=""
                )
            )
        except Exception as e:
            logger.error(f"Error processing spans: {e}", exc_info=True)
            # Continue processing - return partial success
            return ExportTraceServiceResponse(
                partial_success=ExportTracePartialSuccess(
                    rejected_spans=0,  # Could track actual failures
                    error_message=str(e)
                )
            )
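
One way to send spans to this servicer is the standard OpenTelemetry SDK with an insecure OTLP/gRPC exporter aimed at the collector's port (4317 per config.py). This is a generic sketch, not finchvox code; it requires the opentelemetry-sdk and opentelemetry-exporter-otlp-proto-grpc packages.

from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter

provider = TracerProvider()
provider.add_span_processor(
    BatchSpanProcessor(OTLPSpanExporter(endpoint="localhost:4317", insecure=True))
)
trace.set_tracer_provider(provider)

tracer = trace.get_tracer("example")
with tracer.start_as_current_span("conversation"):
    pass  # span is batched and delivered to TraceCollectorServicer.Export

provider.shutdown()  # flush pending spans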
finchvox/collector/writer.py
@@ -0,0 +1,86 @@
import json
from pathlib import Path
from loguru import logger
from google.protobuf.json_format import MessageToDict
from finchvox.collector.config import get_trace_dir


class SpanWriter:
    """Handles writing spans to JSONL files organized by trace_id."""

    def __init__(self, data_dir: Path):
        """
        Initialize SpanWriter.

        Args:
            data_dir: Base data directory (e.g., ~/.finchvox)
        """
        self.data_dir = data_dir

    def write_span(self, span, resource_spans, scope_spans):
        """Write a single span to its trace-specific JSONL file."""
        try:
            # Extract trace_id as hex string
            trace_id_hex = span.trace_id.hex()

            # Get trace-specific directory
            trace_dir = get_trace_dir(self.data_dir, trace_id_hex)
            trace_dir.mkdir(parents=True, exist_ok=True)

            # Convert protobuf to dict for JSON serialization
            span_dict = self._convert_span_to_dict(span, resource_spans, scope_spans)

            # Write to trace file inside trace directory
            trace_file = trace_dir / f"trace_{trace_id_hex}.jsonl"

            # Check if this is a new trace or existing trace
            is_new_trace = not trace_file.exists()

            if is_new_trace:
                # Log span type for new traces
                span_name = span.name if span.name else "UNKNOWN"
                logger.info(f"New trace {trace_id_hex[:8]}... - first span type: {span_name}")
            else:
                # Count existing spans in the trace
                with trace_file.open('r') as f:
                    span_count = sum(1 for _ in f)
                logger.info(f"Trace {trace_id_hex[:8]}... - adding span #{span_count + 1}")

            with trace_file.open('a') as f:
                json.dump(span_dict, f)
                f.write('\n')

            logger.debug(f"Wrote span {span.span_id.hex()} to {trace_file}")
        except Exception as e:
            logger.error(f"Failed to write span: {e}", exc_info=True)

    def _convert_span_to_dict(self, span, resource_spans, scope_spans):
        """Convert protobuf span to dictionary, preserving all fields."""
        # Use MessageToDict for automatic conversion
        span_data = MessageToDict(
            span,
            preserving_proto_field_name=True
        )

        # Note: MessageToDict converts bytes to base64 by default
        # We'll enhance with hex representation for trace_id/span_id
        span_data['trace_id_hex'] = span.trace_id.hex()
        span_data['span_id_hex'] = span.span_id.hex()
        if span.parent_span_id:
            span_data['parent_span_id_hex'] = span.parent_span_id.hex()

        # Include resource attributes for context
        if resource_spans.resource:
            span_data['resource'] = MessageToDict(
                resource_spans.resource,
                preserving_proto_field_name=True
            )

        # Include instrumentation scope
        if scope_spans.scope:
            span_data['instrumentation_scope'] = MessageToDict(
                scope_spans.scope,
                preserving_proto_field_name=True
            )

        return span_data
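
Reading a trace's JSONL back, e.g. to order spans by start time (the trace ID is hypothetical). Because the writer uses preserving_proto_field_name=True, span fields keep their proto names such as start_time_unix_nano, and MessageToDict serializes uint64 values as strings, hence the int() conversion.

import json

from finchvox.collector.config import get_default_data_dir, get_trace_dir

trace_id = "0af7651916cd43dd8448eb211c80319c"
trace_file = get_trace_dir(get_default_data_dir(), trace_id) / f"trace_{trace_id}.jsonl"

spans = [json.loads(line) for line in trace_file.read_text().splitlines() if line]
spans.sort(key=lambda s: int(s.get("start_time_unix_nano", 0)))
for s in spans:
    print(s["span_id_hex"], s.get("name"))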