PyPI - echo-vector - Versions diffs - 0.1.1__py3-none-any.whl - Mend

echo-vector 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

echo_vector-0.1.1.dist-info/METADATA +288 -0
echo_vector-0.1.1.dist-info/RECORD +38 -0
echo_vector-0.1.1.dist-info/WHEEL +4 -0
echo_vector-0.1.1.dist-info/entry_points.txt +2 -0
echovector/__init__.py +7 -0
echovector/api/__init__.py +1 -0
echovector/api/server.py +144 -0
echovector/audio/__init__.py +12 -0
echovector/audio/chunker.py +71 -0
echovector/audio/metadata.py +58 -0
echovector/audio/processor.py +53 -0
echovector/audio/streaming.py +33 -0
echovector/cli/__init__.py +1 -0
echovector/cli/main.py +165 -0
echovector/core.py +289 -0
echovector/embeddings/__init__.py +15 -0
echovector/embeddings/ast_model.py +41 -0
echovector/embeddings/base.py +43 -0
echovector/embeddings/cache.py +96 -0
echovector/embeddings/clap.py +126 -0
echovector/embeddings/factory.py +78 -0
echovector/embeddings/hubert.py +41 -0
echovector/embeddings/local.py +109 -0
echovector/embeddings/wav2vec2.py +41 -0
echovector/embeddings/whisper_enc.py +44 -0
echovector/evaluation/__init__.py +1 -0
echovector/evaluation/metrics.py +45 -0
echovector/indexing/__init__.py +12 -0
echovector/indexing/base.py +105 -0
echovector/indexing/faiss_index.py +182 -0
echovector/indexing/store.py +165 -0
echovector/search/__init__.py +14 -0
echovector/search/engine.py +82 -0
echovector/search/filters.py +55 -0
echovector/search/results.py +41 -0
echovector/utils/__init__.py +6 -0
echovector/utils/config.py +69 -0
echovector/utils/logging.py +31 -0

echo_vector-0.1.1.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,288 @@
+Metadata-Version: 2.4
+Name: echo_vector
+Version: 0.1.1
+Summary: Semantic text search over audio files without full transcription
+Project-URL: Homepage, https://github.com/ahron-maslin/echo_vector
+Project-URL: Documentation, https://github.com/ahron-maslin/echo_vector#readme
+Project-URL: Repository, https://github.com/ahron-maslin/echo_vector
+Project-URL: Issues, https://github.com/ahron-maslin/echo_vector/issues
+Project-URL: Changelog, https://github.com/ahron-maslin/echo_vector/blob/main/CHANGELOG.md
+Author: EchoVector Contributors
+License-Expression: MIT
+Keywords: CLAP,FAISS,audio,embeddings,search,semantic,vector
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Multimedia :: Sound/Audio :: Analysis
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: Topic :: Text Processing :: Indexing
+Classifier: Typing :: Typed
+Requires-Python: >=3.12
+Requires-Dist: faiss-cpu<2,>=1.7
+Requires-Dist: librosa<1,>=0.10
+Requires-Dist: numpy<3,>=1.26
+Requires-Dist: pydantic<3,>=2.5
+Requires-Dist: pydub<1,>=0.25
+Requires-Dist: rich<14,>=13.7
+Requires-Dist: soundfile<1,>=0.12
+Requires-Dist: tqdm<5,>=4.66
+Requires-Dist: typer[all]<1,>=0.12
+Provides-Extra: all
+Requires-Dist: fastapi<1,>=0.109; extra == 'all'
+Requires-Dist: httpx2<3,>=2.0; extra == 'all'
+Requires-Dist: hypothesis<7,>=6.92; extra == 'all'
+Requires-Dist: mkdocs-gen-files<1,>=0.5; extra == 'all'
+Requires-Dist: mkdocs-literate-nav<1,>=0.6; extra == 'all'
+Requires-Dist: mkdocs-material<10,>=9.5; extra == 'all'
+Requires-Dist: mkdocstrings[python]<1,>=0.24; extra == 'all'
+Requires-Dist: mutmut<3,>=2.4; extra == 'all'
+Requires-Dist: mypy<2,>=1.8; extra == 'all'
+Requires-Dist: pre-commit<4,>=3.6; extra == 'all'
+Requires-Dist: pytest-asyncio<1,>=0.23; extra == 'all'
+Requires-Dist: pytest-cov<6,>=5.0; extra == 'all'
+Requires-Dist: pytest-xdist<4,>=3.5; extra == 'all'
+Requires-Dist: pytest<9,>=8.0; extra == 'all'
+Requires-Dist: ruff<1,>=0.15; extra == 'all'
+Requires-Dist: torch<3,>=2.1; extra == 'all'
+Requires-Dist: transformers<5,>=4.36; extra == 'all'
+Requires-Dist: uvicorn[standard]<1,>=0.27; extra == 'all'
+Provides-Extra: api
+Requires-Dist: fastapi<1,>=0.109; extra == 'api'
+Requires-Dist: uvicorn[standard]<1,>=0.27; extra == 'api'
+Provides-Extra: clap
+Requires-Dist: torch<3,>=2.1; extra == 'clap'
+Requires-Dist: transformers<5,>=4.36; extra == 'clap'
+Provides-Extra: dev
+Requires-Dist: httpx2<3,>=2.0; extra == 'dev'
+Requires-Dist: hypothesis<7,>=6.92; extra == 'dev'
+Requires-Dist: mutmut<3,>=2.4; extra == 'dev'
+Requires-Dist: mypy<2,>=1.8; extra == 'dev'
+Requires-Dist: pre-commit<4,>=3.6; extra == 'dev'
+Requires-Dist: pytest-asyncio<1,>=0.23; extra == 'dev'
+Requires-Dist: pytest-cov<6,>=5.0; extra == 'dev'
+Requires-Dist: pytest-xdist<4,>=3.5; extra == 'dev'
+Requires-Dist: pytest<9,>=8.0; extra == 'dev'
+Requires-Dist: ruff<1,>=0.15; extra == 'dev'
+Provides-Extra: docs
+Requires-Dist: mkdocs-gen-files<1,>=0.5; extra == 'docs'
+Requires-Dist: mkdocs-literate-nav<1,>=0.6; extra == 'docs'
+Requires-Dist: mkdocs-material<10,>=9.5; extra == 'docs'
+Requires-Dist: mkdocstrings[python]<1,>=0.24; extra == 'docs'
+Description-Content-Type: text/markdown
+# 🔊 EchoVector
+> **Semantic text search over audio files — without full transcription.**
+[![CI](https://github.com/ahron-maslin/echo_vector/actions/workflows/test.yml/badge.svg)](https://github.com/ahron-maslin/echo_vector/actions/workflows/test.yml)
+[![Coverage](https://img.shields.io/badge/coverage-%3E95%25-brightgreen)](.)
+[![Python](https://img.shields.io/badge/python-3.12%2B-blue)](.)
+[![License](https://img.shields.io/badge/license-MIT-green)](LICENSE)
+---
+## What is EchoVector?
+EchoVector indexes audio files by generating **semantic embeddings directly from audio waveforms**, then lets you search them with natural language text queries — all without transcribing a single word.
+### Traditional approach (slow & expensive)
+```
+Audio → Full Transcription → Text Embeddings → Text Search
+```
+### EchoVector approach (fast & efficient)
+```
+Audio → Audio Chunks → Audio Embeddings ─┐
+                                          ├─► ANN Search → Results
+Text Query → Text Embedding ──────────────┘
+```
+## Features
+- 🎵 **Multi-format support** — MP3, WAV, FLAC, M4A
+- 🧠 **Direct audio embeddings** — No transcription needed
+- 🔍 **Semantic search** — Query with natural language
+- ⚡ **FAISS-powered** — Approximate nearest neighbor search
+- 🔌 **Pluggable backends** — CLAP, Whisper, wav2vec2, HuBERT, AST
+- 🧪 **Offline smoke backend** — `local` backend for CI/Kaggle tests without model downloads
+- 📊 **Rich CLI** — Progress bars, colors, benchmarking mode
+- 🌐 **REST API** — Optional FastAPI server
+- 📦 **Production-ready** — Typed, tested, documented
+## Quick Start
+### Installation
+```bash
+pip install echo-vector
+```
+Or with uv:
+```bash
+uv add echo-vector
+```
+### CLI Usage
+```bash
+# One-time indexing: split audio into timestamped chunks and embed each chunk
+echovector index ./meetings
+# Fast repeated search: embed only the text query and search the saved FAISS index
+echovector search "discussion about transformers"
+# Search with options
+echovector search "pricing strategy" --top-k 10
+# View index statistics
+echovector stats
+```
+For a no-download smoke test, use the deterministic local backend:
+```bash
+echovector index ./meetings --backend local --store-dir ./ev-index
+echovector search "high alarm tone" --backend local --store-dir ./ev-index
+echovector stats --backend local --store-dir ./ev-index
+```
+The search command does not reopen or scan the audio files. All expensive audio processing happens
+during `index`; `search` loads the saved vector index, embeds the short text query, and returns the
+nearest timestamped chunks.
+### Python API
+```python
+from echovector import EchoVector
+ev = EchoVector()
+# Index audio files
+ev.index("./meetings")
+# Search with natural language
+results = ev.search("conversation about CUDA kernels")
+for r in results:
+    print(
+        f"{r.filepath} "
+        f"[{r.timestamp_range.start:.1f}s - {r.timestamp_range.end:.1f}s] "
+        f"score={r.score:.4f}"
+    )
+```
+## Testing on Kaggle
+Kaggle is useful for GPU-backed CLAP tests, but first check the runtime Python version:
+```python
+import sys
+print(sys.version)
+```
+EchoVector currently declares `Python >=3.12`. If the Kaggle image is older, install and test in a
+Python 3.12-capable environment instead, or relax the project requirement only after validating the
+test suite on that Python version.
+### Notebook smoke test without internet/model downloads
+Upload this repository as a Kaggle dataset, attach it to a notebook, then run:
+```python
+%cd /kaggle/input/<your-echo-vector-dataset>
+!pip install -e . --no-deps
+!pip install numpy soundfile librosa faiss-cpu typer rich pydantic pydub tqdm
+!python -m pytest tests/ -q
+```
+Create a tiny audio corpus and test the real CLI/index path:
+```python
+import os
+import numpy as np
+import soundfile as sf
+audio_dir = "/kaggle/working/ev-audio"
+index_dir = "/kaggle/working/ev-index"
+os.makedirs(audio_dir, exist_ok=True)
+sr = 16000
+t = np.linspace(0, 1.0, sr, endpoint=False)
+sf.write(f"{audio_dir}/high_tone.wav", 0.25 * np.sin(2 * np.pi * 880 * t), sr)
+sf.write(f"{audio_dir}/low_tone.wav", 0.25 * np.sin(2 * np.pi * 110 * t), sr)
+```
+```python
+!echovector index /kaggle/working/ev-audio --backend local --store-dir /kaggle/working/ev-index --reset
+!echovector search "high alarm tone" --backend local --store-dir /kaggle/working/ev-index --top-k 2
+!echovector stats --backend local --store-dir /kaggle/working/ev-index
+```
+This validates packaging, audio loading, FAISS persistence, metadata storage, and the CLI without
+depending on Hugging Face downloads.
+### CLAP semantic test
+For actual semantic text-to-audio search, enable internet in the notebook settings and use a GPU
+runtime if available:
+```python
+!pip install transformers torch faiss-cpu librosa soundfile
+!echovector index /kaggle/input/<audio-dataset> --backend clap --device cuda --store-dir /kaggle/working/clap-index --recursive --reset
+!echovector search "people discussing pricing strategy" --backend clap --device cuda --store-dir /kaggle/working/clap-index --top-k 10
+```
+If GPU is unavailable, replace `--device cuda` with `--device cpu`; it will be slower. Keep indexes
+under `/kaggle/working` so they are writable during the notebook session.
+## Architecture
+```
+echovector/
+├── audio/        # Audio loading, chunking, streaming, metadata
+├── embeddings/   # Pluggable embedding backends (CLAP, Whisper, etc.)
+├── indexing/     # Vector index backends (FAISS, with pluggable design)
+├── search/       # Search engine, filtering, result hydration
+├── cli/          # Typer-based CLI with Rich output
+├── api/          # Optional FastAPI server
+├── evaluation/   # Metrics (recall@k, throughput)
+├── benchmarks/   # Reproducible benchmark harness
+└── utils/        # Config, logging, helpers
+```
+## Supported Embedding Backends
+| Backend | Text+Audio Aligned | Notes |
+|---------|-------------------|-------|
+| **CLAP** (default) | ✅ | Best for text→audio search |
+| Whisper Encoder | ❌ | Audio-only embeddings |
+| wav2vec2 | ❌ | Audio-only, good for speech |
+| HuBERT | ❌ | Audio-only, self-supervised |
+| Audio Spectrogram Transformer | ❌ | Audio-only, classification-focused |
+## Development
+```bash
+# Clone and install
+git clone https://github.com/ahron-maslin/echo_vector.git
+cd echo_vector
+uv sync --all-extras
+# Run checks
+make lint
+make typecheck
+make test
+make coverage
+```
+## License
+MIT

echo_vector-0.1.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,38 @@
+echovector/__init__.py,sha256=bNOH--O4ZZ2JGLk2WBmuR-LDJwxrHH9NFPVJGmNlUys,154
+echovector/core.py,sha256=WgTis_6d1SNbmT8iE26SZjk2xeT5U_Ju6_iOh53czDc,10857
+echovector/api/__init__.py,sha256=UXSa3IbGYl8Ld7hU8YHtIDB3RWr-Jr47xerJpd-JKGI,33
+echovector/api/server.py,sha256=fU4780Am7V9ZoNpuOmWKaObEudHm98e4QTG6koR8rKc,4059
+echovector/audio/__init__.py,sha256=FLz5_kYf5l1VpWIFXZcKuDV6BDft-bxugyNSqEaAEm0,300
+echovector/audio/chunker.py,sha256=oA2XUkhBoQBsdIIFJmX9WkNUMMeTtHaCX-P9jlFCKk8,2340
+echovector/audio/metadata.py,sha256=YWP6A7HS_mK0oC8cXoF5uiQ-3NQcFNgE1mN38X_osU4,1474
+echovector/audio/processor.py,sha256=rxNz72dc3WvM-U8TyCNC3wUGDWZZOEDiP-SgoP3_peE,1589
+echovector/audio/streaming.py,sha256=sm6-XGdVn4NrgviDdpqOulQVd1ZgYdJxw-eug1MjSaE,893
+echovector/cli/__init__.py,sha256=I_dYESPy1z4et6onBmUZUf9QGk-mWz34codFENej_0U,33
+echovector/cli/main.py,sha256=TTJNzPgBO86YdgIMhmLe8FOuGoQeQysqXZUT5bfY9b0,5302
+echovector/embeddings/__init__.py,sha256=HJ3yRohOHxLYiVI1SePdiPIfY4Pxd-Fum6rbm4_WbSw,407
+echovector/embeddings/ast_model.py,sha256=0ZulFEx3jschoBft2cJn1UX3ZYUcINn03guqMINbekg,1277
+echovector/embeddings/base.py,sha256=3zYaxqamBvj2BEPwQWMm2czcqT-jocKFLjkRvVsTWp8,1080
+echovector/embeddings/cache.py,sha256=3yMxEQhQsHELyj7ZYdeT3Kwm4f0EuVRbt5WLvicOeCM,3120
+echovector/embeddings/clap.py,sha256=h7i1hyzuOHegLq0bHT2MnNHPkiNKholGHRP4UbaPbcA,4311
+echovector/embeddings/factory.py,sha256=j0CWYZq4NVYU8igdxOKL2pKkE_3FBQFVXtnYCmnXPxs,2683
+echovector/embeddings/hubert.py,sha256=Imnmw6m9ilfRPAFCQVP8Ff_vdS-uHJ4Q6633PQr6Sag,1253
+echovector/embeddings/local.py,sha256=jBDrWXMTyTCEDZVmFeNd8C4AoBL2mv0H3m_hk-EhpwE,4277
+echovector/embeddings/wav2vec2.py,sha256=g8JGc6ji3_MDxTIUgQkT565OeZMkb09Ly72lyPsKEZk,1263
+echovector/embeddings/whisper_enc.py,sha256=weFspd31TzRE83tEnnioESas-e0t33i112DiCrfBdhQ,1333
+echovector/evaluation/__init__.py,sha256=3vsJte5hUM4D9lwLouVyqLHKuZ6Xlqwyzoi7UAXw8FE,40
+echovector/evaluation/metrics.py,sha256=jXNWoko4T3TZIgZKBiL5eoVrjEWfPGcT15fAepM4IfQ,1227
+echovector/indexing/__init__.py,sha256=x8D0AvoJFce5_LU-Evdvi01JGmgr8bvemu3ZJRpw-4g,227
+echovector/indexing/base.py,sha256=MXHVxxr2T0o2GWo4jO_8ZNnSRMyJoi9LrEL7EiDp3lY,2880
+echovector/indexing/faiss_index.py,sha256=hWvuKwD4oLEFOknp_OxZWAUI_cpVNJNeIamVcRmbLhM,6615
+echovector/indexing/store.py,sha256=C3zVDSjOGZNIu49e_XXVHFzDCQpWf51VpIHbz1TXP_w,5357
+echovector/search/__init__.py,sha256=qy3p-vIXtIbzw8ratKf434IUJFOLtxPLjT1DsAwaDIE,360
+echovector/search/engine.py,sha256=48IjaLjUTWoD11zI-MCJl9K_yoccxeJrP4qdkqeUZbU,2319
+echovector/search/filters.py,sha256=AHUbq9OzyTnQa7Ci2bjzzpeNhWYtNqBN-7ypmPk5Hq4,1712
+echovector/search/results.py,sha256=3qDgF_VDXTlxbfa0RWCQz_iTEhTG7CCMkBBnlDQmheE,1063
+echovector/utils/__init__.py,sha256=78OxBC7wC9q5JALB4u38dJTU73xbqlThp_33E5LidzU,188
+echovector/utils/config.py,sha256=CN2i1p0k_hbq2XA2mLGwgEFS2vfSI5XShT9CmtBeu24,2002
+echovector/utils/logging.py,sha256=XdnP3hL9FKm0lZCG4k_Kz8kgLwxpNvbGxCFJdyusRrI,778
+echo_vector-0.1.1.dist-info/METADATA,sha256=wM-6TNcq5clyP85Je2qN3mvNZcCXwl_bplJgCeD5xfI,10058
+echo_vector-0.1.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
+echo_vector-0.1.1.dist-info/entry_points.txt,sha256=5ckspm1n74MtWsT9mfNvLymeDG9JMPVVsh21DMN7B-g,55
+echo_vector-0.1.1.dist-info/RECORD,,

echo_vector-0.1.1.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.30.1
+Root-Is-Purelib: true
+Tag: py3-none-any

echo_vector-0.1.1.dist-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ echovector = echovector.cli.main:app

echovector/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""EchoVector: Audio vector embedding and processing library."""
+from echovector.core import EchoVector
+# Must match the distribution version declared in the wheel METADATA (0.1.1).
+__version__ = "0.1.1"
+# Only the EchoVector facade is re-exported from the package root.
+__all__ = ["EchoVector"]

echovector/api/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """API module for EchoVector."""

echovector/api/server.py ADDED Viewed

@@ -0,0 +1,144 @@
+"""FastAPI server for EchoVector."""
+from __future__ import annotations
+from typing import TYPE_CHECKING, Any
+from fastapi import Depends, FastAPI
+from pydantic import BaseModel, Field
+if TYPE_CHECKING:
+    from echovector.core import EchoVector
+# Application object; the route decorators further down register their endpoints on it.
+app = FastAPI(
+    title="EchoVector API",
+    description="Semantic search over audio files.",
+    # Application version handed to FastAPI; kept equal to the 0.1.1 wheel that ships this module.
+    version="0.1.1",
+)
+# Default engine instance — override via app.dependency_overrides in tests.
+_default_engine: EchoVector | None = None
+def get_engine() -> EchoVector:
+    """Provide the engine instance shared by the request handlers.
+    Returns:
+        The engine currently stored in the module-level ``_default_engine``.
+    Raises:
+        RuntimeError: If no engine has been stored yet.
+    """
+    current = _default_engine
+    if current is not None:
+        return current
+    raise RuntimeError(
+        "No EchoVector engine configured. Call configure_engine() before starting the server."
+    )
+def configure_engine(engine: EchoVector) -> None:
+    """Set the engine used by the server at startup.
+    Args:
+        engine: Engine instance that ``get_engine`` returns from now on.
+    """
+    # Rebind the module-level default so that later get_engine() calls see this engine.
+    global _default_engine
+    _default_engine = engine
+# ---------------------------------------------------------------------------
+# Request / response models
+# ---------------------------------------------------------------------------
+class IndexRequest(BaseModel):
+    """Body of ``POST /index``: what to index and whether to re-index."""
+    # No default is supplied, so the field is required.
+    paths: list[str] = Field(description="Audio file or directory paths to index.")
+    force: bool = Field(default=False, description="Re-index files that are already stored.")
+class IndexResponse(BaseModel):
+    """Result of an index operation."""
+    # Value returned by ``engine.index`` for the request.
+    chunks_added: int
+    # Count of requested paths reported as not indexed (computed in ``index_audio``).
+    files_skipped: int
+class SearchRequest(BaseModel):
+    """Body of ``POST /search``: the query text and the result limit."""
+    # No default is supplied, so the field is required.
+    query: str = Field(description="Natural language query.")
+    # ``ge=1`` rejects zero and negative limits during validation.
+    top_k: int = Field(default=5, ge=1, description="Maximum results to return.")
+class SearchResultItem(BaseModel):
+    """Single search result."""
+    # Path of the audio file the matching chunk belongs to.
+    filepath: str
+    # Bounds of the matching chunk, copied from the result's ``timestamp_range``.
+    start: float
+    end: float
+    # Score reported by the engine for this match.
+    score: float
+    # Extra per-result data; a fresh empty dict is created when none is supplied.
+    metadata: dict[str, Any] = Field(default_factory=dict)
+class SearchResponse(BaseModel):
+    """Search results."""
+    # One item per result returned by ``engine.search``, in the order the engine returned them.
+    results: list[SearchResultItem]
+class StatsResponse(BaseModel):
+    """Index statistics."""
+    # Each field mirrors the same-named key of the mapping returned by ``engine.stats()``.
+    chunks: int
+    embedding_dim: int
+    store_dir: str
+# ---------------------------------------------------------------------------
+# Endpoints
+# ---------------------------------------------------------------------------
+@app.post("/index", response_model=IndexResponse)
+def index_audio(
+    request: IndexRequest,
+    engine: EchoVector = Depends(get_engine),
+) -> IndexResponse:
+    """Index audio files or directories.
+    Args:
+        request: Paths to index and the ``force`` re-index flag.
+        engine: Engine injected by FastAPI through ``get_engine``.
+    Returns:
+        The number of chunks added and an estimate of how many paths were skipped.
+    """
+    chunks_added = engine.index(request.paths, force=request.force)
+    # NOTE(review): only the total chunk count is available here, so the skipped
+    # count is a coarse estimate: a run that added any chunk is counted as exactly
+    # one indexed path, and a run that added none counts every path as skipped.
+    indexed_paths = 1 if chunks_added > 0 else 0
+    # max() keeps the estimate at zero for an empty ``paths`` list.
+    files_skipped = max(0, len(request.paths) - indexed_paths)
+    return IndexResponse(chunks_added=chunks_added, files_skipped=files_skipped)
+@app.post("/search", response_model=SearchResponse)
+def search_audio(
+    request: SearchRequest,
+    engine: EchoVector = Depends(get_engine),
+) -> SearchResponse:
+    """Run a text query against the indexed audio.
+    Args:
+        request: Query text and the maximum number of results.
+        engine: Engine injected by FastAPI through ``get_engine``.
+    Returns:
+        The engine's results converted to response items, in the engine's order.
+    """
+    items: list[SearchResultItem] = []
+    for hit in engine.search(request.query, top_k=request.top_k):
+        span = hit.timestamp_range
+        item = SearchResultItem(
+            filepath=hit.filepath,
+            start=span.start,
+            end=span.end,
+            score=hit.score,
+            # Falsy metadata (e.g. None) is replaced with an empty dict.
+            metadata=hit.metadata or {},
+        )
+        items.append(item)
+    return SearchResponse(results=items)
+@app.get("/stats", response_model=StatsResponse)
+def get_stats(engine: EchoVector = Depends(get_engine)) -> StatsResponse:
+    """Report the engine's index statistics.
+    Args:
+        engine: Engine injected by FastAPI through ``get_engine``.
+    Returns:
+        The ``chunks``, ``embedding_dim`` and ``store_dir`` entries of
+        ``engine.stats()``, coerced to ``int``, ``int`` and ``str``.
+    """
+    snapshot = engine.stats()
+    chunk_count = int(snapshot["chunks"])
+    dimension = int(snapshot["embedding_dim"])
+    location = str(snapshot["store_dir"])
+    return StatsResponse(chunks=chunk_count, embedding_dim=dimension, store_dir=location)
+@app.post("/reset")
+def reset_index(engine: EchoVector = Depends(get_engine)) -> dict[str, str]:
+    """Clear the index.
+    Args:
+        engine: Engine injected by FastAPI through ``get_engine``.
+    Returns:
+        ``{"status": "ok"}`` once ``engine.reset()`` has returned.
+    """
+    engine.reset()
+    return {"status": "ok"}

echovector/audio/__init__.py ADDED Viewed

@@ -0,0 +1,12 @@
+from .chunker import SilenceAwareChunker
+from .metadata import AudioMetadata, extract_metadata
+from .processor import AudioProcessor
+from .streaming import AudioStreamer
+# Names re-exported from the submodules imported above.
+__all__ = [
+    "AudioMetadata",
+    "AudioProcessor",
+    "AudioStreamer",
+    "SilenceAwareChunker",
+    "extract_metadata",
+]

echovector/audio/chunker.py ADDED Viewed

@@ -0,0 +1,71 @@
+import numpy as np
+import numpy.typing as npt
+class SilenceAwareChunker:
+    """Split a waveform into chunks at silent stretches.
+    A sample counts as silent when its magnitude is not above the loudest sample
+    of the signal scaled down by ``top_db`` decibels. Runs of non-silent samples
+    become intervals; intervals longer than ``max_chunk_length`` are cut into
+    pieces of that length.
+    """
+    def __init__(
+        self,
+        top_db: float = 60.0,
+        min_chunk_length: float = 1.0,
+        max_chunk_length: float = 10.0,
+        sample_rate: int = 16000,
+        min_silence_length: float = 0.0,
+    ) -> None:
+        """Initialize the chunker.
+        Args:
+            top_db: The threshold (in decibels) below reference to consider as silence.
+            min_chunk_length: Minimum length of a chunk in seconds.
+            max_chunk_length: Maximum length of a chunk in seconds.
+            sample_rate: The sample rate of the audio.
+            min_silence_length: Shortest silent stretch, in seconds, that separates
+                two intervals. Shorter silent stretches are bridged and stay inside
+                the interval. The default of 0.0 splits on every silent sample.
+        """
+        self.top_db = top_db
+        self.min_chunk_length = min_chunk_length
+        self.max_chunk_length = max_chunk_length
+        self.sample_rate = sample_rate
+        self.min_silence_length = min_silence_length
+    def chunk(self, audio: npt.NDArray[np.float32]) -> list[npt.NDArray[np.float32]]:
+        """Split audio into chunks based on silence.
+        Args:
+            audio: The audio signal to chunk. Indexing below treats it as a 1-D array.
+        Returns:
+            A list of audio chunks (slices of ``audio``). Empty when the input is
+            empty or entirely zero.
+        Raises:
+            ValueError: If ``max_chunk_length * sample_rate`` is less than one sample.
+        """
+        if len(audio) == 0:
+            return []
+        magnitude = np.abs(audio)
+        max_amplitude = float(np.max(magnitude))
+        if max_amplitude == 0.0:
+            return []
+        # Convert the decibel offset into a linear amplitude relative to the peak.
+        threshold = max_amplitude * (10.0 ** (-self.top_db / 20.0))
+        non_silent = np.flatnonzero(magnitude > threshold)
+        if len(non_silent) == 0:
+            return []
+        min_samples = int(self.min_chunk_length * self.sample_rate)
+        max_samples = int(self.max_chunk_length * self.sample_rate)
+        if max_samples < 1:
+            # A non-positive piece size would make the splitting loop below never terminate.
+            raise ValueError(
+                "max_chunk_length * sample_rate must be at least one sample, "
+                f"got {self.max_chunk_length} s at {self.sample_rate} Hz"
+            )
+        # Consecutive non-silent indices differ by 1; a larger difference means silent
+        # samples lie in between. Split only when that silent run is long enough.
+        gap_samples = max(1, int(self.min_silence_length * self.sample_rate))
+        breaks = np.flatnonzero(np.diff(non_silent) > gap_samples) + 1
+        intervals = [
+            (int(group[0]), int(group[-1]) + 1)
+            for group in np.split(non_silent, breaks)
+            if len(group) > 0
+        ]
+        chunks: list[npt.NDArray[np.float32]] = []
+        for start, end in intervals:
+            interval_audio: npt.NDArray[np.float32] = audio[start:end]
+            while len(interval_audio) > max_samples:
+                chunks.append(interval_audio[:max_samples])
+                interval_audio = interval_audio[max_samples:]
+            # A too-short remainder is still kept when nothing has been emitted yet,
+            # so audio with any non-silent sample yields at least one chunk.
+            keep_short_first = len(interval_audio) > 0 and len(chunks) == 0
+            if len(interval_audio) >= min_samples or keep_short_first:
+                chunks.append(interval_audio)
+        return chunks

echovector/audio/metadata.py ADDED Viewed

@@ -0,0 +1,58 @@
+import os
+from dataclasses import dataclass
+import librosa
+import soundfile as sf
+@dataclass
+class AudioMetadata:
+    """Metadata for an audio file."""
+    # Duration as reported by soundfile, or by librosa in the fallback path.
+    duration: float
+    # Native sample rate of the file.
+    sample_rate: int
+    # Number of audio channels.
+    channels: int
+    # soundfile's format string, or the file extension without the dot in the fallback path.
+    format: str
+    # Size on disk as returned by ``os.path.getsize``.
+    file_size: int
+    # The path that was passed to ``extract_metadata``.
+    file_path: str
+    file_path: str
+def extract_metadata(file_path: str) -> AudioMetadata:
+    """Collect basic facts about an audio file on disk.
+    The file is inspected with ``soundfile`` first. If that raises for any reason,
+    the values are obtained through ``librosa`` instead, which loads the audio.
+    Args:
+        file_path: Location of the audio file.
+    Returns:
+        An ``AudioMetadata`` with duration, sample rate, channel count, format,
+        size on disk and the inspected path.
+    Raises:
+        FileNotFoundError: If nothing exists at ``file_path``.
+    """
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"Audio file not found: {file_path}")
+    size_on_disk = os.path.getsize(file_path)
+    try:
+        header = sf.info(file_path)
+        seconds, rate, n_channels, container = (
+            float(header.duration),
+            int(header.samplerate),
+            int(header.channels),
+            str(header.format),
+        )
+    except Exception:
+        # soundfile could not describe the file; fall back to librosa
+        seconds = float(librosa.get_duration(path=file_path))
+        rate = int(librosa.get_samplerate(file_path))
+        samples, _ = librosa.load(file_path, sr=None, mono=False)
+        # A 1-D result is a single channel; otherwise the first axis counts channels.
+        n_channels = 1 if samples.ndim <= 1 else int(samples.shape[0])
+        extension = os.path.splitext(file_path)[1]
+        container = str(extension.lstrip("."))
+    return AudioMetadata(
+        duration=seconds,
+        sample_rate=rate,
+        channels=n_channels,
+        format=container,
+        file_size=size_on_disk,
+        file_path=file_path,
+    )

echovector/audio/processor.py ADDED Viewed

@@ -0,0 +1,53 @@
+import os
+import librosa
+import numpy as np
+import numpy.typing as npt
+import soundfile as sf
+class AudioProcessor:
+    """Processes audio files for vectorization."""
+    def __init__(self, target_sample_rate: int = 16000, mono: bool = True) -> None:
+        """Initialize the AudioProcessor.
+        Args:
+            target_sample_rate: The sample rate to convert audio to.
+            mono: Whether to convert audio to mono.
+        """
+        self.target_sample_rate = target_sample_rate
+        self.mono = mono
+    def load_audio(self, file_path: str) -> npt.NDArray[np.float32]:
+        """Load an audio file into a numpy array.
+        The file is read with ``soundfile``; when libsndfile rejects it, ``librosa``
+        is used instead. The audio is then optionally mixed down to mono and
+        resampled to ``target_sample_rate``.
+        Args:
+            file_path: Path to the audio file (mp3/wav/flac/m4a).
+        Returns:
+            The loaded audio as a float32 numpy array. Multi-channel audio that is
+            not mixed down is laid out as (frames, channels).
+        Raises:
+            FileNotFoundError: If the file does not exist.
+        """
+        if not os.path.exists(file_path):
+            raise FileNotFoundError(f"Audio file not found: {file_path}")
+        try:
+            audio, sample_rate = sf.read(file_path, dtype="float32", always_2d=False)
+        except sf.LibsndfileError:
+            audio, sample_rate = librosa.load(file_path, sr=None, mono=False)
+            # librosa lays multi-channel audio out as (channels, frames), while
+            # soundfile uses (frames, channels). Transpose so the mix-down and the
+            # resampling below operate on the same axes for both loaders.
+            if audio.ndim > 1:
+                audio = audio.T
+        if self.mono and audio.ndim > 1:
+            # Average across the channel axis.
+            audio = np.mean(audio, axis=1)
+        if sample_rate != self.target_sample_rate:
+            # axis=0 is the time axis for both 1-D and (frames, channels) audio.
+            audio = librosa.resample(
+                y=audio,
+                orig_sr=int(sample_rate),
+                target_sr=self.target_sample_rate,
+                axis=0,
+            )
+        return np.asarray(audio, dtype=np.float32)