remdb 0.3.242__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +129 -0
- rem/agentic/README.md +760 -0
- rem/agentic/__init__.py +54 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +38 -0
- rem/agentic/agents/agent_manager.py +311 -0
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +425 -0
- rem/agentic/context_builder.py +360 -0
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +273 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +240 -0
- rem/agentic/providers/phoenix.py +926 -0
- rem/agentic/providers/pydantic_ai.py +854 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +737 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +242 -0
- rem/api/README.md +657 -0
- rem/api/deps.py +253 -0
- rem/api/main.py +460 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +820 -0
- rem/api/mcp_router/server.py +243 -0
- rem/api/mcp_router/tools.py +1605 -0
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +520 -0
- rem/api/routers/auth.py +898 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/child_streaming.py +394 -0
- rem/api/routers/chat/completions.py +702 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +202 -0
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +546 -0
- rem/api/routers/chat/streaming.py +950 -0
- rem/api/routers/chat/streaming_utils.py +327 -0
- rem/api/routers/common.py +18 -0
- rem/api/routers/dev.py +87 -0
- rem/api/routers/feedback.py +276 -0
- rem/api/routers/messages.py +620 -0
- rem/api/routers/models.py +86 -0
- rem/api/routers/query.py +362 -0
- rem/api/routers/shared_sessions.py +422 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +36 -0
- rem/auth/jwt.py +367 -0
- rem/auth/middleware.py +318 -0
- rem/auth/providers/__init__.py +16 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/email.py +215 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +517 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +299 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +549 -0
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +495 -0
- rem/cli/commands/db.py +828 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1698 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +388 -0
- rem/cli/commands/query.py +109 -0
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +230 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/commands/session.py +453 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +123 -0
- rem/config.py +244 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +70 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +672 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +246 -0
- rem/models/entities/__init__.py +68 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +64 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +181 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/session.py +84 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +93 -0
- rem/py.typed +0 -0
- rem/registry.py +373 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/agent-builder.yaml +235 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +132 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +18 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +760 -0
- rem/services/content/service.py +762 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +322 -0
- rem/services/dreaming/moment_service.py +251 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +522 -0
- rem/services/email/templates.py +360 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +127 -0
- rem/services/embeddings/worker.py +435 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +960 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +757 -0
- rem/services/postgres/__init__.py +49 -0
- rem/services/postgres/diff_service.py +599 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
- rem/services/postgres/register_type.py +353 -0
- rem/services/postgres/repository.py +481 -0
- rem/services/postgres/schema_generator.py +661 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +355 -0
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +318 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +180 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +608 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +13 -0
- rem/services/session/compression.py +488 -0
- rem/services/session/pydantic_messages.py +310 -0
- rem/services/session/reload.py +85 -0
- rem/services/user_service.py +130 -0
- rem/settings.py +1877 -0
- rem/sql/background_indexes.sql +52 -0
- rem/sql/migrations/001_install.sql +983 -0
- rem/sql/migrations/002_install_models.sql +3157 -0
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +282 -0
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +628 -0
- rem/utils/__init__.py +61 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +436 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/files.py +323 -0
- rem/utils/markdown.py +16 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +492 -0
- rem/utils/schema_loader.py +649 -0
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +350 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +325 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +7 -0
- rem/workers/db_listener.py +579 -0
- rem/workers/db_maintainer.py +74 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- rem/workers/unlogged_maintainer.py +463 -0
- remdb-0.3.242.dist-info/METADATA +1632 -0
- remdb-0.3.242.dist-info/RECORD +235 -0
- remdb-0.3.242.dist-info/WHEEL +4 -0
- remdb-0.3.242.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Audio chunker - splits audio by silence near minute boundaries.
|
|
3
|
+
|
|
4
|
+
Lightweight implementation using minimal dependencies:
|
|
5
|
+
- wave (stdlib) for WAV files
|
|
6
|
+
- pydub (optional) for format conversion
|
|
7
|
+
|
|
8
|
+
Design: Split audio near minute boundaries (58-62s range) at silence points.
|
|
9
|
+
This optimizes for OpenAI Whisper API 25MB file size limits while maintaining
|
|
10
|
+
natural speech boundaries.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import struct
|
|
14
|
+
import tempfile
|
|
15
|
+
import wave
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Optional
|
|
18
|
+
|
|
19
|
+
from loguru import logger
|
|
20
|
+
|
|
21
|
+
# Check for pydub availability (optional for non-WAV formats)
|
|
22
|
+
try:
|
|
23
|
+
from pydub import AudioSegment
|
|
24
|
+
from pydub.silence import detect_silence
|
|
25
|
+
|
|
26
|
+
PYDUB_AVAILABLE = True
|
|
27
|
+
except ImportError:
|
|
28
|
+
PYDUB_AVAILABLE = False
|
|
29
|
+
logger.warning("pydub not available - only WAV files will be supported")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class AudioChunk:
    """
    Represents a chunk of audio with temporal boundaries.

    A chunk is a contiguous slice of a longer recording that has been written
    to its own file. Positions are stored in milliseconds relative to the
    start of the original recording; second-based views are exposed as
    read-only properties.
    """

    def __init__(
        self,
        file_path: str,
        start_ms: int,
        end_ms: int,
        chunk_index: int,
    ):
        """
        Initialize audio chunk.

        Args:
            file_path: Path to temporary audio file for this chunk
            start_ms: Start time in milliseconds
            end_ms: End time in milliseconds
            chunk_index: Index of this chunk in sequence
        """
        self.file_path = file_path
        self.start_ms = start_ms
        self.end_ms = end_ms
        self.chunk_index = chunk_index
        # Derived once at construction; not recomputed if start_ms/end_ms
        # are reassigned afterwards.
        self.duration_ms = end_ms - start_ms

    @property
    def start_seconds(self) -> float:
        """Start time in seconds."""
        return self.start_ms / 1000.0

    @property
    def end_seconds(self) -> float:
        """End time in seconds."""
        return self.end_ms / 1000.0

    @property
    def duration_seconds(self) -> float:
        """Duration in seconds."""
        return self.duration_ms / 1000.0

    def __repr__(self) -> str:
        return f"AudioChunk(index={self.chunk_index}, start={self.start_seconds:.1f}s, end={self.end_seconds:.1f}s, duration={self.duration_seconds:.1f}s)"
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class AudioChunker:
|
|
77
|
+
"""
|
|
78
|
+
Chunks audio files by silence near minute boundaries.
|
|
79
|
+
|
|
80
|
+
Strategy:
|
|
81
|
+
1. Target chunks around 60 seconds (configurable)
|
|
82
|
+
2. Look for silence in a window around the target (e.g., 58-62s)
|
|
83
|
+
3. Split at the longest silence in that window
|
|
84
|
+
4. If no silence found, split at the target boundary
|
|
85
|
+
|
|
86
|
+
This creates natural breaks while keeping chunks under OpenAI's
|
|
87
|
+
25MB file size limit (~10 minutes of audio at typical bitrates).
|
|
88
|
+
"""
|
|
89
|
+
|
|
90
|
+
def __init__(
|
|
91
|
+
self,
|
|
92
|
+
target_chunk_seconds: float = 60.0,
|
|
93
|
+
chunk_window_seconds: float = 2.0,
|
|
94
|
+
silence_threshold_db: float = -40.0,
|
|
95
|
+
min_silence_ms: int = 500,
|
|
96
|
+
):
|
|
97
|
+
"""
|
|
98
|
+
Initialize audio chunker.
|
|
99
|
+
|
|
100
|
+
Args:
|
|
101
|
+
target_chunk_seconds: Target chunk duration (default: 60s)
|
|
102
|
+
chunk_window_seconds: Window around target to search for silence (±seconds)
|
|
103
|
+
silence_threshold_db: dB threshold for silence detection (lower = stricter)
|
|
104
|
+
min_silence_ms: Minimum silence duration to consider (milliseconds)
|
|
105
|
+
"""
|
|
106
|
+
self.target_chunk_ms = int(target_chunk_seconds * 1000)
|
|
107
|
+
self.chunk_window_ms = int(chunk_window_seconds * 1000)
|
|
108
|
+
self.silence_threshold_db = silence_threshold_db
|
|
109
|
+
self.min_silence_ms = min_silence_ms
|
|
110
|
+
|
|
111
|
+
def chunk_audio(
|
|
112
|
+
self,
|
|
113
|
+
audio_path: str | Path,
|
|
114
|
+
output_dir: Optional[str | Path] = None,
|
|
115
|
+
) -> list[AudioChunk]:
|
|
116
|
+
"""
|
|
117
|
+
Chunk audio file by silence near minute boundaries.
|
|
118
|
+
|
|
119
|
+
Args:
|
|
120
|
+
audio_path: Path to audio file (WAV, M4A, MP3, etc.)
|
|
121
|
+
output_dir: Directory for chunk files (temp dir if None)
|
|
122
|
+
|
|
123
|
+
Returns:
|
|
124
|
+
List of AudioChunk objects
|
|
125
|
+
|
|
126
|
+
Raises:
|
|
127
|
+
ValueError: If audio format not supported
|
|
128
|
+
RuntimeError: If pydub not available for non-WAV files
|
|
129
|
+
"""
|
|
130
|
+
audio_path = Path(audio_path)
|
|
131
|
+
|
|
132
|
+
if not audio_path.exists():
|
|
133
|
+
raise FileNotFoundError(f"Audio file not found: {audio_path}")
|
|
134
|
+
|
|
135
|
+
# Determine output directory
|
|
136
|
+
if output_dir is None:
|
|
137
|
+
output_dir = Path(tempfile.mkdtemp(prefix="rem_audio_chunks_"))
|
|
138
|
+
else:
|
|
139
|
+
output_dir = Path(output_dir)
|
|
140
|
+
output_dir.mkdir(parents=True, exist_ok=True)
|
|
141
|
+
|
|
142
|
+
logger.info(f"Chunking audio: {audio_path}")
|
|
143
|
+
logger.info(
|
|
144
|
+
f"Target: {self.target_chunk_ms/1000:.0f}s chunks, "
|
|
145
|
+
f"window: ±{self.chunk_window_ms/1000:.0f}s, "
|
|
146
|
+
f"silence: {self.silence_threshold_db}dB"
|
|
147
|
+
)
|
|
148
|
+
|
|
149
|
+
# Load audio (convert to WAV if needed)
|
|
150
|
+
if audio_path.suffix.lower() == ".wav":
|
|
151
|
+
audio = self._load_wav(audio_path)
|
|
152
|
+
elif PYDUB_AVAILABLE:
|
|
153
|
+
logger.info(f"Converting {audio_path.suffix} to AudioSegment")
|
|
154
|
+
audio = AudioSegment.from_file(str(audio_path))
|
|
155
|
+
else:
|
|
156
|
+
raise RuntimeError(
|
|
157
|
+
f"pydub required for {audio_path.suffix} files. "
|
|
158
|
+
"Install with: pip install pydub"
|
|
159
|
+
)
|
|
160
|
+
|
|
161
|
+
duration_ms = len(audio)
|
|
162
|
+
logger.info(f"Audio duration: {duration_ms/1000:.1f}s")
|
|
163
|
+
|
|
164
|
+
# Find chunk boundaries
|
|
165
|
+
boundaries = self._find_chunk_boundaries(audio, duration_ms)
|
|
166
|
+
logger.info(f"Found {len(boundaries)-1} chunk boundaries: {[f'{b/1000:.1f}s' for b in boundaries]}")
|
|
167
|
+
|
|
168
|
+
# Create chunks
|
|
169
|
+
chunks = []
|
|
170
|
+
for i in range(len(boundaries) - 1):
|
|
171
|
+
start_ms = boundaries[i]
|
|
172
|
+
end_ms = boundaries[i + 1]
|
|
173
|
+
|
|
174
|
+
# Extract segment
|
|
175
|
+
segment = audio[start_ms:end_ms]
|
|
176
|
+
|
|
177
|
+
# Save to file
|
|
178
|
+
chunk_filename = f"chunk_{i:03d}_{start_ms}_{end_ms}.wav"
|
|
179
|
+
chunk_path = output_dir / chunk_filename
|
|
180
|
+
|
|
181
|
+
segment.export(str(chunk_path), format="wav")
|
|
182
|
+
|
|
183
|
+
chunk = AudioChunk(
|
|
184
|
+
file_path=str(chunk_path),
|
|
185
|
+
start_ms=start_ms,
|
|
186
|
+
end_ms=end_ms,
|
|
187
|
+
chunk_index=i,
|
|
188
|
+
)
|
|
189
|
+
chunks.append(chunk)
|
|
190
|
+
logger.debug(f"Created {chunk}")
|
|
191
|
+
|
|
192
|
+
logger.info(f"Created {len(chunks)} chunks in {output_dir}")
|
|
193
|
+
return chunks
|
|
194
|
+
|
|
195
|
+
def _load_wav(self, wav_path: Path) -> "AudioSegment":
|
|
196
|
+
"""
|
|
197
|
+
Load WAV file using pydub or wave module.
|
|
198
|
+
|
|
199
|
+
Args:
|
|
200
|
+
wav_path: Path to WAV file
|
|
201
|
+
|
|
202
|
+
Returns:
|
|
203
|
+
AudioSegment
|
|
204
|
+
|
|
205
|
+
Raises:
|
|
206
|
+
ValueError: If WAV file invalid
|
|
207
|
+
"""
|
|
208
|
+
if PYDUB_AVAILABLE:
|
|
209
|
+
return AudioSegment.from_wav(str(wav_path))
|
|
210
|
+
|
|
211
|
+
# Fallback: use wave module and convert to AudioSegment-like interface
|
|
212
|
+
# This is a minimal implementation for WAV-only support
|
|
213
|
+
raise RuntimeError(
|
|
214
|
+
"pydub required for audio processing. Install with: pip install pydub"
|
|
215
|
+
)
|
|
216
|
+
|
|
217
|
+
def _find_chunk_boundaries(
|
|
218
|
+
self,
|
|
219
|
+
audio: "AudioSegment",
|
|
220
|
+
duration_ms: int,
|
|
221
|
+
) -> list[int]:
|
|
222
|
+
"""
|
|
223
|
+
Find chunk boundaries by detecting silence near target intervals.
|
|
224
|
+
|
|
225
|
+
Strategy:
|
|
226
|
+
1. Start at 0, target boundary at 60s
|
|
227
|
+
2. Look for silence in window [58s, 62s]
|
|
228
|
+
3. Split at longest silence in window
|
|
229
|
+
4. If no silence, split at target (60s)
|
|
230
|
+
5. Repeat until end of audio
|
|
231
|
+
|
|
232
|
+
Args:
|
|
233
|
+
audio: AudioSegment
|
|
234
|
+
duration_ms: Total audio duration in milliseconds
|
|
235
|
+
|
|
236
|
+
Returns:
|
|
237
|
+
List of boundary timestamps in milliseconds
|
|
238
|
+
"""
|
|
239
|
+
boundaries = [0] # Start at beginning
|
|
240
|
+
current_pos = 0
|
|
241
|
+
|
|
242
|
+
while current_pos < duration_ms:
|
|
243
|
+
# Target next boundary
|
|
244
|
+
target_boundary = current_pos + self.target_chunk_ms
|
|
245
|
+
|
|
246
|
+
if target_boundary >= duration_ms:
|
|
247
|
+
# Last chunk - use end of audio
|
|
248
|
+
boundaries.append(duration_ms)
|
|
249
|
+
break
|
|
250
|
+
|
|
251
|
+
# Define search window around target
|
|
252
|
+
window_start = max(
|
|
253
|
+
current_pos, target_boundary - self.chunk_window_ms
|
|
254
|
+
)
|
|
255
|
+
window_end = min(duration_ms, target_boundary + self.chunk_window_ms)
|
|
256
|
+
|
|
257
|
+
# Find best split point (longest silence in window)
|
|
258
|
+
split_point = self._find_best_split(
|
|
259
|
+
audio,
|
|
260
|
+
window_start,
|
|
261
|
+
window_end,
|
|
262
|
+
target_boundary,
|
|
263
|
+
)
|
|
264
|
+
|
|
265
|
+
boundaries.append(split_point)
|
|
266
|
+
current_pos = split_point
|
|
267
|
+
|
|
268
|
+
return boundaries
|
|
269
|
+
|
|
270
|
+
def _find_best_split(
|
|
271
|
+
self,
|
|
272
|
+
audio: "AudioSegment",
|
|
273
|
+
window_start: int,
|
|
274
|
+
window_end: int,
|
|
275
|
+
target: int,
|
|
276
|
+
) -> int:
|
|
277
|
+
"""
|
|
278
|
+
Find best split point in window by detecting silence.
|
|
279
|
+
|
|
280
|
+
Args:
|
|
281
|
+
audio: AudioSegment
|
|
282
|
+
window_start: Start of search window (ms)
|
|
283
|
+
window_end: End of search window (ms)
|
|
284
|
+
target: Target split point (ms)
|
|
285
|
+
|
|
286
|
+
Returns:
|
|
287
|
+
Best split point in milliseconds
|
|
288
|
+
"""
|
|
289
|
+
if not PYDUB_AVAILABLE:
|
|
290
|
+
# No pydub - split at target
|
|
291
|
+
return target
|
|
292
|
+
|
|
293
|
+
# Extract window
|
|
294
|
+
window = audio[window_start:window_end]
|
|
295
|
+
|
|
296
|
+
# Detect silence
|
|
297
|
+
silence_ranges = detect_silence(
|
|
298
|
+
window,
|
|
299
|
+
min_silence_len=self.min_silence_ms,
|
|
300
|
+
silence_thresh=self.silence_threshold_db,
|
|
301
|
+
seek_step=10, # Check every 10ms
|
|
302
|
+
)
|
|
303
|
+
|
|
304
|
+
if not silence_ranges:
|
|
305
|
+
# No silence found - split at target
|
|
306
|
+
logger.debug(f"No silence in window [{window_start/1000:.1f}s, {window_end/1000:.1f}s], splitting at target {target/1000:.1f}s")
|
|
307
|
+
return target
|
|
308
|
+
|
|
309
|
+
# Find longest silence closest to target
|
|
310
|
+
best_silence = None
|
|
311
|
+
best_score = float("-inf")
|
|
312
|
+
|
|
313
|
+
for silence_start, silence_end in silence_ranges:
|
|
314
|
+
silence_duration = silence_end - silence_start
|
|
315
|
+
silence_midpoint = (silence_start + silence_end) // 2
|
|
316
|
+
absolute_midpoint = window_start + silence_midpoint
|
|
317
|
+
|
|
318
|
+
# Score: prefer longer silences closer to target
|
|
319
|
+
# Distance penalty: further from target = lower score
|
|
320
|
+
distance_from_target = abs(absolute_midpoint - target)
|
|
321
|
+
distance_penalty = 1.0 / (1.0 + distance_from_target / 1000.0)
|
|
322
|
+
|
|
323
|
+
# Duration bonus: longer silence = higher score
|
|
324
|
+
duration_bonus = silence_duration / 1000.0
|
|
325
|
+
|
|
326
|
+
score = duration_bonus * distance_penalty
|
|
327
|
+
|
|
328
|
+
if score > best_score:
|
|
329
|
+
best_score = score
|
|
330
|
+
best_silence = absolute_midpoint
|
|
331
|
+
|
|
332
|
+
if best_silence is not None:
|
|
333
|
+
logger.debug(
|
|
334
|
+
f"Found silence at {best_silence/1000:.1f}s "
|
|
335
|
+
f"(target: {target/1000:.1f}s, score: {best_score:.3f})"
|
|
336
|
+
)
|
|
337
|
+
return best_silence
|
|
338
|
+
|
|
339
|
+
# Fallback to target
|
|
340
|
+
return target
|
|
341
|
+
|
|
342
|
+
def cleanup_chunks(self, chunks: list[AudioChunk]) -> None:
|
|
343
|
+
"""
|
|
344
|
+
Clean up chunk files.
|
|
345
|
+
|
|
346
|
+
Args:
|
|
347
|
+
chunks: List of AudioChunk objects to clean up
|
|
348
|
+
"""
|
|
349
|
+
for chunk in chunks:
|
|
350
|
+
try:
|
|
351
|
+
Path(chunk.file_path).unlink(missing_ok=True)
|
|
352
|
+
logger.debug(f"Deleted chunk file: {chunk.file_path}")
|
|
353
|
+
except Exception as e:
|
|
354
|
+
logger.warning(f"Failed to delete {chunk.file_path}: {e}")
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Audio transcriber using OpenAI Whisper API.
|
|
3
|
+
|
|
4
|
+
Lightweight implementation using only requests (no httpx dependency).
|
|
5
|
+
Handles file uploads and response parsing for OpenAI's Whisper API.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Optional
|
|
11
|
+
|
|
12
|
+
import requests
|
|
13
|
+
from loguru import logger
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class TranscriptionResult:
    """
    Result from audio transcription.

    Holds the transcribed text for one audio segment together with the
    segment's position (in seconds) and optional language / confidence
    metadata. All fields are plain public attributes.
    """

    def __init__(
        self,
        text: str,
        start_seconds: float,
        end_seconds: float,
        duration_seconds: float,
        language: Optional[str] = None,
        confidence: float = 0.9,
    ):
        """
        Initialize transcription result.

        Args:
            text: Transcribed text
            start_seconds: Start time of segment
            end_seconds: End time of segment
            duration_seconds: Duration of segment
            language: Detected language (if available)
            confidence: Confidence score (0.0-1.0)
        """
        self.text = text
        self.start_seconds = start_seconds
        self.end_seconds = end_seconds
        # Stored as given; not derived from start_seconds/end_seconds.
        self.duration_seconds = duration_seconds
        self.language = language
        self.confidence = confidence

    def __repr__(self) -> str:
        return f"TranscriptionResult(start={self.start_seconds:.1f}s, end={self.end_seconds:.1f}s, chars={len(self.text)})"
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class AudioTranscriber:
|
|
51
|
+
"""
|
|
52
|
+
Transcribe audio using OpenAI Whisper API.
|
|
53
|
+
|
|
54
|
+
Uses only requests library (no httpx) for minimal dependencies.
|
|
55
|
+
Supports all Whisper-compatible audio formats.
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
def __init__(
|
|
59
|
+
self,
|
|
60
|
+
api_key: Optional[str] = None,
|
|
61
|
+
model: str = "whisper-1",
|
|
62
|
+
language: Optional[str] = None,
|
|
63
|
+
temperature: float = 0.0,
|
|
64
|
+
):
|
|
65
|
+
"""
|
|
66
|
+
Initialize audio transcriber.
|
|
67
|
+
|
|
68
|
+
Args:
|
|
69
|
+
api_key: OpenAI API key (from env if None)
|
|
70
|
+
model: Whisper model name (default: whisper-1)
|
|
71
|
+
language: ISO-639-1 language code (auto-detect if None)
|
|
72
|
+
temperature: Sampling temperature 0.0-1.0 (0 = deterministic)
|
|
73
|
+
"""
|
|
74
|
+
self.api_key = api_key or os.getenv("OPENAI_API_KEY")
|
|
75
|
+
if not self.api_key:
|
|
76
|
+
logger.warning("No OpenAI API key found - transcription will fail")
|
|
77
|
+
|
|
78
|
+
self.model = model
|
|
79
|
+
self.language = language
|
|
80
|
+
self.temperature = temperature
|
|
81
|
+
self.api_url = "https://api.openai.com/v1/audio/transcriptions"
|
|
82
|
+
|
|
83
|
+
def transcribe_file(
|
|
84
|
+
self,
|
|
85
|
+
audio_path: str | Path,
|
|
86
|
+
start_seconds: float = 0.0,
|
|
87
|
+
end_seconds: Optional[float] = None,
|
|
88
|
+
) -> TranscriptionResult:
|
|
89
|
+
"""
|
|
90
|
+
Transcribe audio file using OpenAI Whisper API.
|
|
91
|
+
|
|
92
|
+
Args:
|
|
93
|
+
audio_path: Path to audio file
|
|
94
|
+
start_seconds: Start time (for metadata only)
|
|
95
|
+
end_seconds: End time (for metadata, auto-detect if None)
|
|
96
|
+
|
|
97
|
+
Returns:
|
|
98
|
+
TranscriptionResult with text and metadata
|
|
99
|
+
|
|
100
|
+
Raises:
|
|
101
|
+
ValueError: If API key missing or file invalid
|
|
102
|
+
RuntimeError: If API request fails
|
|
103
|
+
"""
|
|
104
|
+
if not self.api_key:
|
|
105
|
+
raise ValueError("OpenAI API key required for transcription")
|
|
106
|
+
|
|
107
|
+
audio_path = Path(audio_path)
|
|
108
|
+
if not audio_path.exists():
|
|
109
|
+
raise FileNotFoundError(f"Audio file not found: {audio_path}")
|
|
110
|
+
|
|
111
|
+
file_size_mb = audio_path.stat().st_size / (1024 * 1024)
|
|
112
|
+
logger.info(
|
|
113
|
+
f"Transcribing {audio_path.name} ({file_size_mb:.1f} MB) "
|
|
114
|
+
f"with Whisper API"
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
# Check file size (Whisper API limit: 25 MB)
|
|
118
|
+
if file_size_mb > 25:
|
|
119
|
+
raise ValueError(
|
|
120
|
+
f"Audio file too large: {file_size_mb:.1f} MB "
|
|
121
|
+
"(max 25 MB for Whisper API)"
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
# Prepare request
|
|
125
|
+
headers = {"Authorization": f"Bearer {self.api_key}"}
|
|
126
|
+
|
|
127
|
+
# Build form data
|
|
128
|
+
data = {
|
|
129
|
+
"model": self.model,
|
|
130
|
+
"response_format": "text", # Simple text response
|
|
131
|
+
"temperature": self.temperature,
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
if self.language:
|
|
135
|
+
data["language"] = self.language
|
|
136
|
+
|
|
137
|
+
# Open file and make request
|
|
138
|
+
try:
|
|
139
|
+
with open(audio_path, "rb") as audio_file:
|
|
140
|
+
files = {"file": (audio_path.name, audio_file, "audio/wav")}
|
|
141
|
+
|
|
142
|
+
logger.debug(f"Sending request to {self.api_url}")
|
|
143
|
+
response = requests.post(
|
|
144
|
+
self.api_url,
|
|
145
|
+
headers=headers,
|
|
146
|
+
data=data,
|
|
147
|
+
files=files,
|
|
148
|
+
timeout=120.0, # 2 minute timeout
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
# Check response
|
|
152
|
+
if response.status_code != 200:
|
|
153
|
+
error_detail = response.text
|
|
154
|
+
logger.error(
|
|
155
|
+
f"Whisper API error: {response.status_code} - {error_detail}"
|
|
156
|
+
)
|
|
157
|
+
raise RuntimeError(
|
|
158
|
+
f"Transcription failed: {response.status_code} - {error_detail}"
|
|
159
|
+
)
|
|
160
|
+
|
|
161
|
+
# Extract text
|
|
162
|
+
transcription_text = response.text.strip()
|
|
163
|
+
logger.info(
|
|
164
|
+
f"✓ Transcription complete: {len(transcription_text)} characters"
|
|
165
|
+
)
|
|
166
|
+
|
|
167
|
+
# Calculate duration (use provided or estimate)
|
|
168
|
+
if end_seconds is None:
|
|
169
|
+
# Estimate from file size (rough approximation)
|
|
170
|
+
# WAV: ~10KB per second at 16kHz mono
|
|
171
|
+
# This is very rough, but better than nothing
|
|
172
|
+
end_seconds = start_seconds + (file_size_mb * 1024 * 10)
|
|
173
|
+
|
|
174
|
+
duration = end_seconds - start_seconds
|
|
175
|
+
|
|
176
|
+
return TranscriptionResult(
|
|
177
|
+
text=transcription_text,
|
|
178
|
+
start_seconds=start_seconds,
|
|
179
|
+
end_seconds=end_seconds,
|
|
180
|
+
duration_seconds=duration,
|
|
181
|
+
language=self.language,
|
|
182
|
+
confidence=0.9, # Whisper doesn't provide confidence
|
|
183
|
+
)
|
|
184
|
+
|
|
185
|
+
except requests.exceptions.Timeout:
|
|
186
|
+
logger.error("Whisper API request timed out")
|
|
187
|
+
raise RuntimeError("Transcription timed out after 120 seconds")
|
|
188
|
+
except requests.exceptions.RequestException as e:
|
|
189
|
+
logger.error(f"Request error: {e}")
|
|
190
|
+
raise RuntimeError(f"Transcription request failed: {e}")
|
|
191
|
+
except Exception as e:
|
|
192
|
+
logger.error(f"Unexpected error during transcription: {e}")
|
|
193
|
+
raise
|
|
194
|
+
|
|
195
|
+
def transcribe_chunks(
|
|
196
|
+
self,
|
|
197
|
+
chunks: list, # List of AudioChunk objects from AudioChunker
|
|
198
|
+
) -> list[TranscriptionResult]:
|
|
199
|
+
"""
|
|
200
|
+
Transcribe multiple audio chunks.
|
|
201
|
+
|
|
202
|
+
Args:
|
|
203
|
+
chunks: List of AudioChunk objects from AudioChunker
|
|
204
|
+
|
|
205
|
+
Returns:
|
|
206
|
+
List of TranscriptionResult objects
|
|
207
|
+
|
|
208
|
+
Raises:
|
|
209
|
+
ValueError: If API key missing
|
|
210
|
+
RuntimeError: If any transcription fails
|
|
211
|
+
"""
|
|
212
|
+
if not self.api_key:
|
|
213
|
+
raise ValueError("OpenAI API key required for transcription")
|
|
214
|
+
|
|
215
|
+
logger.info(f"Transcribing {len(chunks)} audio chunks")
|
|
216
|
+
|
|
217
|
+
results = []
|
|
218
|
+
total_duration = sum(c.duration_seconds for c in chunks)
|
|
219
|
+
estimated_cost = (total_duration / 60) * 0.006 # $0.006 per minute
|
|
220
|
+
|
|
221
|
+
logger.info(
|
|
222
|
+
f"Estimated cost: ${estimated_cost:.3f} "
|
|
223
|
+
f"(${total_duration / 60:.1f} minutes)"
|
|
224
|
+
)
|
|
225
|
+
|
|
226
|
+
for i, chunk in enumerate(chunks, 1):
|
|
227
|
+
logger.info(
|
|
228
|
+
f"Processing chunk {i}/{len(chunks)} "
|
|
229
|
+
f"({chunk.start_seconds:.1f}s - {chunk.end_seconds:.1f}s)"
|
|
230
|
+
)
|
|
231
|
+
|
|
232
|
+
try:
|
|
233
|
+
result = self.transcribe_file(
|
|
234
|
+
chunk.file_path,
|
|
235
|
+
start_seconds=chunk.start_seconds,
|
|
236
|
+
end_seconds=chunk.end_seconds,
|
|
237
|
+
)
|
|
238
|
+
results.append(result)
|
|
239
|
+
logger.debug(f"✓ Chunk {i} transcribed: {len(result.text)} chars")
|
|
240
|
+
|
|
241
|
+
except Exception as e:
|
|
242
|
+
logger.error(f"Failed to transcribe chunk {i}: {e}")
|
|
243
|
+
# Add error result
|
|
244
|
+
results.append(
|
|
245
|
+
TranscriptionResult(
|
|
246
|
+
text=f"[Transcription failed: {e}]",
|
|
247
|
+
start_seconds=chunk.start_seconds,
|
|
248
|
+
end_seconds=chunk.end_seconds,
|
|
249
|
+
duration_seconds=chunk.duration_seconds,
|
|
250
|
+
confidence=0.0,
|
|
251
|
+
)
|
|
252
|
+
)
|
|
253
|
+
|
|
254
|
+
successful = sum(1 for r in results if r.confidence > 0)
|
|
255
|
+
logger.info(
|
|
256
|
+
f"Transcription complete: {successful}/{len(chunks)} chunks successful"
|
|
257
|
+
)
|
|
258
|
+
|
|
259
|
+
return results
|