remdb 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (187)
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +565 -0
  44. rem/cli/commands/configure.py +423 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1124 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +88 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +806 -0
  104. rem/services/content/service.py +657 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +229 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.2.6.dist-info/METADATA +1191 -0
  185. remdb-0.2.6.dist-info/RECORD +187 -0
  186. remdb-0.2.6.dist-info/WHEEL +4 -0
  187. remdb-0.2.6.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,354 @@
1
+ """
2
+ Audio chunker - splits audio by silence near minute boundaries.
3
+
4
+ Lightweight implementation using minimal dependencies:
5
+ - wave (stdlib) for WAV files
6
+ - pydub (optional) for format conversion
7
+
8
+ Design: Split audio near minute boundaries (58-62s range) at silence points.
9
+ This optimizes for OpenAI Whisper API 25MB file size limits while maintaining
10
+ natural speech boundaries.
11
+ """
12
+
13
+ import struct
14
+ import tempfile
15
+ import wave
16
+ from pathlib import Path
17
+ from typing import Optional
18
+
19
+ from loguru import logger
20
+
21
# Probe for pydub once at import time. It is optional: without it only WAV
# files can be processed, and silence detection is unavailable.
PYDUB_AVAILABLE = False
try:
    from pydub import AudioSegment
    from pydub.silence import detect_silence
except ImportError:
    logger.warning("pydub not available - only WAV files will be supported")
else:
    PYDUB_AVAILABLE = True
30
+
31
+
32
class AudioChunk:
    """A single slice of an audio file, addressed by millisecond offsets."""

    def __init__(
        self,
        file_path: str,
        start_ms: int,
        end_ms: int,
        chunk_index: int,
    ):
        """
        Create a chunk descriptor.

        Args:
            file_path: Path to the temporary audio file holding this chunk
            start_ms: Offset of the chunk start, in milliseconds
            end_ms: Offset of the chunk end, in milliseconds
            chunk_index: Zero-based position of the chunk in the sequence
        """
        self.file_path = file_path
        self.start_ms = start_ms
        self.end_ms = end_ms
        self.chunk_index = chunk_index
        # Cached length so callers don't recompute end - start.
        self.duration_ms = end_ms - start_ms

    @property
    def start_seconds(self) -> float:
        """Chunk start expressed in seconds."""
        return self.start_ms / 1000.0

    @property
    def end_seconds(self) -> float:
        """Chunk end expressed in seconds."""
        return self.end_ms / 1000.0

    @property
    def duration_seconds(self) -> float:
        """Chunk length expressed in seconds."""
        return self.duration_ms / 1000.0

    def __repr__(self) -> str:
        fields = (
            f"index={self.chunk_index}",
            f"start={self.start_seconds:.1f}s",
            f"end={self.end_seconds:.1f}s",
            f"duration={self.duration_seconds:.1f}s",
        )
        return f"AudioChunk({', '.join(fields)})"
74
+
75
+
76
class AudioChunker:
    """
    Chunks audio files by silence near minute boundaries.

    Strategy:
    1. Target chunks around 60 seconds (configurable)
    2. Look for silence in a window around the target (e.g., 58-62s)
    3. Split at the longest silence in that window
    4. If no silence found, split at the target boundary

    This creates natural breaks while keeping chunks under OpenAI's
    25MB file size limit (~10 minutes of audio at typical bitrates).
    """

    def __init__(
        self,
        target_chunk_seconds: float = 60.0,
        chunk_window_seconds: float = 2.0,
        silence_threshold_db: float = -40.0,
        min_silence_ms: int = 500,
    ):
        """
        Initialize audio chunker.

        Args:
            target_chunk_seconds: Target chunk duration (default: 60s)
            chunk_window_seconds: Window around target to search for silence (±seconds)
            silence_threshold_db: dB threshold for silence detection (lower = stricter)
            min_silence_ms: Minimum silence duration to consider (milliseconds)
        """
        # Everything is kept in milliseconds to match pydub's AudioSegment API.
        self.target_chunk_ms = int(target_chunk_seconds * 1000)
        self.chunk_window_ms = int(chunk_window_seconds * 1000)
        self.silence_threshold_db = silence_threshold_db
        self.min_silence_ms = min_silence_ms

    def chunk_audio(
        self,
        audio_path: str | Path,
        output_dir: Optional[str | Path] = None,
    ) -> list[AudioChunk]:
        """
        Chunk audio file by silence near minute boundaries.

        Args:
            audio_path: Path to audio file (WAV, M4A, MP3, etc.)
            output_dir: Directory for chunk files (temp dir if None)

        Returns:
            List of AudioChunk objects, one per exported WAV segment

        Raises:
            FileNotFoundError: If the audio file does not exist
            RuntimeError: If pydub is not available for the given format
        """
        audio_path = Path(audio_path)

        if not audio_path.exists():
            raise FileNotFoundError(f"Audio file not found: {audio_path}")

        # Determine output directory: caller-supplied (created if missing)
        # or a fresh private temp dir.
        if output_dir is None:
            output_dir = Path(tempfile.mkdtemp(prefix="rem_audio_chunks_"))
        else:
            output_dir = Path(output_dir)
            output_dir.mkdir(parents=True, exist_ok=True)

        logger.info(f"Chunking audio: {audio_path}")
        logger.info(
            f"Target: {self.target_chunk_ms/1000:.0f}s chunks, "
            f"window: ±{self.chunk_window_ms/1000:.0f}s, "
            f"silence: {self.silence_threshold_db}dB"
        )

        # Load audio (pydub converts non-WAV formats via ffmpeg).
        if audio_path.suffix.lower() == ".wav":
            audio = self._load_wav(audio_path)
        elif PYDUB_AVAILABLE:
            logger.info(f"Converting {audio_path.suffix} to AudioSegment")
            audio = AudioSegment.from_file(str(audio_path))
        else:
            raise RuntimeError(
                f"pydub required for {audio_path.suffix} files. "
                "Install with: pip install pydub"
            )

        # len(AudioSegment) is its duration in milliseconds.
        duration_ms = len(audio)
        logger.info(f"Audio duration: {duration_ms/1000:.1f}s")

        # Find chunk boundaries
        boundaries = self._find_chunk_boundaries(audio, duration_ms)
        logger.info(f"Found {len(boundaries)-1} chunk boundaries: {[f'{b/1000:.1f}s' for b in boundaries]}")

        # Export each [boundary[i], boundary[i+1]) span as its own WAV file.
        chunks = []
        for i in range(len(boundaries) - 1):
            start_ms = boundaries[i]
            end_ms = boundaries[i + 1]

            # Extract segment (AudioSegment supports millisecond slicing)
            segment = audio[start_ms:end_ms]

            # Save to file
            chunk_filename = f"chunk_{i:03d}_{start_ms}_{end_ms}.wav"
            chunk_path = output_dir / chunk_filename

            segment.export(str(chunk_path), format="wav")

            chunk = AudioChunk(
                file_path=str(chunk_path),
                start_ms=start_ms,
                end_ms=end_ms,
                chunk_index=i,
            )
            chunks.append(chunk)
            logger.debug(f"Created {chunk}")

        logger.info(f"Created {len(chunks)} chunks in {output_dir}")
        return chunks

    def _load_wav(self, wav_path: Path) -> "AudioSegment":
        """
        Load a WAV file as an AudioSegment.

        Despite the module's stated design goal of a stdlib `wave` fallback,
        no such fallback is implemented: pydub is currently required for all
        processing, including WAV.

        Args:
            wav_path: Path to WAV file

        Returns:
            AudioSegment

        Raises:
            RuntimeError: If pydub is not installed
        """
        if PYDUB_AVAILABLE:
            return AudioSegment.from_wav(str(wav_path))

        raise RuntimeError(
            "pydub required for audio processing. Install with: pip install pydub"
        )

    def _find_chunk_boundaries(
        self,
        audio: "AudioSegment",
        duration_ms: int,
    ) -> list[int]:
        """
        Find chunk boundaries by detecting silence near target intervals.

        Strategy:
        1. Start at 0, target boundary at 60s
        2. Look for silence in window [58s, 62s]
        3. Split at longest silence in window
        4. If no silence, split at target (60s)
        5. Repeat until end of audio

        Args:
            audio: AudioSegment
            duration_ms: Total audio duration in milliseconds

        Returns:
            List of boundary timestamps in milliseconds, starting with 0 and
            ending with duration_ms
        """
        boundaries = [0]  # Start at beginning
        current_pos = 0

        while current_pos < duration_ms:
            # Target next boundary
            target_boundary = current_pos + self.target_chunk_ms

            if target_boundary >= duration_ms:
                # Last chunk - use end of audio
                boundaries.append(duration_ms)
                break

            # Define search window around target, clamped to the remaining audio
            window_start = max(
                current_pos, target_boundary - self.chunk_window_ms
            )
            window_end = min(duration_ms, target_boundary + self.chunk_window_ms)

            # Find best split point (longest silence in window)
            split_point = self._find_best_split(
                audio,
                window_start,
                window_end,
                target_boundary,
            )

            # Forward-progress guard: with a degenerate configuration
            # (chunk_window_seconds >= target_chunk_seconds) the window can
            # start at current_pos and a silence midpoint can land exactly
            # there, which previously caused an infinite loop. Fall back to
            # the target boundary, which always advances.
            if split_point <= current_pos:
                split_point = target_boundary

            boundaries.append(split_point)
            current_pos = split_point

        return boundaries

    def _find_best_split(
        self,
        audio: "AudioSegment",
        window_start: int,
        window_end: int,
        target: int,
    ) -> int:
        """
        Find best split point in window by detecting silence.

        Scoring prefers long silences close to the target: each silence's
        score is (duration in s) * 1/(1 + distance-from-target in s).

        Args:
            audio: AudioSegment
            window_start: Start of search window (ms)
            window_end: End of search window (ms)
            target: Target split point (ms)

        Returns:
            Best split point in milliseconds (the target if no silence found)
        """
        if not PYDUB_AVAILABLE:
            # No pydub - split at target
            return target

        # Extract window
        window = audio[window_start:window_end]

        # Detect silence
        silence_ranges = detect_silence(
            window,
            min_silence_len=self.min_silence_ms,
            silence_thresh=self.silence_threshold_db,
            seek_step=10,  # Check every 10ms
        )

        if not silence_ranges:
            # No silence found - split at target
            logger.debug(f"No silence in window [{window_start/1000:.1f}s, {window_end/1000:.1f}s], splitting at target {target/1000:.1f}s")
            return target

        # Find longest silence closest to target
        best_silence = None
        best_score = float("-inf")

        for silence_start, silence_end in silence_ranges:
            silence_duration = silence_end - silence_start
            # detect_silence returns offsets relative to the window slice,
            # so translate the midpoint back to absolute audio time.
            silence_midpoint = (silence_start + silence_end) // 2
            absolute_midpoint = window_start + silence_midpoint

            # Score: prefer longer silences closer to target
            # Distance penalty: further from target = lower score
            distance_from_target = abs(absolute_midpoint - target)
            distance_penalty = 1.0 / (1.0 + distance_from_target / 1000.0)

            # Duration bonus: longer silence = higher score
            duration_bonus = silence_duration / 1000.0

            score = duration_bonus * distance_penalty

            if score > best_score:
                best_score = score
                best_silence = absolute_midpoint

        if best_silence is not None:
            logger.debug(
                f"Found silence at {best_silence/1000:.1f}s "
                f"(target: {target/1000:.1f}s, score: {best_score:.3f})"
            )
            return best_silence

        # Fallback to target
        return target

    def cleanup_chunks(self, chunks: list[AudioChunk]) -> None:
        """
        Clean up chunk files.

        Best-effort: failures are logged as warnings, never raised.

        Args:
            chunks: List of AudioChunk objects to clean up
        """
        for chunk in chunks:
            try:
                Path(chunk.file_path).unlink(missing_ok=True)
                logger.debug(f"Deleted chunk file: {chunk.file_path}")
            except Exception as e:
                logger.warning(f"Failed to delete {chunk.file_path}: {e}")
@@ -0,0 +1,259 @@
1
+ """
2
+ Audio transcriber using OpenAI Whisper API.
3
+
4
+ Lightweight implementation using only requests (no httpx dependency).
5
+ Handles file uploads and response parsing for OpenAI's Whisper API.
6
+ """
7
+
8
+ import os
9
+ from pathlib import Path
10
+ from typing import Optional
11
+
12
+ import requests
13
+ from loguru import logger
14
+
15
+
16
class TranscriptionResult:
    """Outcome of transcribing one audio segment, with timing metadata."""

    def __init__(
        self,
        text: str,
        start_seconds: float,
        end_seconds: float,
        duration_seconds: float,
        language: Optional[str] = None,
        confidence: float = 0.9,
    ):
        """
        Build a transcription result.

        Args:
            text: Transcribed text
            start_seconds: Start time of segment
            end_seconds: End time of segment
            duration_seconds: Duration of segment
            language: Detected language (if available)
            confidence: Confidence score (0.0-1.0)
        """
        self.text = text
        self.start_seconds = start_seconds
        self.end_seconds = end_seconds
        self.duration_seconds = duration_seconds
        self.language = language
        self.confidence = confidence

    def __repr__(self) -> str:
        span = f"start={self.start_seconds:.1f}s, end={self.end_seconds:.1f}s"
        return f"TranscriptionResult({span}, chars={len(self.text)})"
48
+
49
+
50
class AudioTranscriber:
    """
    Transcribe audio using OpenAI Whisper API.

    Uses only requests library (no httpx) for minimal dependencies.
    Supports all Whisper-compatible audio formats.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        model: str = "whisper-1",
        language: Optional[str] = None,
        temperature: float = 0.0,
    ):
        """
        Initialize audio transcriber.

        Args:
            api_key: OpenAI API key (from env if None)
            model: Whisper model name (default: whisper-1)
            language: ISO-639-1 language code (auto-detect if None)
            temperature: Sampling temperature 0.0-1.0 (0 = deterministic)
        """
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        if not self.api_key:
            # Construction succeeds without a key; transcribe_* raise later.
            logger.warning("No OpenAI API key found - transcription will fail")

        self.model = model
        self.language = language
        self.temperature = temperature
        self.api_url = "https://api.openai.com/v1/audio/transcriptions"

    def transcribe_file(
        self,
        audio_path: str | Path,
        start_seconds: float = 0.0,
        end_seconds: Optional[float] = None,
    ) -> TranscriptionResult:
        """
        Transcribe audio file using OpenAI Whisper API.

        Args:
            audio_path: Path to audio file
            start_seconds: Start time (for metadata only)
            end_seconds: End time (for metadata, estimated from file size if None)

        Returns:
            TranscriptionResult with text and metadata

        Raises:
            ValueError: If API key missing or file exceeds the 25 MB API limit
            FileNotFoundError: If the audio file does not exist
            RuntimeError: If API request fails or times out
        """
        if not self.api_key:
            raise ValueError("OpenAI API key required for transcription")

        audio_path = Path(audio_path)
        if not audio_path.exists():
            raise FileNotFoundError(f"Audio file not found: {audio_path}")

        file_size_mb = audio_path.stat().st_size / (1024 * 1024)
        logger.info(
            f"Transcribing {audio_path.name} ({file_size_mb:.1f} MB) "
            f"with Whisper API"
        )

        # Check file size (Whisper API limit: 25 MB)
        if file_size_mb > 25:
            raise ValueError(
                f"Audio file too large: {file_size_mb:.1f} MB "
                "(max 25 MB for Whisper API)"
            )

        # Prepare request
        headers = {"Authorization": f"Bearer {self.api_key}"}

        # Build form data
        data = {
            "model": self.model,
            "response_format": "text",  # Simple text response
            "temperature": self.temperature,
        }

        if self.language:
            data["language"] = self.language

        # Open file and make request
        try:
            with open(audio_path, "rb") as audio_file:
                files = {"file": (audio_path.name, audio_file, "audio/wav")}

                logger.debug(f"Sending request to {self.api_url}")
                response = requests.post(
                    self.api_url,
                    headers=headers,
                    data=data,
                    files=files,
                    timeout=120.0,  # 2 minute timeout
                )

                # Check response
                if response.status_code != 200:
                    error_detail = response.text
                    logger.error(
                        f"Whisper API error: {response.status_code} - {error_detail}"
                    )
                    raise RuntimeError(
                        f"Transcription failed: {response.status_code} - {error_detail}"
                    )

                # response_format=text returns the plain transcript body
                transcription_text = response.text.strip()
                logger.info(
                    f"✓ Transcription complete: {len(transcription_text)} characters"
                )

                # Calculate duration (use provided or estimate)
                if end_seconds is None:
                    # Estimate from file size: assuming ~10 KB per second of
                    # audio (rough ballpark for 16kHz mono WAV), duration in
                    # seconds is kilobytes / 10. Note: division, not
                    # multiplication — the previous `* 10` overestimated the
                    # duration by a factor of ~100.
                    end_seconds = start_seconds + (file_size_mb * 1024 / 10)

                duration = end_seconds - start_seconds

                return TranscriptionResult(
                    text=transcription_text,
                    start_seconds=start_seconds,
                    end_seconds=end_seconds,
                    duration_seconds=duration,
                    language=self.language,
                    confidence=0.9,  # Whisper doesn't provide confidence
                )

        except requests.exceptions.Timeout as e:
            logger.error("Whisper API request timed out")
            raise RuntimeError("Transcription timed out after 120 seconds") from e
        except requests.exceptions.RequestException as e:
            logger.error(f"Request error: {e}")
            raise RuntimeError(f"Transcription request failed: {e}") from e
        except Exception as e:
            # Includes the RuntimeError raised above for non-200 responses;
            # log and propagate unchanged.
            logger.error(f"Unexpected error during transcription: {e}")
            raise

    def transcribe_chunks(
        self,
        chunks: list,  # List of AudioChunk objects from AudioChunker
    ) -> list[TranscriptionResult]:
        """
        Transcribe multiple audio chunks.

        A failed chunk does not abort the batch: it yields a placeholder
        TranscriptionResult with confidence 0.0 and an error message as text.

        Args:
            chunks: List of AudioChunk objects from AudioChunker

        Returns:
            List of TranscriptionResult objects, one per chunk

        Raises:
            ValueError: If API key missing
        """
        if not self.api_key:
            raise ValueError("OpenAI API key required for transcription")

        logger.info(f"Transcribing {len(chunks)} audio chunks")

        results = []
        total_duration = sum(c.duration_seconds for c in chunks)
        estimated_cost = (total_duration / 60) * 0.006  # $0.006 per minute

        # Note: the minutes figure is a duration, not a dollar amount
        # (the previous format string had a stray "$" before it).
        logger.info(
            f"Estimated cost: ${estimated_cost:.3f} "
            f"({total_duration / 60:.1f} minutes)"
        )

        for i, chunk in enumerate(chunks, 1):
            logger.info(
                f"Processing chunk {i}/{len(chunks)} "
                f"({chunk.start_seconds:.1f}s - {chunk.end_seconds:.1f}s)"
            )

            try:
                result = self.transcribe_file(
                    chunk.file_path,
                    start_seconds=chunk.start_seconds,
                    end_seconds=chunk.end_seconds,
                )
                results.append(result)
                logger.debug(f"✓ Chunk {i} transcribed: {len(result.text)} chars")

            except Exception as e:
                logger.error(f"Failed to transcribe chunk {i}: {e}")
                # Add error result so output stays aligned with input chunks
                results.append(
                    TranscriptionResult(
                        text=f"[Transcription failed: {e}]",
                        start_seconds=chunk.start_seconds,
                        end_seconds=chunk.end_seconds,
                        duration_seconds=chunk.duration_seconds,
                        confidence=0.0,
                    )
                )

        successful = sum(1 for r in results if r.confidence > 0)
        logger.info(
            f"Transcription complete: {successful}/{len(chunks)} chunks successful"
        )

        return results