remdb 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +566 -0
  44. rem/cli/commands/configure.py +497 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1302 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +96 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +801 -0
  104. rem/services/content/service.py +676 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +336 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.3.7.dist-info/METADATA +1473 -0
  185. remdb-0.3.7.dist-info/RECORD +187 -0
  186. remdb-0.3.7.dist-info/WHEEL +4 -0
  187. remdb-0.3.7.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,308 @@
1
+ # Audio Provider Integration
2
+
3
+ The AudioProvider is now fully integrated into REM's ContentService with a **consistent interface** that matches all other content providers.
4
+
5
+ ## Architecture
6
+
7
+ ```
8
+ ┌────────────────────────────────────────────────────────────┐
9
+ │ ContentService │
10
+ │ (Pluggable Providers) │
11
+ ├────────────────────────────────────────────────────────────┤
12
+ │ │
13
+ │ ┌──────────────┐ ┌────────────┐ ┌─────────────────┐ │
14
+ │ │ TextProvider │→│DocProvider │→│ AudioProvider │ │
15
+ │ └──────────────┘ └────────────┘ └─────────────────┘ │
16
+ │ │ │ │ │
17
+ │ ▼ ▼ ▼ │
18
+ │ extract() extract() extract() │
19
+ │ │ │ │ │
20
+ │ ▼ ▼ ▼ │
21
+ │ Markdown Markdown Markdown │
22
+ │ text text text │
23
+ │ │ │ │ │
24
+ │ └──────────────────┼──────────────────┘ │
25
+ │ │ │
26
+ │ ▼ │
27
+ │ chunk_text() → embed() │
28
+ │ │ │
29
+ │ ▼ │
30
+ │ Save to Database │
31
+ │ (File + Resource entities) │
32
+ └────────────────────────────────────────────────────────────┘
33
+ ```
34
+
35
+ ## Consistent Interface
36
+
37
+ All content providers implement the same `ContentProvider` base class:
38
+
39
+ ```python
40
+ class ContentProvider(ABC):
41
+ @property
42
+ @abstractmethod
43
+ def name(self) -> str:
44
+ """Provider name for logging/debugging."""
45
+ pass
46
+
47
+ @abstractmethod
48
+ def extract(self, content: bytes, metadata: dict[str, Any]) -> dict[str, Any]:
49
+ """
50
+ Extract text content from file bytes.
51
+
52
+ Args:
53
+ content: Raw file bytes
54
+ metadata: File metadata (size, type, etc.)
55
+
56
+ Returns:
57
+ dict with:
58
+ - text: Extracted text content
59
+ - metadata: Additional metadata from extraction (optional)
60
+ """
61
+ pass
62
+ ```
63
+
64
+ ## Provider Implementations
65
+
66
+ ### 1. TextProvider
67
+ ```python
68
+ def extract(self, content: bytes, metadata: dict) -> dict:
69
+ text = content.decode("utf-8")
70
+ return {
71
+ "text": text,
72
+ "metadata": {"line_count": len(text.split("\n"))}
73
+ }
74
+ ```
75
+
76
+ ### 2. DocProvider (Kreuzberg)
77
+ ```python
78
+ def extract(self, content: bytes, metadata: dict) -> dict:
79
+ # Uses Kreuzberg for PDF extraction
80
+ result = extract_file_sync(tmp_path, config=config)
81
+ return {
82
+ "text": result.content,
83
+ "metadata": {"table_count": len(result.tables)}
84
+ }
85
+ ```
86
+
87
+ ### 3. AudioProvider (AudioChunker + Whisper)
88
+ ```python
89
+ def extract(self, content: bytes, metadata: dict) -> dict:
90
+ # 1. Chunk audio by silence
91
+ chunks = chunker.chunk_audio(tmp_path)
92
+
93
+ # 2. Transcribe chunks
94
+ results = transcriber.transcribe_chunks(chunks)
95
+
96
+ # 3. Format as markdown with timestamps
97
+ markdown_parts = []
98
+ for result in results:
99
+ timestamp = f"{result.start_seconds:.1f}s - {result.end_seconds:.1f}s"
100
+ markdown_parts.append(f"## [{timestamp}]\n\n{result.text}\n")
101
+
102
+ return {
103
+ "text": "\n".join(markdown_parts),
104
+ "metadata": {
105
+ "chunk_count": len(chunks),
106
+ "duration_seconds": total_duration,
107
+ "estimated_cost": estimated_cost,
108
+ }
109
+ }
110
+ ```
111
+
112
+ ## Markdown Format
113
+
114
+ All providers return markdown-formatted text. AudioProvider returns:
115
+
116
+ ```markdown
117
+ ## [0.0s - 60.0s]
118
+
119
+ Transcription of first minute goes here...
120
+
121
+ ## [60.0s - 120.0s]
122
+
123
+ Transcription of second minute goes here...
124
+
125
+ ## [120.0s - 180.0s]
126
+
127
+ Transcription of third minute goes here...
128
+ ```
129
+
130
+ This format:
131
+ - ✅ Is valid markdown
132
+ - ✅ Has clear section boundaries
133
+ - ✅ Preserves temporal information
134
+ - ✅ Can be chunked further if needed
135
+ - ✅ Embeds naturally with other content
136
+
137
+ ## Processing Pipeline
138
+
139
+ ### Example: Audio File Processing
140
+
141
+ ```python
142
+ from rem.services.content import ContentService
143
+
144
+ service = ContentService()
145
+
146
+ # Process audio file (same interface as PDF/markdown!)
147
+ result = service.process_uri("s3://bucket/meeting.m4a")
148
+
149
+ # Result structure (same for all providers):
150
+ {
151
+ "uri": "s3://bucket/meeting.m4a",
152
+ "content": "## [0.0s - 60.0s]\n\nDiscussion about...\n\n## [60.0s - 120.0s]...",
153
+ "metadata": {
154
+ "chunk_count": 5,
155
+ "duration_seconds": 300.0,
156
+ "estimated_cost": 0.030,
157
+ "parser": "whisper_api"
158
+ },
159
+ "provider": "audio"
160
+ }
161
+ ```
162
+
163
+ ### End-to-End Processing
164
+
165
+ ```python
166
+ # Process and save to database
167
+ await service.process_and_save(
168
+ uri="s3://bucket/meeting.m4a",
169
+ user_id="user-123"
170
+ )
171
+
172
+ # This automatically:
173
+ # 1. Downloads from S3
174
+ # 2. Chunks audio by silence
175
+ # 3. Transcribes with Whisper
176
+ # 4. Converts to markdown
177
+ # 5. Chunks markdown text
178
+ # 6. Saves File entity
179
+ # 7. Saves Resource entities (one per chunk)
180
+ # 8. Generates embeddings (ready for vector search)
181
+ ```
182
+
183
+ ## Registered Extensions
184
+
185
+ The AudioProvider is automatically registered for:
186
+ - `.wav` - Uncompressed audio
187
+ - `.mp3` - Compressed audio
188
+ - `.m4a` - Apple audio format
189
+ - `.flac` - Lossless compression
190
+ - `.ogg` - Ogg Vorbis
191
+
192
+ ## Graceful Degradation
193
+
194
+ Without OpenAI API key:
195
+ ```python
196
+ result = audio_provider.extract(content, metadata)
197
+
198
+ # Returns:
199
+ {
200
+ "text": "[Audio transcription requires OPENAI_API_KEY environment variable]",
201
+ "metadata": {"error": "missing_api_key"}
202
+ }
203
+ ```
204
+
205
+ Without pydub installed:
206
+ ```python
207
+ # Returns:
208
+ {
209
+ "text": "[Audio processing requires: pip install rem[audio]]",
210
+ "metadata": {"error": "missing_dependencies"}
211
+ }
212
+ ```
213
+
214
+ ## Testing
215
+
216
+ All providers are tested for interface consistency:
217
+
218
+ ```bash
219
+ # Run integration tests
220
+ pytest tests/integration/services/test_content_providers.py -v
221
+
222
+ # Results:
223
+ # ✓ test_markdown_provider_interface PASSED
224
+ # ✓ test_pdf_provider_interface PASSED
225
+ # ✓ test_audio_provider_interface PASSED
226
+ # ✓ test_content_service_has_all_providers PASSED
227
+ # ✓ test_markdown_file_processing PASSED
228
+ # ✓ test_audio_file_processing_without_api_key PASSED
229
+ # ✓ test_all_providers_return_text_and_metadata PASSED
230
+ # ✓ test_all_providers_handle_empty_content PASSED
231
+ # ✓ test_markdown_to_audio_consistency PASSED
232
+ # ✓ test_audio_returns_markdown_with_timestamps PASSED
233
+ ```
234
+
235
+ ## Consistency Guarantees
236
+
237
+ All providers:
238
+
239
+ 1. **Accept same input**: `extract(content: bytes, metadata: dict)`
240
+ 2. **Return same structure**: `{"text": str, "metadata": dict}`
241
+ 3. **Return markdown format**: Text is markdown-compatible
242
+ 4. **Handle errors gracefully**: Return error messages, don't crash
243
+ 5. **Register with ContentService**: Via file extension mapping
244
+ 6. **Follow pipeline**: extract → markdown → chunk → embed → save
245
+
246
+ ## Usage Examples
247
+
248
+ ### Process Single File
249
+
250
+ ```python
251
+ from rem.services.content import ContentService
252
+
253
+ service = ContentService()
254
+
255
+ # Process markdown
256
+ md_result = service.process_uri("document.md")
257
+
258
+ # Process PDF
259
+ pdf_result = service.process_uri("report.pdf")
260
+
261
+ # Process audio (same interface!)
262
+ audio_result = service.process_uri("meeting.m4a")
263
+
264
+ # All return same structure
265
+ assert "content" in md_result
266
+ assert "content" in pdf_result
267
+ assert "content" in audio_result
268
+ ```
269
+
270
+ ### Process with S3
271
+
272
+ ```python
273
+ # S3 URI - automatic download and processing
274
+ result = service.process_uri("s3://recordings/standup.m4a")
275
+
276
+ # Transcribed, chunked, and ready to save
277
+ ```
278
+
279
+ ### Custom Provider Registration
280
+
281
+ ```python
282
+ # Register custom provider
283
+ service.register_provider(
284
+ extensions=[".custom"],
285
+ provider=CustomProvider()
286
+ )
287
+
288
+ # Now .custom files use CustomProvider
289
+ ```
290
+
291
+ ## Future Enhancements
292
+
293
+ 1. **Streaming Transcription**: Process long audio files in streams
294
+ 2. **Speaker Diarization**: Identify different speakers
295
+ 3. **Language Detection**: Auto-detect language for transcription
296
+ 4. **Timestamp Refinement**: More accurate timestamps via VAD
297
+ 5. **Batch Processing**: Parallel transcription of multiple files
298
+
299
+ ## Key Takeaways
300
+
301
+ ✅ **Pluggable**: Easy to add new content types
302
+ ✅ **Consistent**: Same interface for all providers
303
+ ✅ **Testable**: All providers tested for consistency
304
+ ✅ **Graceful**: Handles missing dependencies/keys elegantly
305
+ ✅ **Integrated**: Works with ContentService out of the box
306
+ ✅ **Production-Ready**: Error handling, logging, cleanup
307
+
308
+ The AudioProvider is a **first-class citizen** in REM's content processing pipeline!
@@ -0,0 +1,376 @@
1
+ # REM Audio Processing
2
+
3
+ Lightweight audio processing service with minimal dependencies for chunking and transcribing audio files.
4
+
5
+ ## Design Philosophy
6
+
7
+ **Minimal Dependencies:**
8
+ - `wave` (stdlib) for WAV file handling
9
+ - `pydub` for audio format conversion (wraps ffmpeg)
10
+ - `requests` for OpenAI Whisper API (already a REM dependency)
11
+ - `loguru` for logging (REM standard)
12
+
13
+ **No Heavy ML Libraries:**
14
+ - No `torch`, `torchaudio`, or other heavyweight dependencies
15
+ - No `librosa` for audio analysis
16
+ - Keep the Docker image lean and fast
17
+
18
+ ## Architecture
19
+
20
+ ```
21
+ ┌─────────────────────────────────────────────────────────┐
22
+ │ REM Audio Service │
23
+ ├─────────────────────────────────────────────────────────┤
24
+ │ │
25
+ │ ┌──────────────┐ ┌──────────────┐ │
26
+ │ │ AudioChunker │────────▶│AudioTranscriber│ │
27
+ │ └──────────────┘ └──────────────┘ │
28
+ │ │ │ │
29
+ │ │ │ │
30
+ │ Split by silence OpenAI Whisper API │
31
+ │ near minute ($0.006/minute) │
32
+ │ boundaries │
33
+ │ │
34
+ └─────────────────────────────────────────────────────────┘
35
+ ```
36
+
37
+ ## Components
38
+
39
+ ### 1. AudioChunker
40
+
41
+ Splits audio files by detecting silence near minute boundaries.
42
+
43
+ **Strategy:**
44
+ - Target chunks around 60 seconds (configurable)
45
+ - Look for silence in window around target (±2 seconds)
46
+ - Split at longest silence in window
47
+ - If no silence, split at target boundary
48
+
49
+ **Benefits:**
50
+ - Keeps chunks under OpenAI's 25 MB upload limit (roughly 10 minutes of audio)
51
+ - Natural breaks at silence points
52
+ - Maintains speech context within chunks
53
+
54
+ **Example:**
55
+ ```python
56
+ from rem.services.audio import AudioChunker
57
+
58
+ chunker = AudioChunker(
59
+ target_chunk_seconds=60.0, # 1 minute target
60
+ chunk_window_seconds=2.0, # ±2 second search window
61
+ silence_threshold_db=-40.0, # Silence detection threshold
62
+ min_silence_ms=500, # Minimum 500ms silence
63
+ )
64
+
65
+ # Chunk audio file
66
+ chunks = chunker.chunk_audio("recording.m4a")
67
+
68
+ # Process chunks
69
+ for chunk in chunks:
70
+ print(f"Chunk {chunk.chunk_index}: {chunk.start_seconds:.1f}s - {chunk.end_seconds:.1f}s")
71
+ print(f"Duration: {chunk.duration_seconds:.1f}s")
72
+ print(f"File: {chunk.file_path}")
73
+
74
+ # Cleanup when done
75
+ chunker.cleanup_chunks(chunks)
76
+ ```
77
+
78
+ ### 2. AudioTranscriber
79
+
80
+ Transcribes audio using OpenAI Whisper API.
81
+
82
+ **Features:**
83
+ - Uses `requests` (no httpx dependency)
84
+ - Handles file uploads efficiently
85
+ - Automatic cost estimation
86
+ - Detailed logging with loguru
87
+
88
+ **Example:**
89
+ ```python
90
+ from rem.services.audio import AudioTranscriber
91
+
92
+ transcriber = AudioTranscriber(
93
+ api_key="sk-...", # Or from OPENAI_API_KEY env
94
+ model="whisper-1", # OpenAI Whisper model
95
+ language=None, # Auto-detect language
96
+ temperature=0.0, # Deterministic transcription
97
+ )
98
+
99
+ # Transcribe single file
100
+ result = transcriber.transcribe_file("audio.wav")
101
+ print(result.text)
102
+
103
+ # Transcribe chunks
104
+ results = transcriber.transcribe_chunks(chunks)
105
+ for result in results:
106
+ print(f"[{result.start_seconds:.1f}s - {result.end_seconds:.1f}s]: {result.text}")
107
+ ```
108
+
109
+ ### 3. Complete Workflow
110
+
111
+ ```python
112
+ from rem.services.audio import AudioChunker, AudioTranscriber
113
+
114
+ # 1. Chunk audio by silence
115
+ chunker = AudioChunker()
116
+ chunks = chunker.chunk_audio("meeting_recording.m4a")
117
+
118
+ print(f"Created {len(chunks)} chunks")
119
+
120
+ # 2. Transcribe chunks
121
+ transcriber = AudioTranscriber()
122
+ results = transcriber.transcribe_chunks(chunks)
123
+
124
+ print(f"Transcribed {len(results)} chunks")
125
+
126
+ # 3. Combine results
127
+ full_transcription = "\n\n".join([
128
+ f"[{r.start_seconds:.1f}s]: {r.text}"
129
+ for r in results
130
+ ])
131
+
132
+ print(full_transcription)
133
+
134
+ # 4. Cleanup
135
+ chunker.cleanup_chunks(chunks)
136
+ ```
137
+
138
+ ## Configuration
139
+
140
+ ### Environment Variables
141
+
142
+ ```bash
143
+ # OpenAI API Key (required for transcription)
144
+ OPENAI_API_KEY=sk-...
145
+
146
+ # Chunker Settings (optional)
147
+ AUDIO_CHUNK_TARGET_SECONDS=60 # Target chunk duration
148
+ AUDIO_CHUNK_WINDOW_SECONDS=2 # Silence search window
149
+ AUDIO_SILENCE_THRESHOLD_DB=-40 # Silence detection threshold
150
+ AUDIO_MIN_SILENCE_MS=500 # Minimum silence duration
151
+ ```
152
+
153
+ ### Transcription Costs
154
+
155
+ OpenAI Whisper API pricing: **$0.006 per minute**
156
+
157
+ Examples:
158
+ - 10 minute recording: $0.06
159
+ - 1 hour recording: $0.36
160
+ - 10 hour recording: $3.60
161
+
162
+ ## Supported Formats
163
+
164
+ ### With pydub + ffmpeg:
165
+ - WAV (uncompressed)
166
+ - MP3 (compressed)
167
+ - M4A (Apple audio)
168
+ - FLAC (lossless)
169
+ - OGG (Vorbis)
170
+ - WMA (Windows)
171
+
172
+ ### Without pydub:
173
+ - WAV files only (all other formats require pydub for format conversion)
174
+
175
+ ## Docker Setup
176
+
177
+ The Dockerfile includes ffmpeg for audio processing:
178
+
179
+ ```dockerfile
180
+ # Runtime dependencies
181
+ RUN apt-get install -y \
182
+ ffmpeg # Required by pydub for format conversion
183
+ ```
184
+
185
+ Install pydub dependency:
186
+
187
+ ```bash
188
+ # Install audio extras (the distribution is published as "remdb")
189
+ pip install "remdb[audio]"
190
+
191
+ # Or install all extras
192
+ pip install "remdb[all]"
193
+ ```
194
+
195
+ ## Dependencies
196
+
197
+ ### Core (always installed with rem[audio]):
198
+ - `pydub>=0.25.0` - Audio manipulation
199
+
200
+ ### System (Docker):
201
+ - `ffmpeg` - Audio codec support (installed in Dockerfile)
202
+
203
+ ### External APIs:
204
+ - OpenAI Whisper API - Speech-to-text transcription
205
+
206
+ ## Error Handling
207
+
208
+ ### Missing API Key
209
+ ```python
210
+ transcriber = AudioTranscriber() # No API key
211
+
212
+ # Raises: ValueError("OpenAI API key required for transcription")
213
+ result = transcriber.transcribe_file("audio.wav")
214
+ ```
215
+
216
+ ### File Too Large
217
+ ```python
218
+ # Whisper API limit: 25 MB
219
+ transcriber.transcribe_file("huge_file.wav")
220
+
221
+ # Raises: ValueError("Audio file too large: 30.5 MB (max 25 MB)")
222
+ ```
223
+
224
+ ### No pydub
225
+ ```python
226
+ # Without pydub installed
227
+ chunker = AudioChunker()
228
+ chunker.chunk_audio("audio.m4a")
229
+
230
+ # Raises: RuntimeError("pydub required for .m4a files")
231
+ ```
232
+
233
+ ## Best Practices
234
+
235
+ 1. **Chunk Before Transcribing**
236
+ - Don't send entire 2-hour recordings to Whisper
237
+ - Chunk into 1-minute segments for better quality
238
+ - Easier to debug and retry failed segments
239
+
240
+ 2. **Monitor Costs**
241
+ - Log transcription duration and cost
242
+ - Set budgets for long recordings
243
+ - Use `transcriber.transcribe_chunks()` for cost estimation
244
+
245
+ 3. **Handle Failures Gracefully**
246
+ - Chunks can fail independently
247
+ - Retry logic for transient errors
248
+ - Save partial results
249
+
250
+ 4. **Cleanup Temporary Files**
251
+ - Always call `chunker.cleanup_chunks()` when done
252
+ - Or use a context manager (future enhancement)
253
+
254
+ 5. **Use Silence Detection**
255
+ - Default settings work well for most speech
256
+ - Adjust `silence_threshold_db` for noisy recordings
257
+ - Increase `min_silence_ms` so that only longer, natural pauses count as split points
258
+
259
+ ## Integration with REM
260
+
261
+ ### File Processing
262
+
263
+ ```python
264
+ # rem/workers/file_processor.py
265
+ from rem.services.audio import AudioChunker, AudioTranscriber
266
+
267
+ async def process_audio_file(file_path: Path, user_id: str):
268
+ """Process audio file and create REM resources."""
269
+
270
+ # 1. Chunk audio
271
+ chunker = AudioChunker()
272
+ chunks = chunker.chunk_audio(file_path)
273
+
274
+ # 2. Transcribe chunks
275
+ transcriber = AudioTranscriber()
276
+ results = transcriber.transcribe_chunks(chunks)
277
+
278
+ # 3. Create REM resources
279
+ for i, result in enumerate(results):
280
+ resource = Resource(
281
+ name=f"{file_path.stem} - Part {i+1}",
282
+ uri=f"{file_path.as_uri()}#t={result.start_seconds},{result.end_seconds}",
283
+ content=result.text,
284
+ timestamp=datetime.now(),
285
+ category="transcription",
286
+ user_id=user_id,
287
+ )
288
+ await repository.upsert(resource)
289
+
290
+ # 4. Cleanup
291
+ chunker.cleanup_chunks(chunks)
292
+ ```
293
+
294
+ ### Dreaming Worker
295
+
296
+ ```python
297
+ # rem/workers/dreaming.py
298
+ from rem.services.audio import AudioChunker, AudioTranscriber
299
+
300
+ async def extract_moments_from_audio(audio_resource: Resource):
301
+ """Extract moments from audio transcription."""
302
+
303
+ # Audio already transcribed and stored as Resource
304
+ # Use transcription content to identify temporal moments
305
+
306
+ # Example: Split by speaker changes, topic shifts, etc.
307
+ moments = extract_temporal_segments(audio_resource.content)
308
+
309
+ for moment in moments:
310
+ await repository.upsert(moment)
311
+ ```
312
+
313
+ ## Logging
314
+
315
+ All logs use loguru (REM standard):
316
+
317
+ ```python
318
+ from loguru import logger
319
+
320
+ # Chunker logs
321
+ logger.info("Chunking audio: /path/to/file.m4a")
322
+ logger.debug("Found silence at 58.3s (target: 60.0s)")
323
+ logger.info("Created 5 chunks in /tmp/rem_audio_chunks_xyz")
324
+
325
+ # Transcriber logs
326
+ logger.info("Transcribing chunk 2/5 (58.3s - 118.0s)")
327
+ logger.debug("Sending 2.3 MB to OpenAI Whisper API")
328
+ logger.info("✓ Transcription complete: 245 characters")
329
+ logger.info("Estimated cost: $0.180 (30.0 minutes)")
330
+ ```
331
+
332
+ ## Testing
333
+
334
+ ```bash
335
+ # Run audio service tests
336
+ pytest tests/unit/services/audio/
337
+
338
+ # Test with real files (requires OpenAI API key)
339
+ export OPENAI_API_KEY=sk-...
340
+ pytest tests/integration/services/audio/
341
+ ```
342
+
343
+ ## Future Enhancements
344
+
345
+ 1. **Context Manager for Cleanup**
346
+ ```python
347
+ with AudioChunker() as chunker:
348
+ chunks = chunker.chunk_audio("file.m4a")
349
+ # Auto-cleanup on exit
350
+ ```
351
+
352
+ 2. **Batch Transcription**
353
+ - Parallel API requests
354
+ - Rate limiting
355
+ - Progress tracking
356
+
357
+ 3. **Speaker Diarization**
358
+ - Detect speaker changes
359
+ - Label speakers
360
+ - Split on speaker boundaries
361
+
362
+ 4. **Advanced Silence Detection**
363
+ - Machine learning-based VAD
364
+ - Energy-based fallback
365
+ - Adaptive thresholds
366
+
367
+ 5. **Format Detection**
368
+ - Auto-detect audio format
369
+ - Validate before processing
370
+ - Better error messages
371
+
372
+ ## References
373
+
374
+ - [OpenAI Whisper API](https://platform.openai.com/docs/guides/speech-to-text)
375
+ - [pydub Documentation](https://github.com/jiaaro/pydub)
376
+ - [ffmpeg Documentation](https://ffmpeg.org/documentation.html)
@@ -0,0 +1,15 @@
1
+ """
2
+ Audio processing service for REM.
3
+
4
+ Lightweight audio processing with minimal dependencies:
5
+ - wave module (stdlib) for WAV file handling
6
+ - pydub (optional) for format conversion (M4A, MP3, etc.)
7
+ - requests (already a dependency) for OpenAI Whisper API
8
+
9
+ No torch, torchaudio, or other heavy ML dependencies.
10
+ """
11
+
12
+ from .chunker import AudioChunker
13
+ from .transcriber import AudioTranscriber
14
+
15
+ __all__ = ["AudioChunker", "AudioTranscriber"]