fow-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. fly_on_the_wall/__init__.py +3 -0
  2. fly_on_the_wall/audio.py +164 -0
  3. fly_on_the_wall/audio_metadata.py +241 -0
  4. fly_on_the_wall/cache.py +26 -0
  5. fly_on_the_wall/cleanup.py +29 -0
  6. fly_on_the_wall/cli.py +641 -0
  7. fly_on_the_wall/cli_costs.py +81 -0
  8. fly_on_the_wall/cli_menu.py +163 -0
  9. fly_on_the_wall/cli_publish.py +141 -0
  10. fly_on_the_wall/cli_speaker_review.py +315 -0
  11. fly_on_the_wall/cli_watch.py +209 -0
  12. fly_on_the_wall/config.py +92 -0
  13. fly_on_the_wall/costs.py +169 -0
  14. fly_on_the_wall/db.py +508 -0
  15. fly_on_the_wall/doctor.py +142 -0
  16. fly_on_the_wall/embeddings.py +142 -0
  17. fly_on_the_wall/exporting.py +155 -0
  18. fly_on_the_wall/glossary.py +31 -0
  19. fly_on_the_wall/meetings.py +382 -0
  20. fly_on_the_wall/normalization.py +166 -0
  21. fly_on_the_wall/people.py +82 -0
  22. fly_on_the_wall/people_embeddings.py +68 -0
  23. fly_on_the_wall/pipeline.py +120 -0
  24. fly_on_the_wall/processing.py +427 -0
  25. fly_on_the_wall/providers/__init__.py +1 -0
  26. fly_on_the_wall/providers/elevenlabs.py +145 -0
  27. fly_on_the_wall/providers/openai_analysis.py +195 -0
  28. fly_on_the_wall/providers/openai_cleanup.py +91 -0
  29. fly_on_the_wall/publishing.py +410 -0
  30. fly_on_the_wall/reanalysis.py +172 -0
  31. fly_on_the_wall/recording_quality.py +141 -0
  32. fly_on_the_wall/rendering.py +115 -0
  33. fly_on_the_wall/secrets.py +93 -0
  34. fly_on_the_wall/service_pricing.py +75 -0
  35. fly_on_the_wall/setup.py +221 -0
  36. fly_on_the_wall/speaker_identity.py +173 -0
  37. fly_on_the_wall/speaker_matching.py +134 -0
  38. fly_on_the_wall/speakers.py +221 -0
  39. fly_on_the_wall/storage.py +53 -0
  40. fly_on_the_wall/voice_samples.py +125 -0
  41. fly_on_the_wall/watch.py +347 -0
  42. fow_cli-0.1.0.dist-info/METADATA +447 -0
  43. fow_cli-0.1.0.dist-info/RECORD +46 -0
  44. fow_cli-0.1.0.dist-info/WHEEL +4 -0
  45. fow_cli-0.1.0.dist-info/entry_points.txt +2 -0
  46. fow_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,120 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Callable, Sequence
4
+ from dataclasses import dataclass, field
5
+ from sqlite3 import Connection
6
+
7
# Stage statuses are plain strings; the alias only documents intent in signatures.
StageStatus = str

# Known status values for a pipeline stage.
PENDING: StageStatus = "pending"
RUNNING: StageStatus = "running"
DONE: StageStatus = "done"
FAILED: StageStatus = "failed"
STALE: StageStatus = "stale"
14
+
15
+
16
class PipelineError(RuntimeError):
    """Error raised when a pipeline stage is not able to run."""
18
+
19
+
20
@dataclass(frozen=True)
class Stage:
    """Immutable description of a single pipeline stage.

    Attributes:
        name: Identifier of the stage; used as the key when its status is
            looked up or stored.
        run: Callable invoked with the database connection and the meeting id.
        dependencies: Names of the stages that must be done before this one.
    """

    name: str
    run: Callable[[Connection, str], None]
    dependencies: tuple[str, ...] = field(default_factory=tuple)
25
+
26
+
27
def run_pipeline(
    connection: Connection,
    meeting_id: str,
    stages: Sequence[Stage],
    force: bool = False,
) -> list[str]:
    """Run ``stages`` in the given order for one meeting.

    Dependencies are checked for every stage, including stages that end up
    being skipped. A stage whose stored status is already ``done`` is skipped
    unless ``force`` is true.

    Args:
        connection: Database connection passed through to the stage helpers.
        meeting_id: Meeting the stages operate on.
        stages: Stages to run, in execution order.
        force: When true, run stages even if they are already done.

    Returns:
        The names of the stages that were actually executed, in order.

    Raises:
        PipelineError: If a dependency is unknown or not done.
    """
    known_stages = {item.name: item for item in stages}
    executed: list[str] = []

    for current in stages:
        _ensure_dependencies_done(connection, meeting_id, current, known_stages)
        # The stored status is only consulted when the run is not forced.
        if force or get_stage_status(connection, meeting_id, current.name) != DONE:
            run_stage(connection, meeting_id, current)
            executed.append(current.name)

    return executed
44
+
45
+
46
def run_stage(connection: Connection, meeting_id: str, stage: Stage) -> None:
    """Execute one stage and record its status transitions.

    The stage is marked ``running`` before it is called, ``failed`` (with the
    exception text as error message) if it raises an ``Exception``, and
    ``done`` when it returns normally. The original exception is re-raised.
    """
    stage_name = stage.name
    set_stage_status(connection, meeting_id, stage_name, RUNNING)
    try:
        stage.run(connection, meeting_id)
    except Exception as error:
        set_stage_status(connection, meeting_id, stage_name, FAILED, str(error))
        raise
    else:
        set_stage_status(connection, meeting_id, stage_name, DONE)
54
+
55
+
56
def get_stage_status(connection: Connection, meeting_id: str, stage_name: str) -> StageStatus | None:
    """Return the stored status of a stage, or ``None`` when no row exists.

    The result row is indexed by column name, so the connection must use a
    row factory that supports ``row["status"]``.
    """
    cursor = connection.execute(
        "SELECT status FROM pipeline_stages WHERE meeting_id = ? AND stage_name = ?",
        (meeting_id, stage_name),
    )
    record = cursor.fetchone()
    if record is None:
        return None
    return record["status"]
62
+
63
+
64
def set_stage_status(
    connection: Connection,
    meeting_id: str,
    stage_name: str,
    status: StageStatus,
    error_message: str | None = None,
) -> None:
    """Insert or update the ``pipeline_stages`` row for one stage.

    Timestamp handling, as encoded in the SQL:

    * ``started_at`` is set when the new status is ``'running'``; on update it
      is otherwise left unchanged.
    * ``completed_at`` is set when the new status is ``'done'`` and is NULL
      for every other status.
    * ``updated_at`` is always refreshed.

    ``error_message`` overwrites any previously stored message, so omitting it
    clears the message.
    """
    upsert_sql = """
        INSERT INTO pipeline_stages(
            meeting_id,
            stage_name,
            status,
            error_message,
            started_at,
            completed_at,
            updated_at
        ) VALUES (
            ?,
            ?,
            ?,
            ?,
            CASE WHEN ? = 'running' THEN CURRENT_TIMESTAMP ELSE NULL END,
            CASE WHEN ? = 'done' THEN CURRENT_TIMESTAMP ELSE NULL END,
            CURRENT_TIMESTAMP
        )
        ON CONFLICT(meeting_id, stage_name) DO UPDATE SET
            status = excluded.status,
            error_message = excluded.error_message,
            started_at = CASE
                WHEN excluded.status = 'running' THEN CURRENT_TIMESTAMP
                ELSE pipeline_stages.started_at
            END,
            completed_at = CASE
                WHEN excluded.status = 'done' THEN CURRENT_TIMESTAMP
                ELSE NULL
            END,
            updated_at = CURRENT_TIMESTAMP
        """
    # ``status`` is bound three times: the stored value plus the two CASE tests.
    bindings = (meeting_id, stage_name, status, error_message, status, status)
    # The connection context manager commits on success and rolls back on error.
    with connection:
        connection.execute(upsert_sql, bindings)
106
+
107
+
108
def mark_stale(connection: Connection, meeting_id: str, stage_names: Sequence[str]) -> None:
    """Set the status of every named stage of the meeting to ``stale``.

    Each stage is updated through its own ``set_stage_status`` call.
    """
    for name in stage_names:
        set_stage_status(connection, meeting_id, name, STALE)
111
+
112
+
113
+ def _ensure_dependencies_done(
114
+ connection: Connection, meeting_id: str, stage: Stage, stage_by_name: dict[str, Stage]
115
+ ) -> None:
116
+ for dependency in stage.dependencies:
117
+ if dependency not in stage_by_name:
118
+ raise PipelineError(f"Unknown dependency {dependency!r} for stage {stage.name!r}.")
119
+ if get_stage_status(connection, meeting_id, dependency) != DONE:
120
+ raise PipelineError(f"Stage {stage.name!r} requires {dependency!r} to be done.")
@@ -0,0 +1,427 @@
1
+ from __future__ import annotations
2
+
3
+ import time
4
+ from collections.abc import Callable
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+ from sqlite3 import Connection
8
+
9
+ from fly_on_the_wall.cache import read_cached_text, text_sha256, write_cached_text
10
+ from fly_on_the_wall.cleanup import deterministic_cleanup
11
+ from fly_on_the_wall.config import AppConfig
12
+ from fly_on_the_wall.costs import record_openai_usage
13
+ from fly_on_the_wall.embeddings import EmbeddingBackend
14
+ from fly_on_the_wall.exporting import ExportResult, export_markdown_transcript
15
+ from fly_on_the_wall.glossary import load_glossary_terms
16
+ from fly_on_the_wall.meetings import (
17
+ Meeting,
18
+ get_meeting,
19
+ import_meeting,
20
+ latest_completed_provider_run,
21
+ update_generated_title,
22
+ )
23
+ from fly_on_the_wall.normalization import normalize_provider_run
24
+ from fly_on_the_wall.providers.elevenlabs import run_transcription
25
+ from fly_on_the_wall.providers.openai_analysis import (
26
+ DEFAULT_ANALYSIS_MODEL,
27
+ AnalysisRequest,
28
+ OpenAIAnalysisError,
29
+ OpenAIRequestOptions,
30
+ TitleRequest,
31
+ analyze_meeting,
32
+ fallback_analysis,
33
+ suggest_meeting_title,
34
+ )
35
+ from fly_on_the_wall.providers.openai_cleanup import (
36
+ CLEANUP_PROMPT_VERSION,
37
+ OpenAICleanupError,
38
+ cleanup_transcript,
39
+ )
40
+ from fly_on_the_wall.providers.openai_cleanup import (
41
+ DEFAULT_MODEL as DEFAULT_CLEANUP_MODEL,
42
+ )
43
+ from fly_on_the_wall.publishing import publish_enabled_targets
44
+ from fly_on_the_wall.recording_quality import (
45
+ RecordingIgnoredError,
46
+ assess_after_transcription,
47
+ assess_before_transcription,
48
+ store_recording_quality,
49
+ )
50
+ from fly_on_the_wall.rendering import render_named_transcript
51
+ from fly_on_the_wall.secrets import get_api_key
52
+ from fly_on_the_wall.speaker_identity import match_provider_run_speakers
53
+ from fly_on_the_wall.storage import StoragePaths, ensure_storage_layout
54
+
55
# Transcription backend: called with (connection, meeting id, audio path, storage
# paths); its return value is used as the provider run id.
TranscribeFn = Callable[[Connection, str, Path, StoragePaths], str]
# Sink for progress messages.
ProgressFn = Callable[[str], None]
57
+
58
+
59
@dataclass(frozen=True)
class ProcessResult:
    """Outcome of processing or refreshing a meeting.

    Attributes:
        meeting: The meeting as re-read from the database after processing.
        provider_run_id: Id of the transcription provider run that was used.
        export: Result of the markdown export.
    """

    meeting: Meeting
    provider_run_id: str
    export: ExportResult
64
+
65
+
66
@dataclass(frozen=True)
class RefreshContext:
    """Bundle of everything the refresh helpers need for one meeting.

    Attributes:
        connection: Database connection used by all steps.
        meeting: Meeting being refreshed.
        config: Application configuration.
        paths: Storage locations for artifacts.
        description: Optional meeting description passed to the OpenAI steps
            and mixed into their cache keys.
        embedding_backend: Optional backend handed to speaker matching.
        progress: Progress reporter used for messages and timed steps.
    """

    connection: Connection
    meeting: Meeting
    config: AppConfig
    paths: StoragePaths
    description: str | None
    embedding_backend: EmbeddingBackend | None
    progress: TimedProgress
75
+
76
+
77
@dataclass(frozen=True)
class TranscriptArtifacts:
    """Pair of transcript texts produced by the cleanup step.

    Attributes:
        deterministic: Transcript after deterministic cleanup.
        cleaned: Transcript after the optional OpenAI cleanup; equal to
            ``deterministic`` when that cleanup is skipped or fails.
    """

    deterministic: str
    cleaned: str
81
+
82
+
83
def process_audio(
    connection: Connection,
    audio_path: Path,
    title: str | None,
    config: AppConfig,
    storage: StoragePaths | None = None,
    description: str | None = None,
    transcribe_fn: TranscribeFn | None = None,
    embedding_backend: EmbeddingBackend | None = None,
    progress: ProgressFn | None = None,
) -> ProcessResult:
    """Import an audio file as a meeting, transcribe it and refresh its outputs.

    A completed provider run that already exists for the meeting is reused
    instead of transcribing again.

    Args:
        connection: Database connection.
        audio_path: Audio file to import.
        title: Optional meeting title passed to the import.
        config: Application configuration.
        storage: Storage layout; created via ``ensure_storage_layout`` if omitted.
        description: Optional meeting description.
        transcribe_fn: Replacement transcription function; defaults to ElevenLabs.
        embedding_backend: Optional backend for speaker matching.
        progress: Optional callback receiving progress messages.

    Returns:
        The refreshed meeting, the provider run id and the export result.

    Raises:
        RecordingIgnoredError: If the recording is assessed as ``"empty"`` or
            ``"nonsense"``.
    """
    paths = storage or ensure_storage_layout()
    reporter = TimedProgress(progress)

    with reporter.step("Importing audio"):
        meeting = import_meeting(connection, audio_path, title, config, paths, description)
    reporter.message(f"Audio duration: {_audio_duration_label(connection, meeting.id)}")

    # A pre-transcription assessment may be unavailable (None); then nothing is stored.
    pre_quality = assess_before_transcription(connection, meeting)
    if pre_quality is not None:
        store_recording_quality(connection, meeting.id, pre_quality)
        if pre_quality.status in {"empty", "nonsense"}:
            reporter.message(f"Ignoring recording ({pre_quality.reason})")
            raise RecordingIgnoredError(meeting, pre_quality)

    previous_run = latest_completed_provider_run(connection, meeting.id)
    if previous_run is not None:
        reporter.message("Reusing completed ElevenLabs transcription")
        provider_run_id = previous_run["id"]
    else:
        with reporter.step("Transcribing audio with ElevenLabs"):
            transcribe = transcribe_fn or _run_elevenlabs_transcription
            provider_run_id = transcribe(connection, meeting.id, meeting.imported_audio_path, paths)

    context = RefreshContext(
        connection=connection,
        meeting=meeting,
        config=config,
        paths=paths,
        description=description,
        embedding_backend=embedding_backend,
        progress=reporter,
    )
    export = _refresh_provider_run(context, provider_run_id)
    reporter.message(f"Done ({reporter.total_elapsed()})")
    refreshed_meeting = _meeting_from_database(connection, meeting.id)
    return ProcessResult(refreshed_meeting, provider_run_id, export)
118
+
119
+
120
def refresh_meeting(
    connection: Connection,
    meeting_id_or_slug: str,
    config: AppConfig,
    storage: StoragePaths | None = None,
    embedding_backend: EmbeddingBackend | None = None,
    progress: ProgressFn | None = None,
) -> ProcessResult:
    """Regenerate the derived outputs of an already transcribed meeting.

    Args:
        connection: Database connection.
        meeting_id_or_slug: Identifier passed to ``get_meeting``.
        config: Application configuration.
        storage: Storage layout; created via ``ensure_storage_layout`` if omitted.
        embedding_backend: Optional backend for speaker matching.
        progress: Optional callback receiving progress messages.

    Returns:
        The refreshed meeting, the provider run id and the export result.

    Raises:
        ValueError: If the meeting does not exist or has no completed
            transcription.
    """
    paths = storage or ensure_storage_layout()

    record = get_meeting(connection, meeting_id_or_slug)
    if record is None:
        raise ValueError(f"Meeting not found: {meeting_id_or_slug}")

    latest_run = latest_completed_provider_run(connection, record["id"])
    if latest_run is None:
        raise ValueError(f"No completed transcription found for meeting: {meeting_id_or_slug}")

    meeting_fields = {
        "id": record["id"],
        "slug": record["slug"],
        "title": record["title"],
        # A missing title_source is treated as a manually chosen title.
        "title_source": record.get("title_source", "manual"),
        "language": record["language"],
        "imported_audio_path": Path(record["imported_audio_path"]),
        "audio_sha256": record.get("audio_sha256"),
        "generated_title": record.get("generated_title"),
    }
    meeting = Meeting(**meeting_fields)

    reporter = TimedProgress(progress)
    reporter.message(f"Refreshing meeting {meeting.slug}")
    context = RefreshContext(
        connection=connection,
        meeting=meeting,
        config=config,
        paths=paths,
        description=record.get("description"),
        embedding_backend=embedding_backend,
        progress=reporter,
    )
    export = _refresh_provider_run(context, latest_run["id"])
    reporter.message(f"Done ({reporter.total_elapsed()})")
    refreshed_meeting = _meeting_from_database(connection, meeting.id)
    return ProcessResult(refreshed_meeting, latest_run["id"], export)
161
+
162
+
163
def _refresh_provider_run(context: RefreshContext, provider_run_id: str) -> ExportResult:
    """Rebuild every derived artifact for one provider run and export it.

    Steps, in order: normalize, assess and store recording quality, match
    speakers (best effort), render, deterministic cleanup, optional OpenAI
    cleanup, analysis, title suggestion, markdown export, publishing.

    Raises:
        RecordingIgnoredError: If the post-transcription quality status is
            ``"empty"`` or ``"nonsense"``.
    """
    connection = context.connection
    meeting = context.meeting
    reporter = context.progress

    with reporter.step("Normalizing transcript"):
        segments = normalize_provider_run(connection, provider_run_id)
    quality = assess_after_transcription(connection, meeting, segments)
    store_recording_quality(connection, meeting.id, quality)
    if quality.status in {"empty", "nonsense"}:
        reporter.message(f"Ignoring recording ({quality.reason})")
        raise RecordingIgnoredError(meeting, quality)

    with reporter.step("Matching speaker identities"):
        try:
            match_provider_run_speakers(
                connection,
                provider_run_id,
                context.embedding_backend,
                context.paths,
            )
        except RuntimeError as error:
            # Speaker matching is optional: report the reason and carry on.
            reporter.message(f"Speaker identity matching skipped ({error})")

    with reporter.step("Rendering named transcript"):
        named_transcript = render_named_transcript(connection, provider_run_id, storage=context.paths)

    with reporter.step("Running deterministic cleanup"):
        deterministic_transcript = deterministic_cleanup(named_transcript)

    artifacts = _cleanup_transcript(context, deterministic_transcript)
    cleaned_transcript = artifacts.cleaned
    analysis = _analyze_transcript(context, cleaned_transcript)
    _suggest_and_apply_title(context, cleaned_transcript, analysis)

    with reporter.step("Exporting markdown"):
        export = export_markdown_transcript(connection, meeting.id, cleaned_transcript, analysis, context.paths)

    _publish_enabled_targets(connection, meeting.id, reporter)
    return export
196
+
197
+
198
def _cleanup_transcript(context: RefreshContext, deterministic_transcript: str) -> TranscriptArtifacts:
    """Apply the optional OpenAI "light" cleanup on top of the deterministic one.

    The OpenAI step only runs when ``cleanup_mode`` is ``"light"`` and an
    OpenAI API key is available. Results are cached per meeting under a key
    derived from the model, prompt version, description, glossary and input
    transcript. If the OpenAI call fails, the deterministic transcript is
    returned as the cleaned one.
    """
    openai_cleanup_enabled = context.config.cleanup_mode == "light" and bool(get_api_key("openai"))
    if not openai_cleanup_enabled:
        return TranscriptArtifacts(deterministic_transcript, deterministic_transcript)

    glossary_terms = load_glossary_terms(context.config.glossary_path)
    key_parts = [
        DEFAULT_CLEANUP_MODEL,
        CLEANUP_PROMPT_VERSION,
        context.description or "",
        "\n".join(glossary_terms),
        deterministic_transcript,
    ]
    cache_key = text_sha256("\n".join(key_parts))
    cache_dir = context.paths.artifacts / context.meeting.id / "llm-cleanup"

    cached = read_cached_text(cache_dir, cache_key)
    if cached is not None:
        context.progress.message("Reusing OpenAI light cleanup")
        return TranscriptArtifacts(deterministic_transcript, cached)

    def _record_usage(response):
        # Attribute the OpenAI usage of this call to the meeting.
        return record_openai_usage(
            context.connection,
            meeting_id=context.meeting.id,
            model=DEFAULT_CLEANUP_MODEL,
            service="cleanup",
            response=response,
        )

    with context.progress.step("Running OpenAI light cleanup"):
        try:
            cleaned = cleanup_transcript(
                deterministic_transcript,
                glossary_terms=glossary_terms,
                meeting_context=context.description,
                usage_callback=_record_usage,
            )
            write_cached_text(cache_dir, cache_key, cleaned)
            return TranscriptArtifacts(deterministic_transcript, cleaned)
        except OpenAICleanupError as error:
            context.progress.message(f"OpenAI cleanup failed; exporting deterministic cleanup ({error})")
            return TranscriptArtifacts(deterministic_transcript, deterministic_transcript)
239
+
240
+
241
def _publish_enabled_targets(connection: Connection, meeting_id: str, progress: TimedProgress) -> None:
    """Publish the meeting to the enabled targets and report each result.

    A ``ValueError`` from publishing is reported as a skip message instead of
    being propagated.
    """
    try:
        results = publish_enabled_targets(connection, meeting_id)
    except ValueError as error:
        progress.message(f"Publishing skipped ({error})")
    else:
        for item in results:
            progress.message(f"Published to {item.target.name}: {item.output_path}")
249
+
250
+
251
def _suggest_and_apply_title(
    context: RefreshContext,
    transcript: str,
    analysis: str,
) -> None:
    """Generate a meeting title with OpenAI and store it, unless the title is manual.

    Does nothing when no OpenAI API key is configured or when the stored
    ``title_source`` is ``"manual"``. Generated titles are cached per meeting.
    A failed OpenAI call is reported and leaves the title untouched.

    Raises:
        ValueError: If the meeting can no longer be found in the database.
    """
    if not get_api_key("openai"):
        return

    stored_meeting = get_meeting(context.connection, context.meeting.id)
    if stored_meeting is None:
        raise ValueError(f"Meeting not found: {context.meeting.id}")
    if stored_meeting.get("title_source") == "manual":
        return

    key_material = "\n".join([DEFAULT_ANALYSIS_MODEL, context.description or "", transcript, analysis])
    cache_key = text_sha256(key_material)
    cache_dir = context.paths.artifacts / context.meeting.id / "generated-title"

    def _record_usage(response):
        # Attribute the OpenAI usage of this call to the meeting.
        return record_openai_usage(
            context.connection,
            meeting_id=context.meeting.id,
            model=DEFAULT_ANALYSIS_MODEL,
            service="title",
            response=response,
        )

    generated_title = read_cached_text(cache_dir, cache_key)
    if generated_title is not None:
        context.progress.message("Reusing generated meeting title")
    else:
        with context.progress.step("Generating meeting title"):
            try:
                request = TitleRequest(
                    transcript,
                    analysis,
                    meeting_context=context.description,
                    options=OpenAIRequestOptions(usage_callback=_record_usage),
                )
                generated_title = suggest_meeting_title(request)
            except OpenAIAnalysisError as error:
                context.progress.message(f"Meeting title generation failed ({error})")
                return
        write_cached_text(cache_dir, cache_key, generated_title)

    # A blank title is cached but never applied.
    if generated_title.strip():
        update_generated_title(context.connection, context.meeting.id, generated_title)
298
+
299
+
300
def _run_elevenlabs_transcription(
    connection: Connection, meeting_id: str, audio_path: Path, storage: StoragePaths
) -> str:
    """Default transcription function: delegate to ElevenLabs ``run_transcription``.

    Returns:
        The id of the provider run that was created.
    """
    provider_run_id = run_transcription(connection, meeting_id, audio_path, storage)
    return provider_run_id
304
+
305
+
306
def _meeting_from_database(connection: Connection, meeting_id: str) -> Meeting:
    """Load a meeting row and convert it into a ``Meeting`` instance.

    Raises:
        ValueError: If ``get_meeting`` finds no meeting for ``meeting_id``.
    """
    record = get_meeting(connection, meeting_id)
    if record is None:
        raise ValueError(f"Meeting not found: {meeting_id}")

    meeting_fields = {
        "id": record["id"],
        "slug": record["slug"],
        "title": record["title"],
        # A missing title_source is treated as a manually chosen title.
        "title_source": record.get("title_source", "manual"),
        "language": record["language"],
        "imported_audio_path": Path(record["imported_audio_path"]),
        "audio_sha256": record.get("audio_sha256"),
        "generated_title": record.get("generated_title"),
    }
    return Meeting(**meeting_fields)
320
+
321
+
322
+ def _report(progress: ProgressFn | None, message: str) -> None:
323
+ if progress is not None:
324
+ progress(message)
325
+
326
+
327
class TimedProgress:
    """Progress reporter that remembers when it was created.

    Wraps an optional message callback and measures total elapsed time on the
    monotonic clock.
    """

    def __init__(self, progress: ProgressFn | None) -> None:
        """Store the callback and start the overall timer."""
        self.progress = progress
        self.started_at = time.monotonic()

    def message(self, message: str) -> None:
        """Send ``message`` to the callback, if one was provided."""
        _report(self.progress, message)

    def step(self, label: str) -> TimedStep:
        """Return a context manager that reports and times the step ``label``."""
        timed_step = TimedStep(self, label)
        return timed_step

    def total_elapsed(self) -> str:
        """Return the formatted time elapsed since this reporter was created."""
        elapsed_seconds = time.monotonic() - self.started_at
        return _format_elapsed(elapsed_seconds)
340
+
341
+
342
class TimedStep:
    """Context manager that announces a labelled step and reports its duration.

    On entry the label is sent as a progress message. On exit a second message
    states how long the step took and whether it finished or was aborted by an
    exception. Exceptions are never suppressed.
    """

    def __init__(self, progress: TimedProgress, label: str) -> None:
        """Remember the reporter and label; timing starts in ``__enter__``."""
        self.progress = progress
        self.label = label
        self.started_at = 0.0

    def __enter__(self) -> None:
        """Start the timer and announce the step."""
        self.started_at = time.monotonic()
        self.progress.message(self.label)

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Report the elapsed time; returns ``None`` so exceptions propagate."""
        elapsed = _format_elapsed(time.monotonic() - self.started_at)
        # A step whose body raised did not complete; say so instead of
        # reporting "completed" right before the error surfaces.
        outcome = "completed in" if exc_type is None else "failed after"
        self.progress.message(f"{self.label} {outcome} {elapsed}")
355
+
356
+
357
def _audio_duration_label(connection: Connection, meeting_id: str) -> str:
    """Return the meeting's audio duration as text, or ``"Unknown"`` if not stored."""
    seconds = _audio_duration_from_metadata(connection, meeting_id)
    return "Unknown" if seconds is None else _format_duration(seconds)
362
+
363
+
364
+ def _audio_duration_from_metadata(connection: Connection, meeting_id: str) -> float | None:
365
+ row = connection.execute(
366
+ "SELECT duration_seconds FROM audio_metadata WHERE meeting_id = ?",
367
+ (meeting_id,),
368
+ ).fetchone()
369
+ if row is None or row["duration_seconds"] is None:
370
+ return None
371
+ return float(row["duration_seconds"])
372
+
373
+
374
+ def _format_duration(seconds: float) -> str:
375
+ total_seconds = int(seconds)
376
+ hours, remainder = divmod(total_seconds, 3600)
377
+ minutes, seconds = divmod(remainder, 60)
378
+ if hours:
379
+ return f"{hours}h {minutes}m {seconds}s"
380
+ if minutes:
381
+ return f"{minutes}m {seconds}s"
382
+ return f"{seconds}s"
383
+
384
+
385
+ def _format_elapsed(seconds: float) -> str:
386
+ if seconds < 1:
387
+ return f"{seconds:.2f}s"
388
+ return _format_duration(seconds)
389
+
390
+
391
def _analyze_transcript(
    context: RefreshContext,
    transcript: str,
) -> str:
    """Return the meeting analysis for ``transcript``, using the cache when possible.

    Without an OpenAI API key, or when the OpenAI call fails, a fallback
    analysis is returned and nothing is cached. Successful analyses are cached
    per meeting under a key derived from the model, description and transcript.
    """
    if not get_api_key("openai"):
        return fallback_analysis("OPENAI_API_KEY is missing")

    key_material = "\n".join([DEFAULT_ANALYSIS_MODEL, context.description or "", transcript])
    cache_key = text_sha256(key_material)
    cache_dir = context.paths.artifacts / context.meeting.id / "analysis"

    cached = read_cached_text(cache_dir, cache_key)
    if cached is not None:
        context.progress.message("Reusing meeting analysis")
        return cached

    def _record_usage(response):
        # Attribute the OpenAI usage of this call to the meeting.
        return record_openai_usage(
            context.connection,
            meeting_id=context.meeting.id,
            model=DEFAULT_ANALYSIS_MODEL,
            service="analysis",
            response=response,
        )

    with context.progress.step("Analyzing meeting"):
        try:
            request = AnalysisRequest(
                transcript,
                meeting_context=context.description,
                options=OpenAIRequestOptions(usage_callback=_record_usage),
            )
            analysis = analyze_meeting(request)
        except OpenAIAnalysisError as error:
            context.progress.message(f"Meeting analysis failed; exporting fallback analysis ({error})")
            return fallback_analysis(str(error))

    write_cached_text(cache_dir, cache_key, analysis)
    return analysis
@@ -0,0 +1 @@
1
+ """Transcription and processing provider integrations."""
@@ -0,0 +1,145 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from sqlite3 import Connection
6
+ from typing import Any
7
+ from uuid import uuid4
8
+
9
+ import httpx
10
+
11
+ from fly_on_the_wall.costs import record_service_usage
12
+ from fly_on_the_wall.secrets import get_api_key
13
+ from fly_on_the_wall.storage import StoragePaths, storage_paths
14
+
15
# Endpoint that receives the speech-to-text upload.
API_URL = "https://api.elevenlabs.io/v1/speech-to-text"
# Provider name: stored on provider runs and usage records, and used to look up the API key.
PROVIDER = "elevenlabs"
# Model id sent as ``model_id`` and stored with each provider run.
MODEL = "scribe_v2"
18
+
19
+
20
class ElevenLabsError(RuntimeError):
    """Error raised when an ElevenLabs transcription request fails."""
22
+
23
+
24
def transcribe_audio(
    audio_path: Path,
    api_key: str | None = None,
    client: httpx.Client | None = None,
    num_speakers: int | None = None,
    diarization_threshold: float | None = None,
    no_verbatim: bool = False,
) -> dict[str, Any]:
    """Upload an audio file to the ElevenLabs speech-to-text API.

    Args:
        audio_path: Audio file to upload.
        api_key: API key; looked up via ``get_api_key`` when omitted.
        client: HTTP client to use. When omitted, a client with a 600 second
            timeout is created and closed again before returning.
        num_speakers: Optional ``num_speakers`` form value.
        diarization_threshold: Optional ``diarization_threshold`` form value.
        no_verbatim: Sent as the ``no_verbatim`` form value.

    Returns:
        The decoded JSON body of the response.

    Raises:
        ElevenLabsError: If no API key is available, the API answers with an
            error status, the request fails, or the response body is not
            valid JSON.
    """
    resolved_api_key = api_key or get_api_key(PROVIDER)
    if not resolved_api_key:
        raise ElevenLabsError("Missing ELEVENLABS_API_KEY.")

    # All form values are passed as strings.
    data = {
        "model_id": MODEL,
        "tag_audio_events": "true",
        "timestamps_granularity": "word",
        "diarize": "true",
        "temperature": "0",
        "seed": "1",
        "no_verbatim": str(no_verbatim).lower(),
    }
    if num_speakers is not None:
        data["num_speakers"] = str(num_speakers)
    if diarization_threshold is not None:
        data["diarization_threshold"] = str(diarization_threshold)

    # Only close the client if it was created here; a caller-supplied client stays open.
    close_client = client is None
    http_client = client or httpx.Client(timeout=600)
    try:
        with audio_path.open("rb") as audio_file:
            response = http_client.post(
                API_URL,
                headers={"xi-api-key": resolved_api_key},
                data=data,
                files={"file": (audio_path.name, audio_file)},
            )
        response.raise_for_status()
        try:
            return response.json()
        except ValueError as exc:
            # A success status with a non-JSON body would otherwise escape as a
            # raw decode error instead of the module's own error type.
            raise ElevenLabsError("ElevenLabs returned a response that is not valid JSON.") from exc
    except httpx.HTTPStatusError as exc:
        message = f"ElevenLabs HTTP {exc.response.status_code}: {exc.response.text}"
        raise ElevenLabsError(message) from exc
    except httpx.HTTPError as exc:
        raise ElevenLabsError(f"ElevenLabs request failed: {exc}") from exc
    finally:
        if close_client:
            http_client.close()
70
+
71
+
72
def run_transcription(
    connection: Connection,
    meeting_id: str,
    audio_path: Path,
    storage: StoragePaths | None = None,
    client: httpx.Client | None = None,
    api_key: str | None = None,
) -> str:
    """Transcribe a meeting's audio and record the provider run.

    A ``provider_runs`` row is inserted with status ``running`` before the
    request. On success the raw JSON response is written below the meeting's
    artifact directory, the transcription usage is recorded and the run is
    marked ``done``. On any ``Exception`` the run is marked ``failed`` and the
    exception is re-raised.

    Args:
        connection: Database connection.
        meeting_id: Meeting the audio belongs to.
        audio_path: Audio file to transcribe.
        storage: Storage layout; resolved via ``storage_paths`` when omitted.
        client: Optional HTTP client passed to ``transcribe_audio``.
        api_key: Optional API key passed to ``transcribe_audio``.

    Returns:
        The id of the new provider run.
    """
    paths = storage or storage_paths()
    provider_run_id = str(uuid4())
    raw_response_path = paths.artifacts / meeting_id / "provider-runs" / f"{provider_run_id}.raw.json"
    raw_response_path.parent.mkdir(parents=True, exist_ok=True)

    _insert_provider_run(connection, provider_run_id, meeting_id, raw_response_path, "running")
    try:
        response = transcribe_audio(audio_path, api_key=api_key, client=client)
        # ensure_ascii=False emits non-ASCII characters verbatim, so the file
        # must be written as UTF-8 explicitly rather than in the locale encoding.
        raw_response_path.write_text(
            json.dumps(response, indent=2, ensure_ascii=False) + "\n",
            encoding="utf-8",
        )
        # A missing or empty duration is recorded as 0 seconds.
        duration = float(response.get("audio_duration_secs") or 0)
        record_service_usage(
            connection,
            meeting_id=meeting_id,
            provider_run_id=provider_run_id,
            provider=PROVIDER,
            model=MODEL,
            service="transcription",
            unit="audio_second",
            input_quantity=duration,
            usage={"audio_duration_secs": duration},
        )
    except Exception:
        _set_provider_run_status(connection, provider_run_id, "failed")
        raise

    _set_provider_run_status(connection, provider_run_id, "done")
    return provider_run_id
107
+
108
+
109
def _insert_provider_run(
    connection: Connection,
    provider_run_id: str,
    meeting_id: str,
    raw_response_path: Path,
    status: str,
) -> None:
    """Insert a ``provider_runs`` row for this provider and model.

    The raw response path is stored as a string.
    """
    insert_sql = """
        INSERT INTO provider_runs(
            id,
            meeting_id,
            provider,
            model,
            raw_response_path,
            status
        ) VALUES (?, ?, ?, ?, ?, ?)
        """
    bindings = (provider_run_id, meeting_id, PROVIDER, MODEL, str(raw_response_path), status)
    # The connection context manager commits on success and rolls back on error.
    with connection:
        connection.execute(insert_sql, bindings)
130
+
131
+
132
+ def _set_provider_run_status(connection: Connection, provider_run_id: str, status: str) -> None:
133
+ with connection:
134
+ connection.execute(
135
+ """
136
+ UPDATE provider_runs
137
+ SET status = ?,
138
+ completed_at = CASE
139
+ WHEN ? = 'done' THEN CURRENT_TIMESTAMP
140
+ ELSE completed_at
141
+ END
142
+ WHERE id = ?
143
+ """,
144
+ (status, status, provider_run_id),
145
+ )