fow-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. fly_on_the_wall/__init__.py +3 -0
  2. fly_on_the_wall/audio.py +164 -0
  3. fly_on_the_wall/audio_metadata.py +241 -0
  4. fly_on_the_wall/cache.py +26 -0
  5. fly_on_the_wall/cleanup.py +29 -0
  6. fly_on_the_wall/cli.py +641 -0
  7. fly_on_the_wall/cli_costs.py +81 -0
  8. fly_on_the_wall/cli_menu.py +163 -0
  9. fly_on_the_wall/cli_publish.py +141 -0
  10. fly_on_the_wall/cli_speaker_review.py +315 -0
  11. fly_on_the_wall/cli_watch.py +209 -0
  12. fly_on_the_wall/config.py +92 -0
  13. fly_on_the_wall/costs.py +169 -0
  14. fly_on_the_wall/db.py +508 -0
  15. fly_on_the_wall/doctor.py +142 -0
  16. fly_on_the_wall/embeddings.py +142 -0
  17. fly_on_the_wall/exporting.py +155 -0
  18. fly_on_the_wall/glossary.py +31 -0
  19. fly_on_the_wall/meetings.py +382 -0
  20. fly_on_the_wall/normalization.py +166 -0
  21. fly_on_the_wall/people.py +82 -0
  22. fly_on_the_wall/people_embeddings.py +68 -0
  23. fly_on_the_wall/pipeline.py +120 -0
  24. fly_on_the_wall/processing.py +427 -0
  25. fly_on_the_wall/providers/__init__.py +1 -0
  26. fly_on_the_wall/providers/elevenlabs.py +145 -0
  27. fly_on_the_wall/providers/openai_analysis.py +195 -0
  28. fly_on_the_wall/providers/openai_cleanup.py +91 -0
  29. fly_on_the_wall/publishing.py +410 -0
  30. fly_on_the_wall/reanalysis.py +172 -0
  31. fly_on_the_wall/recording_quality.py +141 -0
  32. fly_on_the_wall/rendering.py +115 -0
  33. fly_on_the_wall/secrets.py +93 -0
  34. fly_on_the_wall/service_pricing.py +75 -0
  35. fly_on_the_wall/setup.py +221 -0
  36. fly_on_the_wall/speaker_identity.py +173 -0
  37. fly_on_the_wall/speaker_matching.py +134 -0
  38. fly_on_the_wall/speakers.py +221 -0
  39. fly_on_the_wall/storage.py +53 -0
  40. fly_on_the_wall/voice_samples.py +125 -0
  41. fly_on_the_wall/watch.py +347 -0
  42. fow_cli-0.1.0.dist-info/METADATA +447 -0
  43. fow_cli-0.1.0.dist-info/RECORD +46 -0
  44. fow_cli-0.1.0.dist-info/WHEEL +4 -0
  45. fow_cli-0.1.0.dist-info/entry_points.txt +2 -0
  46. fow_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,347 @@
1
+ from __future__ import annotations
2
+
3
+ import time
4
+ from collections.abc import Callable, Iterable
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+ from sqlite3 import Connection
8
+ from uuid import uuid4
9
+
10
+ from fly_on_the_wall.config import AppConfig
11
+ from fly_on_the_wall.meetings import file_sha256
12
+ from fly_on_the_wall.processing import ProcessResult, process_audio
13
+ from fly_on_the_wall.recording_quality import RecordingIgnoredError
14
+ from fly_on_the_wall.storage import StoragePaths
15
+
16
# File suffixes (lower-case, including the dot) that the scanner accepts as audio.
AUDIO_EXTENSIONS = frozenset(
    (
        ".aac",
        ".caf",
        ".m4a",
        ".mp3",
        ".wav",
    )
)

# Default for ``stable_age_seconds``: the scanner compares a file's age
# (scan start time minus mtime) against this many seconds.
DEFAULT_STABLE_AGE_SECONDS = 5

# File-name endings the scanner skips (matched against the lower-cased name).
TEMP_SUFFIXES = (
    ".crdownload",
    ".download",
    ".part",
    ".tmp",
)
19
+
20
# Signature of the callable that processes one audio file; the scanner only
# requires that it returns a ``ProcessResult``.
ProcessFn = Callable[..., ProcessResult]

# Signature of the optional callback that receives progress messages as text.
ProgressFn = Callable[[str], None]
22
+
23
+
24
@dataclass(frozen=True)
class WatchFolder:
    """Immutable description of one registered watch folder."""

    # Identifier of the folder's row in ``watch_folders``.
    id: str
    # Optional label; ``None`` when the folder was registered without a name.
    name: str | None
    # Directory that is scanned for audio files.
    path: Path
    # Disabled folders are skipped by ``scan_watch_folders``.
    enabled: bool
30
+
31
+
32
@dataclass(frozen=True)
class WatchScanResult:
    """Totals accumulated over one run of ``scan_watch_folders``."""

    # Number of audio files found in enabled folders.
    seen: int
    # Files whose processing completed.
    processed: int
    # Files rejected with ``RecordingIgnoredError`` during processing.
    ignored: int
    # Files left alone this scan (too recently modified, or already finished).
    skipped: int
    # Files whose processing raised any other exception.
    failed: int
39
+
40
+
41
@dataclass(frozen=True)
class WatchFile:
    """Snapshot of one audio file as found during a scan."""

    # Id of the watch folder the file was found under.
    folder_id: str
    # Location of the file on disk.
    path: Path
    # ``st_size`` from the file's stat result.
    size_bytes: int
    # ``st_mtime_ns`` from the stat result; stored and compared to detect changes.
    mtime_ns: int
    # ``st_mtime`` (float seconds) from the stat result; used for the age check.
    mtime: float
48
+
49
+
50
@dataclass(frozen=True)
class WatchFileResult:
    """Outcome of handling a single file, expressed as counters to add to the scan totals.

    Every counter defaults to zero, so callers set only the one that applies.
    """

    processed: int = 0
    ignored: int = 0
    skipped: int = 0
    failed: int = 0
56
+
57
+
58
@dataclass(frozen=True)
class WatchScanContext:
    """Values shared by every file handled during a single scan."""

    # Database connection used for all ``watch_items`` bookkeeping.
    connection: Connection
    # Application configuration passed through to the processing function.
    config: AppConfig
    # Optional storage locations passed through to the processing function.
    storage: StoragePaths | None
    # Callable invoked to process one audio file.
    process_fn: ProcessFn
    # Minimum age in seconds before a not-yet-seen or changed file is processed.
    stable_age_seconds: int
    # ``time.time()`` captured once when the scan started.
    now: float
    # Optional callback for progress messages.
    progress: ProgressFn | None
67
+
68
+
69
def add_watch_folder(connection: Connection, path: Path, name: str | None = None) -> WatchFolder:
    """Register *path* as a new, enabled watch folder and return its record.

    The path is normalised with ``_resolve_folder_path`` before being stored
    and a fresh UUID4 string becomes the row id.  The insert runs inside
    ``with connection`` so it is committed (or rolled back on error) as a unit.
    """
    resolved = _resolve_folder_path(path)
    folder = WatchFolder(id=str(uuid4()), name=name, path=resolved, enabled=True)
    insert_sql = """
        INSERT INTO watch_folders(id, name, path, enabled)
        VALUES (?, ?, ?, 1)
        """
    with connection:
        connection.execute(insert_sql, (folder.id, folder.name, str(folder.path)))
    return folder
81
+
82
+
83
def list_watch_folders(connection: Connection) -> list[WatchFolder]:
    """Return every registered watch folder, ordered by creation time and then path."""
    query = """
        SELECT id, name, path, enabled
        FROM watch_folders
        ORDER BY created_at, path
        """
    rows = connection.execute(query).fetchall()
    return list(map(_watch_folder_from_row, rows))
94
+
95
+
96
def get_watch_folder(connection: Connection, identifier: str) -> WatchFolder | None:
    """Find a watch folder whose id, name, or stored path matches *identifier*.

    The identifier is compared verbatim against the ``id`` and ``name``
    columns.  For the ``path`` column it is compared both after ``~``
    expansion and after full resolution to an absolute path.

    Path expansion is best-effort: an identifier that is really an id or a
    name may not be a usable path at all (for example ``"~archive"``, where
    ``archive`` is not a user account, makes ``Path.expanduser`` raise
    ``RuntimeError``).  In that case the raw identifier is used for the path
    comparison instead of letting the lookup crash.

    Returns the first matching row as a ``WatchFolder``, or ``None`` when
    nothing matches.
    """
    expanded_path = resolved_path = identifier
    try:
        expanded = Path(identifier).expanduser()
        expanded_path = str(expanded)
        resolved_path = str(expanded.resolve())
    except (OSError, RuntimeError, ValueError):
        # Not interpretable as a path on this system; the id/name comparison
        # below still applies, so keep whatever candidates were computed.
        pass

    row = connection.execute(
        """
        SELECT id, name, path, enabled
        FROM watch_folders
        WHERE id = ? OR name = ? OR path = ? OR path = ?
        """,
        (identifier, identifier, expanded_path, resolved_path),
    ).fetchone()
    return None if row is None else _watch_folder_from_row(row)
108
+
109
+
110
def remove_watch_folder(connection: Connection, identifier: str) -> WatchFolder | None:
    """Delete the watch folder matching *identifier* and return the removed record.

    The folder is looked up with ``get_watch_folder``; when nothing matches,
    no statement is executed and ``None`` is returned.
    """
    target = get_watch_folder(connection, identifier)
    if target is not None:
        with connection:
            connection.execute("DELETE FROM watch_folders WHERE id = ?", (target.id,))
    return target
117
+
118
+
119
def set_watch_folder_enabled(connection: Connection, identifier: str, enabled: bool) -> WatchFolder | None:
    """Enable or disable the watch folder matching *identifier*.

    Returns a new ``WatchFolder`` carrying the requested ``enabled`` value,
    or ``None`` when no folder matches (in which case nothing is written).
    """
    current = get_watch_folder(connection, identifier)
    if current is None:
        return None

    # The column stores the flag as an integer.
    flag = 1 if enabled else 0
    update_sql = """
        UPDATE watch_folders
        SET enabled = ?, updated_at = CURRENT_TIMESTAMP
        WHERE id = ?
        """
    with connection:
        connection.execute(update_sql, (flag, current.id))
    return WatchFolder(id=current.id, name=current.name, path=current.path, enabled=enabled)
133
+
134
+
135
def scan_watch_folders(
    connection: Connection,
    config: AppConfig,
    storage: StoragePaths | None = None,
    process_fn: ProcessFn = process_audio,
    stable_age_seconds: int = DEFAULT_STABLE_AGE_SECONDS,
    progress: ProgressFn | None = None,
) -> WatchScanResult:
    """Scan every enabled watch folder once and process the audio files found.

    Args:
        connection: Database connection used for folder and item bookkeeping.
        config: Application configuration handed to ``process_fn``.
        storage: Optional storage locations handed to ``process_fn``.
        process_fn: Callable that processes a single audio file.
        stable_age_seconds: Minimum age before a new or changed file is processed.
        progress: Optional callback that receives progress messages.

    Returns:
        A ``WatchScanResult`` with the totals for this scan.  Every file
        counted in ``seen`` is also counted in exactly one of the other fields.

    Disabled folders are skipped silently; folders whose path is not a
    directory are reported and skipped.  A file that cannot be stat'ed (for
    example because it was deleted between the directory listing and the
    stat call) is reported and counted as skipped rather than aborting the
    whole scan.
    """
    seen = processed = ignored = skipped = failed = 0
    # ``now`` is captured once so every file in this scan is judged against the same clock.
    context = WatchScanContext(connection, config, storage, process_fn, stable_age_seconds, time.time(), progress)

    for folder in list_watch_folders(connection):
        if not folder.enabled:
            continue
        if not folder.path.is_dir():
            _report(progress, f"Skipping missing folder {folder.path}")
            continue

        for audio_path in _audio_files(folder.path):
            seen += 1
            try:
                item = _watch_file(folder.id, audio_path)
            except OSError as exc:
                # The file vanished or became unreadable after it was listed;
                # leave it for a later scan instead of failing everything.
                _report(progress, f"Skipping unreadable file {audio_path}: {exc}")
                skipped += 1
                continue

            result = _scan_audio_file(context, item)
            processed += result.processed
            ignored += result.ignored
            skipped += result.skipped
            failed += result.failed

    return WatchScanResult(seen=seen, processed=processed, ignored=ignored, skipped=skipped, failed=failed)
162
+
163
+
164
+ def _resolve_folder_path(path: Path) -> Path:
165
+ return path.expanduser().resolve()
166
+
167
+
168
def _watch_folder_from_row(row) -> WatchFolder:
    """Build a ``WatchFolder`` from a row exposing ``id``, ``name``, ``path`` and ``enabled`` by key."""
    folder_id, label = row["id"], row["name"]
    # The path is stored as text and the enabled flag as an integer.
    location = Path(row["path"])
    return WatchFolder(folder_id, label, location, bool(row["enabled"]))
175
+
176
+
177
def _audio_files(folder: Path) -> Iterable[Path]:
    """Yield the audio files found anywhere below *folder*, in sorted path order.

    An entry is yielded only when it is a regular file, its name does not
    start with a dot, its suffix (case-insensitively) is one of
    ``AUDIO_EXTENSIONS``, and its lower-cased name does not end with one of
    ``TEMP_SUFFIXES``.
    """
    for candidate in sorted(folder.rglob("*")):
        if (
            candidate.is_file()
            and not candidate.name.startswith(".")
            and candidate.suffix.lower() in AUDIO_EXTENSIONS
            and not candidate.name.lower().endswith(TEMP_SUFFIXES)
        ):
            yield candidate
188
+
189
+
190
def _watch_file(folder_id: str, path: Path) -> WatchFile:
    """Stat *path* once and capture its size and modification times as a ``WatchFile``."""
    info = path.stat()
    return WatchFile(
        folder_id=folder_id,
        path=path,
        size_bytes=info.st_size,
        mtime_ns=info.st_mtime_ns,
        mtime=info.st_mtime,
    )
193
+
194
+
195
def _scan_audio_file(
    context: WatchScanContext,
    item: WatchFile,
) -> WatchFileResult:
    """Record that *item* was seen and decide whether to process it now.

    The file is skipped when it was modified less than
    ``context.stable_age_seconds`` ago, unless the stored size and mtime
    already matched on entry.  It is also skipped when the stored row is
    already ``done`` or ``ignored`` for the current size and mtime.
    Otherwise it is handed to ``_process_audio_file``.
    """
    db = context.connection
    # Must be read before the upsert below, which overwrites the stored size and mtime.
    unchanged_since_last_scan = _item_seen_unchanged(db, item)
    _upsert_seen_item(db, item)

    age_seconds = context.now - item.mtime
    if age_seconds < context.stable_age_seconds and not unchanged_since_last_scan:
        _report(context.progress, f"Skipping recently modified file {item.path}")
        return WatchFileResult(skipped=1)

    if _item_final_for_current_file(db, item):
        return WatchFileResult(skipped=1)

    return _process_audio_file(context, item)
210
+
211
+
212
def _process_audio_file(
    context: WatchScanContext,
    item: WatchFile,
) -> WatchFileResult:
    """Run the processing function on *item* and record the outcome in ``watch_items``.

    Returns a ``WatchFileResult`` with exactly one counter set:

    * ``processed`` when ``context.process_fn`` returns normally,
    * ``ignored`` when it raises ``RecordingIgnoredError``,
    * ``failed`` when hashing or processing raises any other exception.

    Failures are reported through ``context.progress`` and stored on the
    item's row; they are deliberately not re-raised so that one bad file
    does not stop the rest of the scan.
    """
    _report(context.progress, f"Processing {item.path}")
    try:
        # Hashing reads the file from disk, so it belongs inside the ``try``:
        # a file that disappears or cannot be read is recorded as failed
        # instead of aborting the whole scan.
        _mark_item_processing(context.connection, item, file_sha256(item.path))
        result = context.process_fn(
            context.connection,
            item.path,
            None,
            context.config,
            storage=context.storage,
            progress=context.progress,
        )
    except RecordingIgnoredError as exc:
        _mark_item_ignored(context.connection, item.path, exc.meeting.id, exc.quality.reason)
        _report(context.progress, f"Ignored {item.path}: {exc.quality.reason}")
        return WatchFileResult(ignored=1)
    except Exception as exc:
        # Per-file boundary: record the error and let the scan continue.
        _mark_item_failed(context.connection, item.path, str(exc))
        _report(context.progress, f"Failed {item.path}: {exc}")
        return WatchFileResult(failed=1)

    _mark_item_done(context.connection, item.path, result.meeting.id)
    return WatchFileResult(processed=1)
238
+
239
+
240
def _upsert_seen_item(connection: Connection, item: WatchFile) -> None:
    """Insert or refresh the ``watch_items`` row for *item*.

    * No row yet: insert one with status ``pending``.
    * Row exists but size or mtime differ: store the new values, reset the
      status to ``pending`` and clear the error message.
    * Row exists and is unchanged: only refresh the folder id and timestamps.
    """
    path_text = str(item.path)
    stored = _watch_item(connection, item.path)

    if stored is None:
        statement = """
            INSERT INTO watch_items(id, folder_id, path, size_bytes, mtime_ns, status)
            VALUES (?, ?, ?, ?, ?, 'pending')
            """
        params = (str(uuid4()), item.folder_id, path_text, item.size_bytes, item.mtime_ns)
    elif (stored["size_bytes"], stored["mtime_ns"]) != (item.size_bytes, item.mtime_ns):
        statement = """
            UPDATE watch_items
            SET folder_id = ?, size_bytes = ?, mtime_ns = ?, status = 'pending',
                error_message = NULL, last_seen_at = CURRENT_TIMESTAMP,
                updated_at = CURRENT_TIMESTAMP
            WHERE path = ?
            """
        params = (item.folder_id, item.size_bytes, item.mtime_ns, path_text)
    else:
        statement = """
            UPDATE watch_items
            SET folder_id = ?, last_seen_at = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP
            WHERE path = ?
            """
        params = (item.folder_id, path_text)

    with connection:
        connection.execute(statement, params)
273
+
274
+
275
def _item_final_for_current_file(connection: Connection, file: WatchFile) -> bool:
    """Return True when the stored row for *file* is ``done`` or ``ignored`` and still matches it.

    "Matches" means the stored ``size_bytes`` and ``mtime_ns`` equal the
    values of the file as currently seen.
    """
    row = _watch_item(connection, file.path)
    if row is None:
        return False
    if row["status"] not in {"done", "ignored"}:
        return False
    return bool(row["size_bytes"] == file.size_bytes and row["mtime_ns"] == file.mtime_ns)
283
+
284
+
285
def _item_seen_unchanged(connection: Connection, file: WatchFile) -> bool:
    """Return True when a row for *file* exists and its stored size and mtime equal the current ones."""
    row = _watch_item(connection, file.path)
    if row is None:
        return False
    return (row["size_bytes"], row["mtime_ns"]) == (file.size_bytes, file.mtime_ns)
288
+
289
+
290
+ def _mark_item_processing(connection: Connection, item: WatchFile, file_hash: str) -> None:
291
+ with connection:
292
+ connection.execute(
293
+ """
294
+ UPDATE watch_items
295
+ SET file_sha256 = ?, size_bytes = ?, mtime_ns = ?, status = 'processing',
296
+ error_message = NULL, updated_at = CURRENT_TIMESTAMP
297
+ WHERE path = ?
298
+ """,
299
+ (file_hash, item.size_bytes, item.mtime_ns, str(item.path)),
300
+ )
301
+
302
+
303
+ def _mark_item_done(connection: Connection, path: Path, meeting_id: str) -> None:
304
+ with connection:
305
+ connection.execute(
306
+ """
307
+ UPDATE watch_items
308
+ SET status = 'done', meeting_id = ?, error_message = NULL,
309
+ processed_at = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP
310
+ WHERE path = ?
311
+ """,
312
+ (meeting_id, str(path)),
313
+ )
314
+
315
+
316
+ def _mark_item_ignored(connection: Connection, path: Path, meeting_id: str, reason: str) -> None:
317
+ with connection:
318
+ connection.execute(
319
+ """
320
+ UPDATE watch_items
321
+ SET status = 'ignored', meeting_id = ?, error_message = ?,
322
+ processed_at = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP
323
+ WHERE path = ?
324
+ """,
325
+ (meeting_id, reason, str(path)),
326
+ )
327
+
328
+
329
+ def _mark_item_failed(connection: Connection, path: Path, error_message: str) -> None:
330
+ with connection:
331
+ connection.execute(
332
+ """
333
+ UPDATE watch_items
334
+ SET status = 'failed', error_message = ?, updated_at = CURRENT_TIMESTAMP
335
+ WHERE path = ?
336
+ """,
337
+ (error_message, str(path)),
338
+ )
339
+
340
+
341
+ def _watch_item(connection: Connection, path: Path):
342
+ return connection.execute("SELECT * FROM watch_items WHERE path = ?", (str(path),)).fetchone()
343
+
344
+
345
+ def _report(progress: ProgressFn | None, message: str) -> None:
346
+ if progress is not None:
347
+ progress(message)