fow-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fly_on_the_wall/__init__.py +3 -0
- fly_on_the_wall/audio.py +164 -0
- fly_on_the_wall/audio_metadata.py +241 -0
- fly_on_the_wall/cache.py +26 -0
- fly_on_the_wall/cleanup.py +29 -0
- fly_on_the_wall/cli.py +641 -0
- fly_on_the_wall/cli_costs.py +81 -0
- fly_on_the_wall/cli_menu.py +163 -0
- fly_on_the_wall/cli_publish.py +141 -0
- fly_on_the_wall/cli_speaker_review.py +315 -0
- fly_on_the_wall/cli_watch.py +209 -0
- fly_on_the_wall/config.py +92 -0
- fly_on_the_wall/costs.py +169 -0
- fly_on_the_wall/db.py +508 -0
- fly_on_the_wall/doctor.py +142 -0
- fly_on_the_wall/embeddings.py +142 -0
- fly_on_the_wall/exporting.py +155 -0
- fly_on_the_wall/glossary.py +31 -0
- fly_on_the_wall/meetings.py +382 -0
- fly_on_the_wall/normalization.py +166 -0
- fly_on_the_wall/people.py +82 -0
- fly_on_the_wall/people_embeddings.py +68 -0
- fly_on_the_wall/pipeline.py +120 -0
- fly_on_the_wall/processing.py +427 -0
- fly_on_the_wall/providers/__init__.py +1 -0
- fly_on_the_wall/providers/elevenlabs.py +145 -0
- fly_on_the_wall/providers/openai_analysis.py +195 -0
- fly_on_the_wall/providers/openai_cleanup.py +91 -0
- fly_on_the_wall/publishing.py +410 -0
- fly_on_the_wall/reanalysis.py +172 -0
- fly_on_the_wall/recording_quality.py +141 -0
- fly_on_the_wall/rendering.py +115 -0
- fly_on_the_wall/secrets.py +93 -0
- fly_on_the_wall/service_pricing.py +75 -0
- fly_on_the_wall/setup.py +221 -0
- fly_on_the_wall/speaker_identity.py +173 -0
- fly_on_the_wall/speaker_matching.py +134 -0
- fly_on_the_wall/speakers.py +221 -0
- fly_on_the_wall/storage.py +53 -0
- fly_on_the_wall/voice_samples.py +125 -0
- fly_on_the_wall/watch.py +347 -0
- fow_cli-0.1.0.dist-info/METADATA +447 -0
- fow_cli-0.1.0.dist-info/RECORD +46 -0
- fow_cli-0.1.0.dist-info/WHEEL +4 -0
- fow_cli-0.1.0.dist-info/entry_points.txt +2 -0
- fow_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
fly_on_the_wall/watch.py
ADDED
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
from collections.abc import Callable, Iterable
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from sqlite3 import Connection
|
|
8
|
+
from uuid import uuid4
|
|
9
|
+
|
|
10
|
+
from fly_on_the_wall.config import AppConfig
|
|
11
|
+
from fly_on_the_wall.meetings import file_sha256
|
|
12
|
+
from fly_on_the_wall.processing import ProcessResult, process_audio
|
|
13
|
+
from fly_on_the_wall.recording_quality import RecordingIgnoredError
|
|
14
|
+
from fly_on_the_wall.storage import StoragePaths
|
|
15
|
+
|
|
16
|
+
# Lower-case file suffixes (with the leading dot) that count as audio recordings.
AUDIO_EXTENSIONS = frozenset((".aac", ".caf", ".m4a", ".mp3", ".wav"))

# Default for ``stable_age_seconds``: a file seen for the first time (or seen
# changed) is skipped while its mtime is younger than this many seconds.
DEFAULT_STABLE_AGE_SECONDS = 5

# Name endings that mark partially written / in-progress download files.
TEMP_SUFFIXES = (".crdownload", ".download", ".part", ".tmp")

# Callback that receives one human-readable progress message.
ProgressFn = Callable[[str], None]

# Callable used to process a single audio file; it must return a ProcessResult.
ProcessFn = Callable[..., ProcessResult]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(frozen=True)
class WatchFolder:
    """Immutable view of one row of the ``watch_folders`` table."""

    # Primary key; a UUID4 string when created through ``add_watch_folder``.
    id: str
    # Optional user-supplied label; may be used to look the folder up.
    name: str | None
    # Directory that is scanned for audio files.
    path: Path
    # Disabled folders are skipped by ``scan_watch_folders``.
    enabled: bool
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass(frozen=True)
class WatchScanResult:
    """Totals produced by one ``scan_watch_folders`` run."""

    # Number of audio files enumerated across all enabled, existing folders.
    seen: int
    # Files whose processing completed during this scan.
    processed: int
    # Files rejected by the processor via ``RecordingIgnoredError``.
    ignored: int
    # Files left alone (too recently modified, or already done/ignored).
    skipped: int
    # Files whose processing raised an error.
    failed: int
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass(frozen=True)
class WatchFile:
    """Snapshot of one candidate audio file taken from ``Path.stat()``."""

    # Id of the watch folder the file was found under.
    folder_id: str
    # Location of the file on disk.
    path: Path
    # ``st_size`` at snapshot time.
    size_bytes: int
    # ``st_mtime_ns`` at snapshot time; compared with the stored value to detect changes.
    mtime_ns: int
    # ``st_mtime`` (float seconds) at snapshot time; used for the stable-age check.
    mtime: float
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass(frozen=True)
class WatchFileResult:
    """Outcome of scanning a single file, expressed as counters.

    Every counter defaults to zero so a caller can set just the one that
    applies (for example ``WatchFileResult(skipped=1)``) and add the fields
    straight into running totals.
    """

    processed: int = 0
    ignored: int = 0
    skipped: int = 0
    failed: int = 0
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclass(frozen=True)
class WatchScanContext:
    """Values shared by every file handled during one scan."""

    # Database connection used for all watch_items bookkeeping.
    connection: Connection
    # Application configuration, passed through to ``process_fn``.
    config: AppConfig
    # Optional storage paths, passed through to ``process_fn`` as ``storage=``.
    storage: StoragePaths | None
    # Callable that performs the actual audio processing.
    process_fn: ProcessFn
    # Minimum mtime age (seconds) before a not-yet-seen-unchanged file is processed.
    stable_age_seconds: int
    # Timestamp (``time.time()``) captured once when the scan started.
    now: float
    # Optional progress callback; ``None`` silences progress messages.
    progress: ProgressFn | None
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def add_watch_folder(connection: Connection, path: Path, name: str | None = None) -> WatchFolder:
    """Register ``path`` as a new, enabled watch folder.

    The path is normalised with ``_resolve_folder_path`` before it is stored
    and a fresh UUID4 string becomes the folder id. The insert runs inside
    ``with connection`` so it is committed on success.

    Returns:
        The ``WatchFolder`` describing the row that was inserted.
    """
    folder_path = _resolve_folder_path(path)
    new_id = str(uuid4())
    insert_sql = """
        INSERT INTO watch_folders(id, name, path, enabled)
        VALUES (?, ?, ?, 1)
        """
    row_values = (new_id, name, str(folder_path))
    with connection:
        connection.execute(insert_sql, row_values)
    return WatchFolder(id=new_id, name=name, path=folder_path, enabled=True)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def list_watch_folders(connection: Connection) -> list[WatchFolder]:
    """Return all watch folders, ordered by ``created_at`` and then ``path``.

    Both enabled and disabled folders are included.
    """
    cursor = connection.execute(
        """
        SELECT id, name, path, enabled
        FROM watch_folders
        ORDER BY created_at, path
        """
    )
    folders = []
    for record in cursor.fetchall():
        folders.append(_watch_folder_from_row(record))
    return folders
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def get_watch_folder(connection: Connection, identifier: str) -> WatchFolder | None:
    """Look up a single watch folder by id, name, or path.

    ``identifier`` is compared verbatim against ``id`` and ``name``. It is
    also treated as a path and compared against ``path`` twice: once with
    ``~`` expanded and once expanded and fully resolved.

    Returns:
        The first matching folder, or ``None`` when nothing matches.
    """
    expanded = Path(identifier).expanduser()
    lookup_values = (identifier, identifier, str(expanded), str(expanded.resolve()))
    cursor = connection.execute(
        """
        SELECT id, name, path, enabled
        FROM watch_folders
        WHERE id = ? OR name = ? OR path = ? OR path = ?
        """,
        lookup_values,
    )
    match = cursor.fetchone()
    if match is None:
        return None
    return _watch_folder_from_row(match)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def remove_watch_folder(connection: Connection, identifier: str) -> WatchFolder | None:
    """Delete the watch folder matching ``identifier``.

    The folder is located with ``get_watch_folder`` and deleted by its id.

    Returns:
        The folder that was removed, or ``None`` when no folder matched (in
        which case nothing is deleted).
    """
    target = get_watch_folder(connection, identifier)
    if target is not None:
        with connection:
            connection.execute("DELETE FROM watch_folders WHERE id = ?", (target.id,))
    return target
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def set_watch_folder_enabled(connection: Connection, identifier: str, enabled: bool) -> WatchFolder | None:
    """Enable or disable the watch folder matching ``identifier``.

    Returns:
        A ``WatchFolder`` carrying the new ``enabled`` value, or ``None``
        when no folder matched (nothing is updated in that case).
    """
    current = get_watch_folder(connection, identifier)
    if current is None:
        return None

    # The column stores the flag as an integer 0/1.
    stored_flag = 1 if enabled else 0
    update_sql = """
        UPDATE watch_folders
        SET enabled = ?, updated_at = CURRENT_TIMESTAMP
        WHERE id = ?
        """
    with connection:
        connection.execute(update_sql, (stored_flag, current.id))
    return WatchFolder(id=current.id, name=current.name, path=current.path, enabled=enabled)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def scan_watch_folders(
    connection: Connection,
    config: AppConfig,
    storage: StoragePaths | None = None,
    process_fn: ProcessFn = process_audio,
    stable_age_seconds: int = DEFAULT_STABLE_AGE_SECONDS,
    progress: ProgressFn | None = None,
) -> WatchScanResult:
    """Scan every enabled watch folder once and process eligible audio files.

    Disabled folders are skipped silently. Folders whose path is not an
    existing directory are reported through ``progress`` and skipped. Each
    audio file found is handed to ``_scan_audio_file``.

    Args:
        connection: Database connection used for folder and item bookkeeping.
        config: Application configuration forwarded to ``process_fn``.
        storage: Optional storage paths forwarded to ``process_fn``.
        process_fn: Callable that processes one audio file.
        stable_age_seconds: Minimum mtime age before a new or changed file
            is processed.
        progress: Optional callback receiving progress messages.

    Returns:
        Totals for the scan; ``seen`` counts every audio file enumerated.
    """
    # The clock is read once so all files in this scan share the same "now".
    context = WatchScanContext(
        connection=connection,
        config=config,
        storage=storage,
        process_fn=process_fn,
        stable_age_seconds=stable_age_seconds,
        now=time.time(),
        progress=progress,
    )

    outcomes: list[WatchFileResult] = []
    for folder in list_watch_folders(connection):
        if not folder.enabled:
            continue
        if not folder.path.is_dir():
            _report(progress, f"Skipping missing folder {folder.path}")
            continue
        for audio_path in _audio_files(folder.path):
            snapshot = _watch_file(folder.id, audio_path)
            outcomes.append(_scan_audio_file(context, snapshot))

    return WatchScanResult(
        seen=len(outcomes),
        processed=sum(outcome.processed for outcome in outcomes),
        ignored=sum(outcome.ignored for outcome in outcomes),
        skipped=sum(outcome.skipped for outcome in outcomes),
        failed=sum(outcome.failed for outcome in outcomes),
    )
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _resolve_folder_path(path: Path) -> Path:
    """Return ``path`` with ``~`` expanded and then resolved to an absolute path."""
    expanded = path.expanduser()
    return expanded.resolve()
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _watch_folder_from_row(row) -> WatchFolder:
    """Build a ``WatchFolder`` from a row indexable by column name.

    The row must provide ``id``, ``name``, ``path`` and ``enabled``. The path
    is wrapped in ``Path`` and ``enabled`` is coerced to ``bool``.
    """
    folder_path = Path(row["path"])
    is_enabled = bool(row["enabled"])
    return WatchFolder(row["id"], row["name"], folder_path, is_enabled)
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _audio_files(folder: Path) -> Iterable[Path]:
    """Yield audio files found anywhere under ``folder``, in sorted path order.

    A path is yielded only if it is a regular file, its name does not start
    with ``.``, its lower-cased suffix is in ``AUDIO_EXTENSIONS`` and its
    lower-cased name does not end with one of ``TEMP_SUFFIXES``.
    """
    for candidate in sorted(folder.rglob("*")):
        filename = candidate.name
        # Checks run in this order and short-circuit, so the filesystem test
        # comes first and the pure string tests follow.
        # NOTE(review): a name whose suffix passed the AUDIO_EXTENSIONS test
        # cannot also end with a TEMP_SUFFIXES entry, so the last test looks
        # redundant — kept for parity.
        rejected = (
            not candidate.is_file()
            or filename.startswith(".")
            or candidate.suffix.lower() not in AUDIO_EXTENSIONS
            or filename.lower().endswith(TEMP_SUFFIXES)
        )
        if not rejected:
            yield candidate
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def _watch_file(folder_id: str, path: Path) -> WatchFile:
    """Stat ``path`` once and capture its size and modification times.

    Raises:
        OSError: propagated from ``Path.stat()`` (for example when the file
            no longer exists).
    """
    info = path.stat()
    return WatchFile(
        folder_id=folder_id,
        path=path,
        size_bytes=info.st_size,
        mtime_ns=info.st_mtime_ns,
        mtime=info.st_mtime,
    )
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _scan_audio_file(
    context: WatchScanContext,
    item: WatchFile,
) -> WatchFileResult:
    """Record that ``item`` was seen and decide whether to process it now.

    The file is skipped when either:

    * it was not already recorded with the same size and mtime, and its
      mtime is younger than ``context.stable_age_seconds``; or
    * its stored status is already final (done/ignored) for the current
      size and mtime.

    Otherwise it is handed to ``_process_audio_file``.
    """
    connection = context.connection

    # Must be evaluated before the upsert, which overwrites the stored size/mtime.
    already_seen_unchanged = _item_seen_unchanged(connection, item)
    _upsert_seen_item(connection, item)

    if not already_seen_unchanged:
        age_seconds = context.now - item.mtime
        if age_seconds < context.stable_age_seconds:
            _report(context.progress, f"Skipping recently modified file {item.path}")
            return WatchFileResult(skipped=1)

    if _item_final_for_current_file(connection, item):
        return WatchFileResult(skipped=1)

    return _process_audio_file(context, item)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def _process_audio_file(
    context: WatchScanContext,
    item: WatchFile,
) -> WatchFileResult:
    """Run ``context.process_fn`` on ``item`` and record the outcome.

    The watch item is marked ``processing`` (with the file's hash) before the
    processor runs, and afterwards marked ``done``, ``ignored`` or ``failed``.

    Failures are isolated per file: an ``OSError`` while hashing the file, or
    any ``Exception`` raised by the processor, is stored on the item and
    reported through ``context.progress`` instead of propagating, so one bad
    file does not abort the rest of the scan.

    Returns:
        A ``WatchFileResult`` with exactly one of ``processed``, ``ignored``
        or ``failed`` set to 1.
    """
    _report(context.progress, f"Processing {item.path}")

    # Hashing touches the file on disk (presumably reading it in full), so it
    # can fail if the file vanished or became unreadable since it was listed.
    # Treat that as a per-file failure rather than letting it abort the scan.
    try:
        file_hash = file_sha256(item.path)
    except OSError as exc:
        _mark_item_failed(context.connection, item.path, str(exc))
        _report(context.progress, f"Failed {item.path}: {exc}")
        return WatchFileResult(failed=1)

    _mark_item_processing(context.connection, item, file_hash)
    try:
        result = context.process_fn(
            context.connection,
            item.path,
            None,
            context.config,
            storage=context.storage,
            progress=context.progress,
        )
    except RecordingIgnoredError as exc:
        # The processor rejected the recording; keep the meeting id and reason.
        _mark_item_ignored(context.connection, item.path, exc.meeting.id, exc.quality.reason)
        _report(context.progress, f"Ignored {item.path}: {exc.quality.reason}")
        return WatchFileResult(ignored=1)
    except Exception as exc:
        # Per-file boundary: record the error and let the scan continue.
        _mark_item_failed(context.connection, item.path, str(exc))
        _report(context.progress, f"Failed {item.path}: {exc}")
        return WatchFileResult(failed=1)

    _mark_item_done(context.connection, item.path, result.meeting.id)
    return WatchFileResult(processed=1)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def _upsert_seen_item(connection: Connection, item: WatchFile) -> None:
    """Insert or refresh the ``watch_items`` row for ``item.path``.

    * No row yet: insert one with status ``pending``.
    * Row exists but size or mtime differ: store the new size/mtime, reset
      the status to ``pending`` and clear the error message.
    * Row exists and is unchanged: only refresh folder id and timestamps.
    """
    previous = _watch_item(connection, item.path)
    path_text = str(item.path)

    with connection:
        if previous is None:
            connection.execute(
                """
                INSERT INTO watch_items(id, folder_id, path, size_bytes, mtime_ns, status)
                VALUES (?, ?, ?, ?, ?, 'pending')
                """,
                (str(uuid4()), item.folder_id, path_text, item.size_bytes, item.mtime_ns),
            )
        elif (previous["size_bytes"], previous["mtime_ns"]) != (item.size_bytes, item.mtime_ns):
            # The file changed on disk since it was last recorded.
            connection.execute(
                """
                UPDATE watch_items
                SET folder_id = ?, size_bytes = ?, mtime_ns = ?, status = 'pending',
                    error_message = NULL, last_seen_at = CURRENT_TIMESTAMP,
                    updated_at = CURRENT_TIMESTAMP
                WHERE path = ?
                """,
                (item.folder_id, item.size_bytes, item.mtime_ns, path_text),
            )
        else:
            connection.execute(
                """
                UPDATE watch_items
                SET folder_id = ?, last_seen_at = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP
                WHERE path = ?
                """,
                (item.folder_id, path_text),
            )
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def _item_final_for_current_file(connection: Connection, file: WatchFile) -> bool:
    """Tell whether ``file`` already reached a final state in its current form.

    True only when a row exists for the path, its status is ``done`` or
    ``ignored``, and the stored size and mtime equal those of ``file``.
    """
    record = _watch_item(connection, file.path)
    if record is None:
        return False
    if record["status"] not in {"done", "ignored"}:
        return False
    return bool(record["size_bytes"] == file.size_bytes and record["mtime_ns"] == file.mtime_ns)
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def _item_seen_unchanged(connection: Connection, file: WatchFile) -> bool:
    """Tell whether ``file`` is already recorded with the same size and mtime."""
    record = _watch_item(connection, file.path)
    if record is None:
        return False
    same_size = record["size_bytes"] == file.size_bytes
    return bool(same_size and record["mtime_ns"] == file.mtime_ns)
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def _mark_item_processing(connection: Connection, item: WatchFile, file_hash: str) -> None:
    """Flag the row for ``item.path`` as ``processing``.

    Stores the file hash together with the current size and mtime and clears
    any previous error message.
    """
    update_sql = """
        UPDATE watch_items
        SET file_sha256 = ?, size_bytes = ?, mtime_ns = ?, status = 'processing',
            error_message = NULL, updated_at = CURRENT_TIMESTAMP
        WHERE path = ?
        """
    values = (file_hash, item.size_bytes, item.mtime_ns, str(item.path))
    with connection:
        connection.execute(update_sql, values)
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def _mark_item_done(connection: Connection, path: Path, meeting_id: str) -> None:
    """Flag the row for ``path`` as ``done`` and link it to ``meeting_id``.

    Also clears the error message and stamps ``processed_at``.
    """
    update_sql = """
        UPDATE watch_items
        SET status = 'done', meeting_id = ?, error_message = NULL,
            processed_at = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP
        WHERE path = ?
        """
    values = (meeting_id, str(path))
    with connection:
        connection.execute(update_sql, values)
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def _mark_item_ignored(connection: Connection, path: Path, meeting_id: str, reason: str) -> None:
    """Flag the row for ``path`` as ``ignored``.

    The reason is stored in ``error_message``; the row is linked to
    ``meeting_id`` and ``processed_at`` is stamped.
    """
    update_sql = """
        UPDATE watch_items
        SET status = 'ignored', meeting_id = ?, error_message = ?,
            processed_at = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP
        WHERE path = ?
        """
    values = (meeting_id, reason, str(path))
    with connection:
        connection.execute(update_sql, values)
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def _mark_item_failed(connection: Connection, path: Path, error_message: str) -> None:
    """Flag the row for ``path`` as ``failed`` and store ``error_message``."""
    update_sql = """
        UPDATE watch_items
        SET status = 'failed', error_message = ?, updated_at = CURRENT_TIMESTAMP
        WHERE path = ?
        """
    values = (error_message, str(path))
    with connection:
        connection.execute(update_sql, values)
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def _watch_item(connection: Connection, path: Path):
    """Fetch the ``watch_items`` row stored for ``path``, or ``None`` if absent."""
    cursor = connection.execute("SELECT * FROM watch_items WHERE path = ?", (str(path),))
    return cursor.fetchone()
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def _report(progress: ProgressFn | None, message: str) -> None:
    """Send ``message`` to ``progress``; do nothing when no callback is set."""
    if progress is None:
        return
    progress(message)
|