fow-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. fly_on_the_wall/__init__.py +3 -0
  2. fly_on_the_wall/audio.py +164 -0
  3. fly_on_the_wall/audio_metadata.py +241 -0
  4. fly_on_the_wall/cache.py +26 -0
  5. fly_on_the_wall/cleanup.py +29 -0
  6. fly_on_the_wall/cli.py +641 -0
  7. fly_on_the_wall/cli_costs.py +81 -0
  8. fly_on_the_wall/cli_menu.py +163 -0
  9. fly_on_the_wall/cli_publish.py +141 -0
  10. fly_on_the_wall/cli_speaker_review.py +315 -0
  11. fly_on_the_wall/cli_watch.py +209 -0
  12. fly_on_the_wall/config.py +92 -0
  13. fly_on_the_wall/costs.py +169 -0
  14. fly_on_the_wall/db.py +508 -0
  15. fly_on_the_wall/doctor.py +142 -0
  16. fly_on_the_wall/embeddings.py +142 -0
  17. fly_on_the_wall/exporting.py +155 -0
  18. fly_on_the_wall/glossary.py +31 -0
  19. fly_on_the_wall/meetings.py +382 -0
  20. fly_on_the_wall/normalization.py +166 -0
  21. fly_on_the_wall/people.py +82 -0
  22. fly_on_the_wall/people_embeddings.py +68 -0
  23. fly_on_the_wall/pipeline.py +120 -0
  24. fly_on_the_wall/processing.py +427 -0
  25. fly_on_the_wall/providers/__init__.py +1 -0
  26. fly_on_the_wall/providers/elevenlabs.py +145 -0
  27. fly_on_the_wall/providers/openai_analysis.py +195 -0
  28. fly_on_the_wall/providers/openai_cleanup.py +91 -0
  29. fly_on_the_wall/publishing.py +410 -0
  30. fly_on_the_wall/reanalysis.py +172 -0
  31. fly_on_the_wall/recording_quality.py +141 -0
  32. fly_on_the_wall/rendering.py +115 -0
  33. fly_on_the_wall/secrets.py +93 -0
  34. fly_on_the_wall/service_pricing.py +75 -0
  35. fly_on_the_wall/setup.py +221 -0
  36. fly_on_the_wall/speaker_identity.py +173 -0
  37. fly_on_the_wall/speaker_matching.py +134 -0
  38. fly_on_the_wall/speakers.py +221 -0
  39. fly_on_the_wall/storage.py +53 -0
  40. fly_on_the_wall/voice_samples.py +125 -0
  41. fly_on_the_wall/watch.py +347 -0
  42. fow_cli-0.1.0.dist-info/METADATA +447 -0
  43. fow_cli-0.1.0.dist-info/RECORD +46 -0
  44. fow_cli-0.1.0.dist-info/WHEEL +4 -0
  45. fow_cli-0.1.0.dist-info/entry_points.txt +2 -0
  46. fow_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,3 @@
1
"""Fly on the Wall CLI application."""

# Version string exposed by the package.
__version__ = "0.1.0"
@@ -0,0 +1,164 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import subprocess
5
+ import sys
6
+ import time
7
+ from pathlib import Path
8
+ from select import select
9
+
10
+
11
class AudioError(RuntimeError):
    """Raised when an audio operation fails.

    Within this module it is raised when an external tool cannot be found,
    when a tool exits with a non-zero status, and when ``ffprobe`` output
    cannot be parsed.
    """
13
+
14
+
15
def get_duration(audio_path: Path) -> float:
    """Return the container duration of ``audio_path`` as reported by ``ffprobe``.

    Raises:
        AudioError: If ``ffprobe`` is missing or fails, or if its output
            cannot be converted to a float.
    """
    ffprobe_command = [
        "ffprobe",
        "-v",
        "error",
        "-show_entries",
        "format=duration",
        "-of",
        "default=noprint_wrappers=1:nokey=1",
        str(audio_path),
    ]
    raw_duration = _run(ffprobe_command).stdout.strip()
    try:
        return float(raw_duration)
    except ValueError as exc:
        raise AudioError(f"Could not read duration for {audio_path}") from exc
32
+
33
+
34
def probe_metadata(audio_path: Path) -> dict:
    """Return the ``ffprobe`` format and stream report for ``audio_path``.

    The tool is asked for JSON output (``-print_format json``) and the
    decoded document is returned as-is.

    Raises:
        AudioError: If ``ffprobe`` is missing or fails, or if its output is
            not valid JSON.
    """
    ffprobe_command = [
        "ffprobe",
        "-v",
        "quiet",
        "-print_format",
        "json",
        "-show_format",
        "-show_streams",
        str(audio_path),
    ]
    report = _run(ffprobe_command).stdout
    try:
        return json.loads(report)
    except json.JSONDecodeError as exc:
        raise AudioError(f"Could not parse metadata for {audio_path}") from exc
51
+
52
+
53
def convert_to_wav(input_path: Path, output_path: Path) -> Path:
    """Transcode ``input_path`` to ``output_path`` with ``ffmpeg`` and return ``output_path``.

    The destination directory is created when missing and an existing
    output file is overwritten (``-y``). No codec or format flags are
    passed to ``ffmpeg``.

    Raises:
        AudioError: If ``ffmpeg`` is missing or exits with an error.
    """
    destination_dir = output_path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)
    ffmpeg_command = ["ffmpeg", "-y", "-i", str(input_path), str(output_path)]
    _run(ffmpeg_command)
    return output_path
57
+
58
+
59
def normalize_for_embedding(input_path: Path, output_path: Path) -> Path:
    """Re-encode ``input_path`` with ``-ac 1 -ar 16000`` and return ``output_path``.

    That asks ``ffmpeg`` for a single channel at a 16000 Hz sample rate.
    The destination directory is created when missing and an existing
    output file is overwritten (``-y``).

    Raises:
        AudioError: If ``ffmpeg`` is missing or exits with an error.
    """
    destination_dir = output_path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)
    ffmpeg_command = ["ffmpeg", "-y", "-i", str(input_path)]
    ffmpeg_command += ["-ac", "1", "-ar", "16000"]
    ffmpeg_command.append(str(output_path))
    _run(ffmpeg_command)
    return output_path
75
+
76
+
77
def extract_clip(input_path: Path, output_path: Path, start: float, end: float) -> Path:
    """Cut the ``start``..``end`` range out of ``input_path`` into ``output_path``.

    A negative ``start`` is clamped to ``0.0``. The destination directory is
    created when missing and an existing output file is overwritten (``-y``).

    Args:
        input_path: Audio file to cut from.
        output_path: Where the clip is written.
        start: Clip start position passed to ``ffmpeg -ss`` (after clamping).
        end: Clip end position passed to ``ffmpeg -to``.

    Returns:
        ``output_path``.

    Raises:
        ValueError: If ``end`` is not greater than the (clamped) start.
        AudioError: If ``ffmpeg`` is missing or exits with an error.
    """
    # Validate against the value ffmpeg will actually receive. Checking the
    # raw ``start`` lets ranges such as (-2, -1) through, which would then be
    # sent to ffmpeg as ``-ss 0.000 -to -1.000``.
    clip_start = max(0.0, start)
    if end <= clip_start:
        raise ValueError("Clip end must be greater than start.")

    output_path.parent.mkdir(parents=True, exist_ok=True)
    _run(
        [
            "ffmpeg",
            "-y",
            "-ss",
            f"{clip_start:.3f}",
            "-to",
            f"{end:.3f}",
            "-i",
            str(input_path),
            str(output_path),
        ]
    )
    return output_path
96
+
97
+
98
def play_audio(audio_path: Path, player: str = "ffplay", stop_on_enter: bool = False) -> None:
    """Play ``audio_path`` with ``player`` and block until playback ends.

    With ``stop_on_enter`` set, playback goes through ``_run_until_enter``
    instead of ``_run`` so it can be interrupted from the keyboard.
    """
    command = audio_playback_command(audio_path, player)
    runner = _run_until_enter if stop_on_enter else _run
    runner(command)
104
+
105
+
106
def audio_playback_command(audio_path: Path, player: str = "ffplay") -> list[str]:
    """Build the argument list that plays ``audio_path`` with ``player``.

    ``ffplay`` is given ``-nodisp -autoexit``; any other player is invoked
    with the file path as its only argument.
    """
    target = str(audio_path)
    if player != "ffplay":
        return [player, target]
    return ["ffplay", "-nodisp", "-autoexit", target]
110
+
111
+
112
def start_audio_playback(audio_path: Path, player: str = "ffplay") -> subprocess.Popen:
    """Start playing ``audio_path`` in a child process and return it immediately.

    The child's stdout and stderr are discarded.

    Raises:
        AudioError: If the player executable cannot be found.
    """
    argv = audio_playback_command(audio_path, player)
    silenced = {"stdout": subprocess.DEVNULL, "stderr": subprocess.DEVNULL}
    try:
        child = subprocess.Popen(argv, **silenced)
    except FileNotFoundError as exc:
        raise AudioError(f"Required audio tool not found: {argv[0]}") from exc
    return child
118
+
119
+
120
def stop_audio_playback(process: subprocess.Popen) -> None:
    """Stop ``process`` if it is still running; do nothing if it already exited.

    The process is terminated first and killed only if it has not exited
    within two seconds.
    """
    still_running = process.poll() is None
    if still_running:
        process.terminate()
        try:
            process.wait(timeout=2)
        except subprocess.TimeoutExpired:
            # Terminate was ignored or too slow: escalate and reap the child.
            process.kill()
            process.wait()
129
+
130
+
131
def _run_until_enter(command: list[str]) -> None:
    """Run ``command`` until it exits on its own or a line is entered on stdin.

    The child's stdout and stderr are discarded. A ``KeyboardInterrupt``
    raised while polling is caught and terminates the child.

    Raises:
        AudioError: If the executable named by ``command[0]`` cannot be found.
    """
    try:
        process = subprocess.Popen(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except FileNotFoundError as exc:
        raise AudioError(f"Required audio tool not found: {command[0]}") from exc

    try:
        while process.poll() is None:
            if sys.stdin.isatty():
                # The 0.1 s select timeout doubles as the polling interval.
                ready, _, _ = select([sys.stdin], [], [], 0.1)
                if ready:
                    # Consume the line that made stdin readable, then stop playback.
                    sys.stdin.readline()
                    process.terminate()
                    break
            else:
                # stdin is not a terminal: just poll until the child exits.
                time.sleep(0.1)
    except KeyboardInterrupt:
        process.terminate()
    finally:
        # Always reap the child; kill it if it has not exited within two seconds.
        try:
            process.wait(timeout=2)
        except subprocess.TimeoutExpired:
            process.kill()
            process.wait()
155
+
156
+
157
def _run(command: list[str]) -> subprocess.CompletedProcess[str]:
    """Run ``command`` to completion, capturing stdout and stderr as text.

    Raises:
        AudioError: If the executable cannot be found, or if the command
            exits with a non-zero status. In the latter case the message
            carries stderr, falling back to stdout and then to the exception
            text when those are empty.
    """
    try:
        completed = subprocess.run(command, check=True, capture_output=True, text=True)
    except FileNotFoundError as exc:
        raise AudioError(f"Required audio tool not found: {command[0]}") from exc
    except subprocess.CalledProcessError as exc:
        detail = exc.stderr.strip()
        if not detail:
            detail = exc.stdout.strip()
        if not detail:
            detail = str(exc)
        raise AudioError(f"Audio command failed: {detail}") from exc
    return completed
@@ -0,0 +1,241 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import re
5
+ from dataclasses import dataclass
6
+ from datetime import datetime
7
+ from pathlib import Path
8
+ from sqlite3 import Connection
9
+
10
+ from fly_on_the_wall.audio import AudioError, probe_metadata
11
+ from fly_on_the_wall.storage import StoragePaths
12
+
13
+
14
@dataclass(frozen=True)
class NormalizedAudioMetadata:
    """Immutable, flattened view of one audio file's ``ffprobe`` metadata.

    Instances are built by ``normalize_audio_metadata``; every field defaults
    to ``None`` when the corresponding value is unavailable.
    """

    # Recording time as "%Y-%m-%d %H:%M:%S", plus where it came from
    # (e.g. "metadata.date", "filename.recup", "filesystem.mtime") and a
    # "low" / "medium" / "high" confidence label.
    recorded_at: str | None = None
    recorded_at_source: str | None = None
    recorded_at_confidence: str | None = None
    # Values read from the ffprobe "format" section and the first audio stream.
    duration_seconds: float | None = None
    size_bytes: int | None = None
    bit_rate: int | None = None
    codec: str | None = None
    sample_rate: int | None = None
    channels: int | None = None
    channel_layout: str | None = None
    container_format: str | None = None
    # Selected tags (keys lower-cased, values stripped) from the stream and container.
    metadata_title: str | None = None
    metadata_artist: str | None = None
    metadata_album: str | None = None
    metadata_genre: str | None = None
    metadata_comment: str | None = None
    metadata_encoder: str | None = None
    # The "artist" tag when present, otherwise the "encoder" tag.
    device_or_software: str | None = None
34
+
35
+
36
def extract_and_store_audio_metadata(
    connection: Connection,
    meeting_id: str,
    audio_path: Path,
    storage: StoragePaths,
) -> None:
    """Probe ``audio_path`` and persist both the raw and normalized metadata.

    The raw ``ffprobe`` JSON is written to
    ``<storage.artifacts>/<meeting_id>/audio-metadata.ffprobe.json`` and the
    normalized values are upserted into the ``audio_metadata`` table.

    This is best-effort: if probing raises ``AudioError`` the function
    returns without writing anything.
    """
    try:
        raw_metadata = probe_metadata(audio_path)
    except AudioError:
        # Missing metadata is not treated as an error by this function.
        return

    metadata_dir = storage.artifacts / meeting_id
    metadata_dir.mkdir(parents=True, exist_ok=True)
    raw_metadata_path = metadata_dir / "audio-metadata.ffprobe.json"
    raw_metadata_path.write_text(json.dumps(raw_metadata, indent=2) + "\n")

    normalized = normalize_audio_metadata(raw_metadata, audio_path)
    # sqlite3 connections used as context managers commit on success and
    # roll back if the block raises.
    with connection:
        connection.execute(
            """
            INSERT OR REPLACE INTO audio_metadata(
                id,
                meeting_id,
                raw_metadata_path,
                recorded_at,
                recorded_at_source,
                recorded_at_confidence,
                duration_seconds,
                size_bytes,
                bit_rate,
                codec,
                sample_rate,
                channels,
                channel_layout,
                container_format,
                metadata_title,
                metadata_artist,
                metadata_album,
                metadata_genre,
                metadata_comment,
                metadata_encoder,
                device_or_software
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                # The meeting id is used for both the ``id`` and ``meeting_id`` columns.
                meeting_id,
                meeting_id,
                str(raw_metadata_path),
                normalized.recorded_at,
                normalized.recorded_at_source,
                normalized.recorded_at_confidence,
                normalized.duration_seconds,
                normalized.size_bytes,
                normalized.bit_rate,
                normalized.codec,
                normalized.sample_rate,
                normalized.channels,
                normalized.channel_layout,
                normalized.container_format,
                normalized.metadata_title,
                normalized.metadata_artist,
                normalized.metadata_album,
                normalized.metadata_genre,
                normalized.metadata_comment,
                normalized.metadata_encoder,
                normalized.device_or_software,
            ),
        )
104
+
105
+
106
def normalize_audio_metadata(raw_metadata: dict, audio_path: Path) -> NormalizedAudioMetadata:
    """Condense a raw ``ffprobe`` report into a ``NormalizedAudioMetadata``.

    Stream-level values come from the first audio stream. Tags from that
    stream and from the container ("format") section are merged, with the
    container tags winning when both define the same key.
    """
    stream = _first_audio_stream(raw_metadata)
    container = raw_metadata.get("format")
    if not isinstance(container, dict):
        container = {}

    merged_tags = {
        **_normalized_tags(stream.get("tags")),
        **_normalized_tags(container.get("tags")),
    }
    when, when_source, when_confidence = _recorded_at(merged_tags, audio_path)
    # Prefer the container-level bit rate; fall back to the stream's.
    raw_bit_rate = container.get("bit_rate") or stream.get("bit_rate")

    return NormalizedAudioMetadata(
        recorded_at=when,
        recorded_at_source=when_source,
        recorded_at_confidence=when_confidence,
        duration_seconds=_optional_float(container.get("duration")),
        size_bytes=_optional_int(container.get("size")),
        bit_rate=_optional_int(raw_bit_rate),
        codec=_optional_str(stream.get("codec_name")),
        sample_rate=_optional_int(stream.get("sample_rate")),
        channels=_optional_int(stream.get("channels")),
        channel_layout=_optional_str(stream.get("channel_layout")),
        container_format=_optional_str(container.get("format_name")),
        metadata_title=merged_tags.get("title"),
        metadata_artist=merged_tags.get("artist"),
        metadata_album=merged_tags.get("album"),
        metadata_genre=merged_tags.get("genre"),
        metadata_comment=merged_tags.get("comment"),
        metadata_encoder=merged_tags.get("encoder"),
        device_or_software=merged_tags.get("artist") or merged_tags.get("encoder"),
    )
134
+
135
+
136
def _first_audio_stream(raw_metadata: dict) -> dict:
    """Return the first stream whose ``codec_type`` is ``"audio"``.

    An empty dict is returned when ``streams`` is not a list or contains no
    audio stream; non-dict entries are skipped.
    """
    candidates = raw_metadata.get("streams")
    if not isinstance(candidates, list):
        return {}
    audio_streams = (
        entry
        for entry in candidates
        if isinstance(entry, dict) and entry.get("codec_type") == "audio"
    )
    return next(audio_streams, {})
144
+
145
+
146
def _normalized_tags(tags: object) -> dict[str, str]:
    """Return ``tags`` with lower-cased string keys and stripped string values.

    Entries whose value is blank after stripping are dropped. Anything that
    is not a dict yields an empty dict.
    """
    if not isinstance(tags, dict):
        return {}
    cleaned: dict[str, str] = {}
    for key, value in tags.items():
        text = str(value).strip()
        if text:
            cleaned[str(key).lower()] = text
    return cleaned
150
+
151
+
152
def _recorded_at(tags: dict[str, str], audio_path: Path) -> tuple[str | None, str | None, str | None]:
    """Work out when the recording was made.

    Sources are tried in order and the first one that parses wins: the
    ``date`` / ``creation_time`` / ``com.apple.quicktime.creationdate`` tags,
    a timestamp inside the ``title`` tag, two file-name patterns, and finally
    the file's modification time.

    Returns:
        ``(timestamp, source, confidence)``, or ``(None, None, None)`` when
        nothing parses and the file cannot be stat'ed.
    """
    for tag_name in ("date", "creation_time", "com.apple.quicktime.creationdate"):
        timestamp = _parse_datetime(tags.get(tag_name))
        if timestamp is not None:
            return timestamp, f"metadata.{tag_name}", "medium"

    file_name = audio_path.name
    fallbacks = (
        (_parse_philips_title_datetime, tags.get("title"), "metadata.title", "high"),
        (_parse_recup_filename_datetime, file_name, "filename.recup", "medium"),
        (_parse_recorder_filename_datetime, file_name, "filename.recorder", "medium"),
    )
    for parse, raw_value, source, confidence in fallbacks:
        timestamp = parse(raw_value)
        if timestamp is not None:
            return timestamp, source, confidence

    # Last resort: the file's modification time, truncated to whole seconds.
    try:
        modified = datetime.fromtimestamp(audio_path.stat().st_mtime).replace(microsecond=0)
    except OSError:
        return None, None, None
    return _format_datetime(modified), "filesystem.mtime", "low"
176
+
177
+
178
def _parse_philips_title_datetime(value: str | None) -> str | None:
    """Extract a ``YYYY-MM-DD HH:MM:SS`` timestamp embedded in a title tag.

    The date and time may be separated by a space or a ``T``. Returns
    ``None`` for empty input, when no timestamp is found, or when the
    matched text does not parse.
    """
    found = re.search(r"(\d{4}-\d{2}-\d{2})[ T](\d{2}:\d{2}:\d{2})", value) if value else None
    if found is None:
        return None
    date_part, time_part = found.groups()
    return _parse_datetime(f"{date_part} {time_part}")
185
+
186
+
187
def _parse_recup_filename_datetime(value: str) -> str | None:
    """Parse a ``DV-YYYY-MM-DD-HHMMSS`` timestamp found anywhere in ``value``.

    Returns ``None`` when the pattern is absent or the digits do not parse.
    """
    found = re.search(r"DV-(\d{4})-(\d{2})-(\d{2})-(\d{2})(\d{2})(\d{2})", value)
    if found is not None:
        yyyy, mm, dd, hh, mi, ss = found.groups()
        return _parse_datetime(f"{yyyy}-{mm}-{dd} {hh}:{mi}:{ss}")
    return None
193
+
194
+
195
def _parse_recorder_filename_datetime(value: str) -> str | None:
    """Parse a ``YYMMDD_HHMMSS`` timestamp found anywhere in ``value``.

    The two-digit year is read as ``20YY``. Returns ``None`` when the
    pattern is absent or the digits do not parse.
    """
    found = re.search(r"(\d{2})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})", value)
    if found is not None:
        yy, mm, dd, hh, mi, ss = found.groups()
        return _parse_datetime(f"20{yy}-{mm}-{dd} {hh}:{mi}:{ss}")
    return None
201
+
202
+
203
def _parse_datetime(value: str | None) -> str | None:
    """Parse ``value`` as a date-time and re-format it via ``_format_datetime``.

    ``datetime.fromisoformat`` is tried first (after replacing ``Z`` with
    ``+00:00``), then two explicit ``strptime`` layouts. Any time-zone
    information is dropped without conversion. Returns ``None`` for empty
    or unparseable input.
    """
    if not value:
        return None
    candidate = value.strip().replace("Z", "+00:00")
    parsers = (
        datetime.fromisoformat,
        lambda text: datetime.strptime(text, "%Y-%m-%d %H:%M:%S"),
        lambda text: datetime.strptime(text, "%Y-%m-%dT%H:%M:%S"),
    )
    for parse in parsers:
        try:
            moment = parse(candidate)
        except ValueError:
            continue
        return _format_datetime(moment.replace(tzinfo=None))
    return None
217
+
218
+
219
def _format_datetime(value: datetime) -> str:
    """Render ``value`` as ``YYYY-MM-DD HH:MM:SS``."""
    layout = "%Y-%m-%d %H:%M:%S"
    return value.strftime(layout)
221
+
222
+
223
def _optional_str(value: object) -> str | None:
    """Return ``str(value)`` stripped, or ``None`` when it is ``None`` or blank."""
    text = "" if value is None else str(value).strip()
    return text if text else None
228
+
229
+
230
def _optional_int(value: object) -> int | None:
    """Convert ``value`` to ``int``, returning ``None`` when that is not possible.

    ``None`` is passed through. Values ``int()`` rejects are mapped to
    ``None`` rather than raising, so one malformed metadata field cannot
    abort normalization.
    """
    if value is None:
        return None
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers infinite floats, e.g. int(float("inf")).
        return None
235
+
236
+
237
def _optional_float(value: object) -> float | None:
    """Convert ``value`` to ``float``, returning ``None`` when that is not possible.

    ``None`` is passed through. Values ``float()`` rejects are mapped to
    ``None`` rather than raising, so one malformed metadata field cannot
    abort normalization.
    """
    if value is None:
        return None
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers integers too large to represent as a float.
        return None
@@ -0,0 +1,26 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ from pathlib import Path
5
+
6
+
7
def text_sha256(value: str) -> str:
    """Return the hexadecimal SHA-256 digest of ``value`` encoded as UTF-8."""
    digest = hashlib.sha256()
    digest.update(value.encode())
    return digest.hexdigest()
9
+
10
+
11
def read_cached_text(cache_dir: Path, cache_key: str) -> str | None:
    """Return the text cached under ``cache_key``, or ``None`` on a cache miss.

    Args:
        cache_dir: Directory that holds the cache files.
        cache_key: Key identifying the entry; resolved to a file by ``_cache_path``.

    Returns:
        The cached text, or ``None`` when no file exists for ``cache_key``.
    """
    try:
        return _cache_path(cache_dir, cache_key).read_text()
    except (FileNotFoundError, NotADirectoryError):
        # Read directly instead of checking exists() first: an entry removed
        # between the check and the read would otherwise raise rather than
        # being reported as a miss.
        return None
16
+
17
+
18
def write_cached_text(cache_dir: Path, cache_key: str, value: str) -> Path:
    """Store ``value`` under ``cache_key`` and return the path of the cache file.

    The containing directory is created when missing; an existing entry is
    overwritten.
    """
    target = _cache_path(cache_dir, cache_key)
    parent_dir = target.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    target.write_text(value)
    return target
23
+
24
+
25
def _cache_path(cache_dir: Path, cache_key: str) -> Path:
    """Return the ``<cache_key>.txt`` file path inside ``cache_dir``."""
    file_name = f"{cache_key}.txt"
    return cache_dir.joinpath(file_name)
@@ -0,0 +1,29 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+
5
+
6
def deterministic_cleanup(transcript: str) -> str:
    """Tidy a ``Speaker: text`` transcript without changing its wording.

    Blank lines and turns with no text are dropped, whitespace is collapsed,
    and consecutive turns by the same speaker are merged. The remaining
    turns are joined with a blank line between them.
    """
    combined: list[tuple[str, str]] = []
    for raw_line in transcript.splitlines():
        if not raw_line.strip():
            continue
        speaker, text = _parse_turn(raw_line)
        cleaned = normalize_whitespace(text)
        if not cleaned:
            continue
        same_speaker = bool(combined) and combined[-1][0] == speaker
        if same_speaker:
            last_speaker, last_text = combined.pop()
            combined.append((last_speaker, normalize_whitespace(f"{last_text} {cleaned}")))
        else:
            combined.append((speaker, cleaned))
    rendered = [f"{speaker}: {text}" for speaker, text in combined]
    return "\n\n".join(rendered)
19
+
20
+
21
def normalize_whitespace(value: str) -> str:
    """Collapse every run of whitespace to one space and trim both ends."""
    collapsed = re.sub(r"\s+", " ", value)
    return collapsed.strip()
23
+
24
+
25
def _parse_turn(line: str) -> tuple[str, str]:
    """Split a transcript line into ``(speaker, text)`` at the first colon.

    A line without a colon is attributed to ``"Unknown"`` and returned
    whole. The speaker label is whitespace-normalized; the text is returned
    as-is.
    """
    label, separator, remainder = line.partition(":")
    if not separator:
        return "Unknown", line
    return normalize_whitespace(label), remainder