fow-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. fly_on_the_wall/__init__.py +3 -0
  2. fly_on_the_wall/audio.py +164 -0
  3. fly_on_the_wall/audio_metadata.py +241 -0
  4. fly_on_the_wall/cache.py +26 -0
  5. fly_on_the_wall/cleanup.py +29 -0
  6. fly_on_the_wall/cli.py +641 -0
  7. fly_on_the_wall/cli_costs.py +81 -0
  8. fly_on_the_wall/cli_menu.py +163 -0
  9. fly_on_the_wall/cli_publish.py +141 -0
  10. fly_on_the_wall/cli_speaker_review.py +315 -0
  11. fly_on_the_wall/cli_watch.py +209 -0
  12. fly_on_the_wall/config.py +92 -0
  13. fly_on_the_wall/costs.py +169 -0
  14. fly_on_the_wall/db.py +508 -0
  15. fly_on_the_wall/doctor.py +142 -0
  16. fly_on_the_wall/embeddings.py +142 -0
  17. fly_on_the_wall/exporting.py +155 -0
  18. fly_on_the_wall/glossary.py +31 -0
  19. fly_on_the_wall/meetings.py +382 -0
  20. fly_on_the_wall/normalization.py +166 -0
  21. fly_on_the_wall/people.py +82 -0
  22. fly_on_the_wall/people_embeddings.py +68 -0
  23. fly_on_the_wall/pipeline.py +120 -0
  24. fly_on_the_wall/processing.py +427 -0
  25. fly_on_the_wall/providers/__init__.py +1 -0
  26. fly_on_the_wall/providers/elevenlabs.py +145 -0
  27. fly_on_the_wall/providers/openai_analysis.py +195 -0
  28. fly_on_the_wall/providers/openai_cleanup.py +91 -0
  29. fly_on_the_wall/publishing.py +410 -0
  30. fly_on_the_wall/reanalysis.py +172 -0
  31. fly_on_the_wall/recording_quality.py +141 -0
  32. fly_on_the_wall/rendering.py +115 -0
  33. fly_on_the_wall/secrets.py +93 -0
  34. fly_on_the_wall/service_pricing.py +75 -0
  35. fly_on_the_wall/setup.py +221 -0
  36. fly_on_the_wall/speaker_identity.py +173 -0
  37. fly_on_the_wall/speaker_matching.py +134 -0
  38. fly_on_the_wall/speakers.py +221 -0
  39. fly_on_the_wall/storage.py +53 -0
  40. fly_on_the_wall/voice_samples.py +125 -0
  41. fly_on_the_wall/watch.py +347 -0
  42. fow_cli-0.1.0.dist-info/METADATA +447 -0
  43. fow_cli-0.1.0.dist-info/RECORD +46 -0
  44. fow_cli-0.1.0.dist-info/WHEEL +4 -0
  45. fow_cli-0.1.0.dist-info/entry_points.txt +2 -0
  46. fow_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,221 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from sqlite3 import Connection
5
+ from uuid import uuid4
6
+
7
+ from fly_on_the_wall.people import create_person, get_person
8
+
9
+
10
+ def list_unknown_speakers(connection: Connection, meeting_id_or_slug: str | None = None) -> list[dict]:
11
+ params: list[str] = []
12
+ meeting_filter = ""
13
+ if meeting_id_or_slug:
14
+ meeting_filter = "AND (meetings.id = ? OR meetings.slug = ?)"
15
+ params.extend([meeting_id_or_slug, meeting_id_or_slug])
16
+
17
+ rows = connection.execute(
18
+ f"""
19
+ SELECT local_speakers.id,
20
+ local_speakers.label,
21
+ meetings.slug AS meeting_slug,
22
+ provider_runs.id AS provider_run_id,
23
+ COUNT(segments.id) AS segment_count
24
+ FROM local_speakers
25
+ JOIN meetings ON meetings.id = local_speakers.meeting_id
26
+ JOIN provider_runs ON provider_runs.id = local_speakers.provider_run_id
27
+ LEFT JOIN segments ON segments.local_speaker_id = local_speakers.id
28
+ LEFT JOIN speaker_assignments
29
+ ON speaker_assignments.local_speaker_id = local_speakers.id
30
+ WHERE (speaker_assignments.id IS NULL OR speaker_assignments.status = 'unknown')
31
+ {meeting_filter}
32
+ GROUP BY local_speakers.id
33
+ ORDER BY meetings.created_at DESC, local_speakers.label
34
+ """,
35
+ params,
36
+ ).fetchall()
37
+ return [dict(row) for row in rows]
38
+
39
+
40
+ def list_uncertain_speakers(connection: Connection, meeting_id_or_slug: str | None = None) -> list[dict]:
41
+ params: list[str] = []
42
+ meeting_filter = ""
43
+ if meeting_id_or_slug:
44
+ meeting_filter = "AND (meetings.id = ? OR meetings.slug = ?)"
45
+ params.extend([meeting_id_or_slug, meeting_id_or_slug])
46
+
47
+ rows = connection.execute(
48
+ f"""
49
+ SELECT local_speakers.id,
50
+ local_speakers.label,
51
+ meetings.slug AS meeting_slug,
52
+ provider_runs.id AS provider_run_id,
53
+ speaker_assignments.person_id AS suggested_person_id,
54
+ people.display_name AS suggested_person_name,
55
+ speaker_assignments.confidence,
56
+ speaker_assignments.margin,
57
+ COUNT(segments.id) AS segment_count
58
+ FROM local_speakers
59
+ JOIN meetings ON meetings.id = local_speakers.meeting_id
60
+ JOIN provider_runs ON provider_runs.id = local_speakers.provider_run_id
61
+ JOIN speaker_assignments
62
+ ON speaker_assignments.local_speaker_id = local_speakers.id
63
+ JOIN people ON people.id = speaker_assignments.person_id
64
+ LEFT JOIN segments ON segments.local_speaker_id = local_speakers.id
65
+ WHERE speaker_assignments.status = 'uncertain'
66
+ {meeting_filter}
67
+ GROUP BY local_speakers.id
68
+ ORDER BY meetings.created_at DESC, speaker_assignments.confidence DESC, local_speakers.label
69
+ """,
70
+ params,
71
+ ).fetchall()
72
+ return [dict(row) for row in rows]
73
+
74
+
75
def list_review_speakers(
    connection: Connection,
    meeting_id_or_slug: str | None = None,
    include_uncertain: bool = False,
    only_uncertain: bool = False,
) -> list[dict]:
    """Return the speakers that need human review, tagged with ``review_kind``.

    Args:
        connection: Open SQLite connection passed through to the list queries.
        meeting_id_or_slug: Optional meeting id or slug to restrict the result.
        include_uncertain: When true, append uncertain speakers after the
            unknown ones.
        only_uncertain: When true, return uncertain speakers only. Takes
            precedence over ``include_uncertain``.

    Returns:
        A list of speaker dicts, each carrying an extra ``review_kind`` key set
        to ``"unknown"`` or ``"uncertain"``. Unknown speakers come first.
    """
    if only_uncertain:
        return _uncertain_review_speakers(connection, meeting_id_or_slug)

    unknown = [
        _with_review_kind(speaker, "unknown") for speaker in list_unknown_speakers(connection, meeting_id_or_slug)
    ]
    if include_uncertain:
        # The uncertain query is only issued when its rows are actually returned.
        return unknown + _uncertain_review_speakers(connection, meeting_id_or_slug)
    return unknown


def _uncertain_review_speakers(connection: Connection, meeting_id_or_slug: str | None) -> list[dict]:
    """Fetch uncertain speakers and tag each one with ``review_kind="uncertain"``."""
    return [
        _with_review_kind(speaker, "uncertain") for speaker in list_uncertain_speakers(connection, meeting_id_or_slug)
    ]
93
+
94
+
95
def confirm_speaker_assignment(connection: Connection, local_speaker_id: str) -> dict:
    """Promote an uncertain suggestion for a local speaker to a confirmed one.

    Looks up the ``'uncertain'`` assignment of ``local_speaker_id`` (it must
    reference an existing ``people`` row) and hands the suggested person id to
    ``assign_speaker_to_person``.

    Args:
        connection: Open SQLite connection; the row is read by column name, so
            a mapping-style row factory such as ``sqlite3.Row`` is expected.
        local_speaker_id: Id of the local speaker whose suggestion is confirmed.

    Returns:
        Whatever ``assign_speaker_to_person`` returns for the suggested person.

    Raises:
        ValueError: If the speaker has no uncertain assignment.
    """
    lookup_sql = """
        SELECT speaker_assignments.person_id, people.display_name
        FROM speaker_assignments
        JOIN people ON people.id = speaker_assignments.person_id
        WHERE speaker_assignments.local_speaker_id = ?
        AND speaker_assignments.status = 'uncertain'
        """
    suggestion = connection.execute(lookup_sql, (local_speaker_id,)).fetchone()
    if suggestion is not None:
        return assign_speaker_to_person(connection, local_speaker_id, suggestion["person_id"])
    raise ValueError(f"No uncertain speaker assignment found: {local_speaker_id}")
109
+
110
+
111
def _with_review_kind(speaker: dict, review_kind: str) -> dict:
    """Return a shallow copy of ``speaker`` with ``review_kind`` set.

    The input dict is left untouched; an existing ``review_kind`` key is
    overwritten in the copy.
    """
    return {**speaker, "review_kind": review_kind}
115
+
116
+
117
def speaker_examples(connection: Connection, local_speaker_id: str, limit: int = 3) -> list[dict]:
    """Return the first few transcript segments spoken by a local speaker.

    Args:
        connection: Open SQLite connection. Rows are converted with
            ``dict(row)``, so a mapping-style row factory such as
            ``sqlite3.Row`` is expected.
        local_speaker_id: Id of the local speaker whose segments are sampled.
        limit: Maximum number of segments to return (passed to SQL ``LIMIT``).

    Returns:
        Dicts with the keys ``text``, ``start_time`` and ``end_time``, in
        ascending ``sequence`` order.
    """
    query = """
        SELECT text, start_time, end_time
        FROM segments
        WHERE local_speaker_id = ?
        ORDER BY sequence
        LIMIT ?
        """
    cursor = connection.execute(query, (local_speaker_id, limit))
    return [dict(segment) for segment in cursor.fetchall()]
129
+
130
+
131
def assign_speaker_to_person(connection: Connection, local_speaker_id: str, person_id_or_name: str) -> dict:
    """Record a user-confirmed link between a local speaker and a person.

    The person is looked up with ``get_person``; when that returns ``None`` a
    new person is created with ``create_person`` from the same value. The
    assignment is then upserted with status ``"known"`` and a correction row is
    recorded, both inside one ``with connection`` transaction block.

    Args:
        connection: Open SQLite connection.
        local_speaker_id: Id of the local speaker being assigned.
        person_id_or_name: Value passed to ``get_person`` and, when no person
            is found, to ``create_person``.

    Returns:
        A dict with the keys ``local_speaker_id``, ``person_id``, ``name`` and
        ``created_person`` (true when a new person had to be created).

    Raises:
        ValueError: If ``local_speaker_id`` does not exist. This is checked
            before any person is looked up or created.
    """
    # Validate the speaker before resolving the person, so that a bad speaker
    # id cannot leave a newly created person behind.
    meeting_id = _local_speaker_meeting_id(connection, local_speaker_id)
    if meeting_id is None:
        raise ValueError(f"Local speaker not found: {local_speaker_id}")

    person = get_person(connection, person_id_or_name)
    created_person = person is None
    if created_person:
        person = create_person(connection, person_id_or_name)

    with connection:
        # Upsert keyed on local_speaker_id: an existing assignment row is
        # overwritten in place rather than duplicated.
        connection.execute(
            """
            INSERT INTO speaker_assignments(id, local_speaker_id, person_id, status, evidence_json)
            VALUES (?, ?, ?, ?, ?)
            ON CONFLICT(local_speaker_id) DO UPDATE SET
            person_id = excluded.person_id,
            status = excluded.status,
            evidence_json = excluded.evidence_json
            """,
            (
                str(uuid4()),
                local_speaker_id,
                person.id,
                "known",
                json.dumps({"method": "user_correction"}),
            ),
        )
        _record_correction(connection, "speaker_assignment", meeting_id, local_speaker_id, person.id)
    return {
        "local_speaker_id": local_speaker_id,
        "person_id": person.id,
        "name": person.display_name,
        "created_person": created_person,
    }
168
+
169
+
170
def mark_speaker_unknown(connection: Connection, local_speaker_id: str) -> None:
    """Set the assignment status of ``local_speaker_id`` to ``"unknown"``.

    Raises:
        ValueError: If the local speaker does not exist (raised by
            ``_mark_speaker_status``).
    """
    _mark_speaker_status(connection, local_speaker_id, status="unknown")
172
+
173
+
174
def mark_speaker_ignored(connection: Connection, local_speaker_id: str) -> None:
    """Set the assignment status of ``local_speaker_id`` to ``"ignored"``.

    Raises:
        ValueError: If the local speaker does not exist (raised by
            ``_mark_speaker_status``).
    """
    _mark_speaker_status(connection, local_speaker_id, status="ignored")
176
+
177
+
178
def _mark_speaker_status(connection: Connection, local_speaker_id: str, status: str) -> None:
    """Upsert a person-less assignment with the given status for a speaker.

    The assignment row for ``local_speaker_id`` is inserted, or updated if one
    already exists, with ``person_id`` cleared to NULL. A correction row with a
    NULL person is recorded in the same ``with connection`` block.

    Args:
        connection: Open SQLite connection.
        local_speaker_id: Id of the local speaker to update.
        status: Status value to store; also written into the evidence JSON as
            ``action``.

    Raises:
        ValueError: If the local speaker does not exist.
    """
    meeting_id = _local_speaker_meeting_id(connection, local_speaker_id)
    if meeting_id is None:
        raise ValueError(f"Local speaker not found: {local_speaker_id}")

    upsert_sql = """
        INSERT INTO speaker_assignments(id, local_speaker_id, person_id, status, evidence_json)
        VALUES (?, ?, NULL, ?, ?)
        ON CONFLICT(local_speaker_id) DO UPDATE SET
        person_id = NULL,
        status = excluded.status,
        evidence_json = excluded.evidence_json
        """
    evidence = json.dumps({"method": "user_correction", "action": status})
    values = (str(uuid4()), local_speaker_id, status, evidence)

    with connection:
        connection.execute(upsert_sql, values)
        _record_correction(connection, "speaker_assignment", meeting_id, local_speaker_id, None)
201
+
202
+
203
+ def _local_speaker_meeting_id(connection: Connection, local_speaker_id: str) -> str | None:
204
+ row = connection.execute("SELECT meeting_id FROM local_speakers WHERE id = ?", (local_speaker_id,)).fetchone()
205
+ return None if row is None else row["meeting_id"]
206
+
207
+
208
+ def _record_correction(
209
+ connection: Connection,
210
+ correction_type: str,
211
+ meeting_id: str,
212
+ local_speaker_id: str,
213
+ person_id: str | None,
214
+ ) -> None:
215
+ connection.execute(
216
+ """
217
+ INSERT INTO corrections(id, correction_type, meeting_id, local_speaker_id, person_id)
218
+ VALUES (?, ?, ?, ?, ?)
219
+ """,
220
+ (str(uuid4()), correction_type, meeting_id, local_speaker_id, person_id),
221
+ )
@@ -0,0 +1,53 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+
7
+ from fly_on_the_wall.config import APP_DIR_NAME
8
+
9
+
10
@dataclass(frozen=True)
class StoragePaths:
    """Immutable bundle of the filesystem locations used for storage."""

    # Base directory.
    root: Path
    # Database file; the only field that is not listed in ``directories``.
    database: Path
    audio: Path
    artifacts: Path
    voice_samples: Path
    exports: Path

    @property
    def directories(self) -> tuple[Path, ...]:
        """Return the directory paths (root first), excluding the database file."""
        directory_fields = ("root", "audio", "artifacts", "voice_samples", "exports")
        return tuple(getattr(self, field_name) for field_name in directory_fields)
28
+
29
+
30
def data_dir() -> Path:
    """Return the application's data directory.

    Uses ``$XDG_DATA_HOME`` (with ``~`` expanded) when that variable is set to
    a non-empty value, otherwise ``~/.local/share``; ``APP_DIR_NAME`` is
    appended in both cases. Nothing is created on disk.
    """
    configured_home = os.environ.get("XDG_DATA_HOME")
    if not configured_home:
        return Path.home() / ".local" / "share" / APP_DIR_NAME
    return Path(configured_home).expanduser() / APP_DIR_NAME
35
+
36
+
37
def storage_paths(root: Path | None = None) -> StoragePaths:
    """Build the ``StoragePaths`` layout beneath a root directory.

    Args:
        root: Base directory; when omitted, ``data_dir()`` is used.

    Returns:
        A ``StoragePaths`` whose database is ``fly.db`` and whose directories
        are ``audio``, ``artifacts``, ``voice-samples`` and ``exports`` under
        the root. Paths are only computed, not created.
    """
    base = data_dir() if not root else root
    # Field name -> directory name on disk (note the hyphen in "voice-samples").
    subdirectory_names = {
        "audio": "audio",
        "artifacts": "artifacts",
        "voice_samples": "voice-samples",
        "exports": "exports",
    }
    subdirectories = {field_name: base / dir_name for field_name, dir_name in subdirectory_names.items()}
    return StoragePaths(root=base, database=base / "fly.db", **subdirectories)
47
+
48
+
49
def ensure_storage_layout(root: Path | None = None) -> StoragePaths:
    """Create every storage directory if missing and return the layout.

    Args:
        root: Base directory forwarded to ``storage_paths``.

    Returns:
        The ``StoragePaths`` whose ``directories`` were created (including
        missing parents; existing directories are left alone).
    """
    layout = storage_paths(root)
    for target in layout.directories:
        target.mkdir(parents=True, exist_ok=True)
    return layout
@@ -0,0 +1,125 @@
1
+ from __future__ import annotations
2
+
3
+ import shutil
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+ from sqlite3 import Connection
7
+ from uuid import uuid4
8
+
9
+ from fly_on_the_wall.audio import extract_clip
10
+ from fly_on_the_wall.storage import StoragePaths, storage_paths
11
+
12
+
13
+ @dataclass(frozen=True)
14
+ class VoiceSample:
15
+ id: str
16
+ person_id: str
17
+ audio_path: Path
18
+ source_meeting_id: str | None = None
19
+ source_local_speaker_id: str | None = None
20
+ start_time: float | None = None
21
+ end_time: float | None = None
22
+
23
+
24
def create_voice_sample_from_clip(
    connection: Connection,
    person_id: str,
    clip_path: Path,
    storage: StoragePaths | None = None,
) -> VoiceSample:
    """Copy an existing audio clip into storage and register it as a voice sample.

    The clip is copied to ``<voice_samples>/<person_id>/<new uuid><suffix>``,
    keeping the original file suffix, and a matching ``voice_samples`` row is
    inserted.

    Args:
        connection: Open SQLite connection used for the insert.
        person_id: Person the sample belongs to; also used as the directory name.
        clip_path: Existing audio file to copy.
        storage: Storage layout to use; defaults to ``storage_paths()``.

    Returns:
        The new ``VoiceSample``, with no source meeting, speaker or time span.

    Raises:
        FileNotFoundError: If ``clip_path`` is not an existing file.
    """
    if not clip_path.is_file():
        raise FileNotFoundError(f"Voice sample clip does not exist: {clip_path}")

    paths = storage or storage_paths()
    sample_id = str(uuid4())
    stored_path = paths.voice_samples / person_id / f"{sample_id}{clip_path.suffix}"
    stored_path.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(clip_path, stored_path)

    sample = VoiceSample(id=sample_id, person_id=person_id, audio_path=stored_path)
    try:
        _insert_voice_sample(connection, sample)
    except Exception:
        # Without its database row the copy is untracked; remove it and let
        # the original error propagate.
        stored_path.unlink(missing_ok=True)
        raise
    return sample
42
+
43
+
44
def create_voice_sample_from_span(
    connection: Connection,
    person_id: str,
    source_audio_path: Path,
    source_meeting_id: str,
    source_local_speaker_id: str | None,
    start_time: float,
    end_time: float,
    storage: StoragePaths | None = None,
) -> VoiceSample:
    """Extract a time span from a recording and register it as a voice sample.

    The clip is written by ``extract_clip`` to
    ``<voice_samples>/<person_id>/<new uuid>.wav`` and a matching
    ``voice_samples`` row is inserted with the source meeting, speaker and
    time span.

    Args:
        connection: Open SQLite connection used for the insert.
        person_id: Person the sample belongs to; also used as the directory name.
        source_audio_path: Recording to extract the clip from.
        source_meeting_id: Meeting the recording belongs to.
        source_local_speaker_id: Local speaker the span is attributed to, if any.
        start_time: Start of the span, passed to ``extract_clip``.
        end_time: End of the span, passed to ``extract_clip``.
        storage: Storage layout to use; defaults to ``storage_paths()``.

    Returns:
        The new ``VoiceSample`` describing the stored clip.
    """
    paths = storage or storage_paths()
    sample_id = str(uuid4())
    stored_path = paths.voice_samples / person_id / f"{sample_id}.wav"
    # Ensure the per-person directory exists, as create_voice_sample_from_clip
    # does. NOTE(review): extract_clip may already create it; this is harmless
    # either way because of exist_ok.
    stored_path.parent.mkdir(parents=True, exist_ok=True)

    sample = VoiceSample(
        id=sample_id,
        person_id=person_id,
        audio_path=stored_path,
        source_meeting_id=source_meeting_id,
        source_local_speaker_id=source_local_speaker_id,
        start_time=start_time,
        end_time=end_time,
    )
    try:
        extract_clip(source_audio_path, stored_path, start_time, end_time)
        _insert_voice_sample(connection, sample)
    except Exception:
        # Do not leave a partial or untracked clip behind when extraction or
        # the insert fails; the original error still propagates.
        stored_path.unlink(missing_ok=True)
        raise
    return sample
70
+
71
+
72
def list_voice_samples(connection: Connection, person_id: str) -> list[VoiceSample]:
    """Return all voice samples stored for a person, ordered by ``created_at``.

    Args:
        connection: Open SQLite connection; rows are read by column name, so a
            mapping-style row factory such as ``sqlite3.Row`` is expected.
        person_id: Person whose samples are listed.

    Returns:
        ``VoiceSample`` objects with ``audio_path`` converted to ``Path``.
    """
    query = """
        SELECT id,
        person_id,
        audio_path,
        source_meeting_id,
        source_local_speaker_id,
        start_time,
        end_time
        FROM voice_samples
        WHERE person_id = ?
        ORDER BY created_at
        """
    samples: list[VoiceSample] = []
    for record in connection.execute(query, (person_id,)).fetchall():
        samples.append(
            VoiceSample(
                id=record["id"],
                person_id=record["person_id"],
                audio_path=Path(record["audio_path"]),
                source_meeting_id=record["source_meeting_id"],
                source_local_speaker_id=record["source_local_speaker_id"],
                start_time=record["start_time"],
                end_time=record["end_time"],
            )
        )
    return samples
100
+
101
+
102
def _insert_voice_sample(connection: Connection, sample: VoiceSample) -> None:
    """Insert ``sample`` into the ``voice_samples`` table.

    The insert runs inside a ``with connection`` block, and ``audio_path`` is
    stored as a string.
    """
    insert_sql = """
        INSERT INTO voice_samples(
        id,
        person_id,
        source_meeting_id,
        source_local_speaker_id,
        start_time,
        end_time,
        audio_path
        ) VALUES (?, ?, ?, ?, ?, ?, ?)
        """
    # Value order must match the column list above.
    values = (
        sample.id,
        sample.person_id,
        sample.source_meeting_id,
        sample.source_local_speaker_id,
        sample.start_time,
        sample.end_time,
        str(sample.audio_path),
    )
    with connection:
        connection.execute(insert_sql, values)