fow-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fly_on_the_wall/__init__.py +3 -0
- fly_on_the_wall/audio.py +164 -0
- fly_on_the_wall/audio_metadata.py +241 -0
- fly_on_the_wall/cache.py +26 -0
- fly_on_the_wall/cleanup.py +29 -0
- fly_on_the_wall/cli.py +641 -0
- fly_on_the_wall/cli_costs.py +81 -0
- fly_on_the_wall/cli_menu.py +163 -0
- fly_on_the_wall/cli_publish.py +141 -0
- fly_on_the_wall/cli_speaker_review.py +315 -0
- fly_on_the_wall/cli_watch.py +209 -0
- fly_on_the_wall/config.py +92 -0
- fly_on_the_wall/costs.py +169 -0
- fly_on_the_wall/db.py +508 -0
- fly_on_the_wall/doctor.py +142 -0
- fly_on_the_wall/embeddings.py +142 -0
- fly_on_the_wall/exporting.py +155 -0
- fly_on_the_wall/glossary.py +31 -0
- fly_on_the_wall/meetings.py +382 -0
- fly_on_the_wall/normalization.py +166 -0
- fly_on_the_wall/people.py +82 -0
- fly_on_the_wall/people_embeddings.py +68 -0
- fly_on_the_wall/pipeline.py +120 -0
- fly_on_the_wall/processing.py +427 -0
- fly_on_the_wall/providers/__init__.py +1 -0
- fly_on_the_wall/providers/elevenlabs.py +145 -0
- fly_on_the_wall/providers/openai_analysis.py +195 -0
- fly_on_the_wall/providers/openai_cleanup.py +91 -0
- fly_on_the_wall/publishing.py +410 -0
- fly_on_the_wall/reanalysis.py +172 -0
- fly_on_the_wall/recording_quality.py +141 -0
- fly_on_the_wall/rendering.py +115 -0
- fly_on_the_wall/secrets.py +93 -0
- fly_on_the_wall/service_pricing.py +75 -0
- fly_on_the_wall/setup.py +221 -0
- fly_on_the_wall/speaker_identity.py +173 -0
- fly_on_the_wall/speaker_matching.py +134 -0
- fly_on_the_wall/speakers.py +221 -0
- fly_on_the_wall/storage.py +53 -0
- fly_on_the_wall/voice_samples.py +125 -0
- fly_on_the_wall/watch.py +347 -0
- fow_cli-0.1.0.dist-info/METADATA +447 -0
- fow_cli-0.1.0.dist-info/RECORD +46 -0
- fow_cli-0.1.0.dist-info/WHEEL +4 -0
- fow_cli-0.1.0.dist-info/entry_points.txt +2 -0
- fow_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from sqlite3 import Connection
|
|
5
|
+
from uuid import uuid4
|
|
6
|
+
|
|
7
|
+
from fly_on_the_wall.people import create_person, get_person
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def list_unknown_speakers(connection: Connection, meeting_id_or_slug: str | None = None) -> list[dict]:
|
|
11
|
+
params: list[str] = []
|
|
12
|
+
meeting_filter = ""
|
|
13
|
+
if meeting_id_or_slug:
|
|
14
|
+
meeting_filter = "AND (meetings.id = ? OR meetings.slug = ?)"
|
|
15
|
+
params.extend([meeting_id_or_slug, meeting_id_or_slug])
|
|
16
|
+
|
|
17
|
+
rows = connection.execute(
|
|
18
|
+
f"""
|
|
19
|
+
SELECT local_speakers.id,
|
|
20
|
+
local_speakers.label,
|
|
21
|
+
meetings.slug AS meeting_slug,
|
|
22
|
+
provider_runs.id AS provider_run_id,
|
|
23
|
+
COUNT(segments.id) AS segment_count
|
|
24
|
+
FROM local_speakers
|
|
25
|
+
JOIN meetings ON meetings.id = local_speakers.meeting_id
|
|
26
|
+
JOIN provider_runs ON provider_runs.id = local_speakers.provider_run_id
|
|
27
|
+
LEFT JOIN segments ON segments.local_speaker_id = local_speakers.id
|
|
28
|
+
LEFT JOIN speaker_assignments
|
|
29
|
+
ON speaker_assignments.local_speaker_id = local_speakers.id
|
|
30
|
+
WHERE (speaker_assignments.id IS NULL OR speaker_assignments.status = 'unknown')
|
|
31
|
+
{meeting_filter}
|
|
32
|
+
GROUP BY local_speakers.id
|
|
33
|
+
ORDER BY meetings.created_at DESC, local_speakers.label
|
|
34
|
+
""",
|
|
35
|
+
params,
|
|
36
|
+
).fetchall()
|
|
37
|
+
return [dict(row) for row in rows]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def list_uncertain_speakers(connection: Connection, meeting_id_or_slug: str | None = None) -> list[dict]:
|
|
41
|
+
params: list[str] = []
|
|
42
|
+
meeting_filter = ""
|
|
43
|
+
if meeting_id_or_slug:
|
|
44
|
+
meeting_filter = "AND (meetings.id = ? OR meetings.slug = ?)"
|
|
45
|
+
params.extend([meeting_id_or_slug, meeting_id_or_slug])
|
|
46
|
+
|
|
47
|
+
rows = connection.execute(
|
|
48
|
+
f"""
|
|
49
|
+
SELECT local_speakers.id,
|
|
50
|
+
local_speakers.label,
|
|
51
|
+
meetings.slug AS meeting_slug,
|
|
52
|
+
provider_runs.id AS provider_run_id,
|
|
53
|
+
speaker_assignments.person_id AS suggested_person_id,
|
|
54
|
+
people.display_name AS suggested_person_name,
|
|
55
|
+
speaker_assignments.confidence,
|
|
56
|
+
speaker_assignments.margin,
|
|
57
|
+
COUNT(segments.id) AS segment_count
|
|
58
|
+
FROM local_speakers
|
|
59
|
+
JOIN meetings ON meetings.id = local_speakers.meeting_id
|
|
60
|
+
JOIN provider_runs ON provider_runs.id = local_speakers.provider_run_id
|
|
61
|
+
JOIN speaker_assignments
|
|
62
|
+
ON speaker_assignments.local_speaker_id = local_speakers.id
|
|
63
|
+
JOIN people ON people.id = speaker_assignments.person_id
|
|
64
|
+
LEFT JOIN segments ON segments.local_speaker_id = local_speakers.id
|
|
65
|
+
WHERE speaker_assignments.status = 'uncertain'
|
|
66
|
+
{meeting_filter}
|
|
67
|
+
GROUP BY local_speakers.id
|
|
68
|
+
ORDER BY meetings.created_at DESC, speaker_assignments.confidence DESC, local_speakers.label
|
|
69
|
+
""",
|
|
70
|
+
params,
|
|
71
|
+
).fetchall()
|
|
72
|
+
return [dict(row) for row in rows]
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def list_review_speakers(
    connection: Connection,
    meeting_id_or_slug: str | None = None,
    include_uncertain: bool = False,
    only_uncertain: bool = False,
) -> list[dict]:
    """Collect the speakers that need manual review, tagged with ``review_kind``.

    Args:
        connection: Open SQLite connection, passed through to the listing
            helpers.
        meeting_id_or_slug: Optional meeting id or slug used to restrict the
            listing.
        include_uncertain: When true, uncertain speakers are appended after
            the unknown ones.
        only_uncertain: When true, only uncertain speakers are returned. This
            flag is checked first and therefore wins over ``include_uncertain``.

    Returns:
        A list of speaker dicts, each with an added ``review_kind`` key set to
        ``"unknown"`` or ``"uncertain"``.
    """
    uncertain: list[dict] = []
    # Only query uncertain speakers when a flag actually asks for them; the
    # default call previously paid for a query whose result was discarded.
    if only_uncertain or include_uncertain:
        uncertain = [
            _with_review_kind(speaker, "uncertain")
            for speaker in list_uncertain_speakers(connection, meeting_id_or_slug)
        ]
    if only_uncertain:
        return uncertain

    unknown = [
        _with_review_kind(speaker, "unknown")
        for speaker in list_unknown_speakers(connection, meeting_id_or_slug)
    ]
    # ``uncertain`` is empty unless ``include_uncertain`` was requested.
    return unknown + uncertain
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def confirm_speaker_assignment(connection: Connection, local_speaker_id: str) -> dict:
    """Accept the suggested person of an uncertain speaker assignment.

    Looks up the assignment with status ``'uncertain'`` for the given local
    speaker and hands its ``person_id`` to ``assign_speaker_to_person``.

    Args:
        connection: Open SQLite connection; rows are read by column name.
        local_speaker_id: Id of the local speaker whose suggestion is confirmed.

    Returns:
        Whatever ``assign_speaker_to_person`` returns for the confirmed person.

    Raises:
        ValueError: If the speaker has no assignment in the ``'uncertain'`` state.
    """
    lookup = """
        SELECT speaker_assignments.person_id, people.display_name
        FROM speaker_assignments
        JOIN people ON people.id = speaker_assignments.person_id
        WHERE speaker_assignments.local_speaker_id = ?
          AND speaker_assignments.status = 'uncertain'
        """
    suggestion = connection.execute(lookup, (local_speaker_id,)).fetchone()
    if suggestion is not None:
        return assign_speaker_to_person(connection, local_speaker_id, suggestion["person_id"])
    raise ValueError(f"No uncertain speaker assignment found: {local_speaker_id}")
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _with_review_kind(speaker: dict, review_kind: str) -> dict:
    """Return a shallow copy of ``speaker`` with ``review_kind`` set.

    The input dict is left untouched; an existing ``review_kind`` entry is
    overwritten in the copy.
    """
    return {**speaker, "review_kind": review_kind}
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def speaker_examples(connection: Connection, local_speaker_id: str, limit: int = 3) -> list[dict]:
    """Fetch the first few transcript segments spoken by a local speaker.

    Args:
        connection: Open SQLite connection. Rows are converted with
            ``dict(row)``, so a mapping-like row factory is presumably
            configured -- confirm with the caller.
        local_speaker_id: Id of the local speaker whose segments are sampled.
        limit: Maximum number of segments to return; passed straight to the
            SQL ``LIMIT`` clause.

    Returns:
        Dicts with the keys ``text``, ``start_time`` and ``end_time``, ordered
        by the segment ``sequence`` column.
    """
    query = """
        SELECT text, start_time, end_time
        FROM segments
        WHERE local_speaker_id = ?
        ORDER BY sequence
        LIMIT ?
        """
    cursor = connection.execute(query, (local_speaker_id, limit))
    return list(map(dict, cursor.fetchall()))
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def assign_speaker_to_person(connection: Connection, local_speaker_id: str, person_id_or_name: str) -> dict:
    """Assign a local speaker to a person, creating the person if needed.

    The assignment is stored with status ``"known"`` and evidence
    ``{"method": "user_correction"}``. An existing assignment for the same
    local speaker is overwritten. A correction record is written in the same
    transaction.

    Args:
        connection: Open SQLite connection.
        local_speaker_id: Id of the local speaker to assign.
        person_id_or_name: Value passed to ``get_person``; when no person is
            found it is passed to ``create_person`` instead.

    Returns:
        A dict with the keys ``local_speaker_id``, ``person_id``, ``name`` and
        ``created_person`` (``True`` when a new person was created).

    Raises:
        ValueError: If ``local_speaker_id`` does not refer to a local speaker.
    """
    # Validate the speaker before touching the people table. Previously a
    # missing speaker raised only after ``create_person`` had already run,
    # which could leave behind a person that nothing refers to.
    meeting_id = _local_speaker_meeting_id(connection, local_speaker_id)
    if meeting_id is None:
        raise ValueError(f"Local speaker not found: {local_speaker_id}")

    person = get_person(connection, person_id_or_name)
    created_person = person is None
    if created_person:
        person = create_person(connection, person_id_or_name)

    # The assignment upsert and the correction record commit or roll back together.
    with connection:
        connection.execute(
            """
            INSERT INTO speaker_assignments(id, local_speaker_id, person_id, status, evidence_json)
            VALUES (?, ?, ?, ?, ?)
            ON CONFLICT(local_speaker_id) DO UPDATE SET
                person_id = excluded.person_id,
                status = excluded.status,
                evidence_json = excluded.evidence_json
            """,
            (
                str(uuid4()),
                local_speaker_id,
                person.id,
                "known",
                json.dumps({"method": "user_correction"}),
            ),
        )
        _record_correction(connection, "speaker_assignment", meeting_id, local_speaker_id, person.id)

    return {
        "local_speaker_id": local_speaker_id,
        "person_id": person.id,
        "name": person.display_name,
        "created_person": created_person,
    }
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def mark_speaker_unknown(connection: Connection, local_speaker_id: str) -> None:
    """Set the speaker's assignment status to ``"unknown"``.

    Delegates to ``_mark_speaker_status``, which raises ``ValueError`` when the
    local speaker does not exist.
    """
    _mark_speaker_status(connection, local_speaker_id, status="unknown")
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def mark_speaker_ignored(connection: Connection, local_speaker_id: str) -> None:
    """Set the speaker's assignment status to ``"ignored"``.

    Delegates to ``_mark_speaker_status``, which raises ``ValueError`` when the
    local speaker does not exist.
    """
    _mark_speaker_status(connection, local_speaker_id, status="ignored")
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def _mark_speaker_status(connection: Connection, local_speaker_id: str, status: str) -> None:
    """Store a person-less assignment status for a local speaker.

    Inserts or overwrites the speaker's assignment row with ``person_id`` set
    to NULL, the given ``status``, and evidence recording a user correction.
    A correction record without a person is written in the same transaction.

    Args:
        connection: Open SQLite connection.
        local_speaker_id: Id of the local speaker to update.
        status: Status value to store; also recorded as the evidence ``action``.

    Raises:
        ValueError: If ``local_speaker_id`` does not refer to a local speaker.
    """
    owning_meeting = _local_speaker_meeting_id(connection, local_speaker_id)
    if owning_meeting is None:
        raise ValueError(f"Local speaker not found: {local_speaker_id}")

    upsert = """
        INSERT INTO speaker_assignments(id, local_speaker_id, person_id, status, evidence_json)
        VALUES (?, ?, NULL, ?, ?)
        ON CONFLICT(local_speaker_id) DO UPDATE SET
            person_id = NULL,
            status = excluded.status,
            evidence_json = excluded.evidence_json
        """
    evidence = json.dumps({"method": "user_correction", "action": status})
    values = (str(uuid4()), local_speaker_id, status, evidence)

    # Both writes commit or roll back together.
    with connection:
        connection.execute(upsert, values)
        _record_correction(connection, "speaker_assignment", owning_meeting, local_speaker_id, None)
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def _local_speaker_meeting_id(connection: Connection, local_speaker_id: str) -> str | None:
|
|
204
|
+
row = connection.execute("SELECT meeting_id FROM local_speakers WHERE id = ?", (local_speaker_id,)).fetchone()
|
|
205
|
+
return None if row is None else row["meeting_id"]
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _record_correction(
|
|
209
|
+
connection: Connection,
|
|
210
|
+
correction_type: str,
|
|
211
|
+
meeting_id: str,
|
|
212
|
+
local_speaker_id: str,
|
|
213
|
+
person_id: str | None,
|
|
214
|
+
) -> None:
|
|
215
|
+
connection.execute(
|
|
216
|
+
"""
|
|
217
|
+
INSERT INTO corrections(id, correction_type, meeting_id, local_speaker_id, person_id)
|
|
218
|
+
VALUES (?, ?, ?, ?, ?)
|
|
219
|
+
""",
|
|
220
|
+
(str(uuid4()), correction_type, meeting_id, local_speaker_id, person_id),
|
|
221
|
+
)
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from fly_on_the_wall.config import APP_DIR_NAME
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(frozen=True)
class StoragePaths:
    """Immutable bundle of the filesystem locations used for storage.

    ``database`` is a file path; the remaining fields are directories.
    """

    root: Path
    database: Path
    audio: Path
    artifacts: Path
    voice_samples: Path
    exports: Path

    @property
    def directories(self) -> tuple[Path, ...]:
        """Return every directory path (all fields except ``database``), root first."""
        subdirectories = (self.audio, self.artifacts, self.voice_samples, self.exports)
        return (self.root, *subdirectories)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def data_dir() -> Path:
    """Return the application's data directory.

    Uses ``$XDG_DATA_HOME`` (with ``~`` expanded) when the variable is set to
    a non-empty value, otherwise ``~/.local/share``. ``APP_DIR_NAME`` is
    appended in both cases. The directory is not created here.
    """
    override = os.environ.get("XDG_DATA_HOME")
    base = Path(override).expanduser() if override else Path.home() / ".local" / "share"
    return base / APP_DIR_NAME
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def storage_paths(root: Path | None = None) -> StoragePaths:
    """Build the ``StoragePaths`` layout below ``root``.

    Args:
        root: Base directory of the layout. Falls back to ``data_dir()`` when
            omitted.

    Returns:
        The computed paths. Nothing is created on disk.
    """
    base = root or data_dir()
    locations = {
        "root": base,
        "database": base / "fly.db",
        "audio": base / "audio",
        "artifacts": base / "artifacts",
        "voice_samples": base / "voice-samples",
        "exports": base / "exports",
    }
    return StoragePaths(**locations)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def ensure_storage_layout(root: Path | None = None) -> StoragePaths:
    """Create every directory of the storage layout and return the layout.

    Args:
        root: Base directory; forwarded to ``storage_paths``.

    Returns:
        The ``StoragePaths`` whose directories now exist. Missing parents are
        created and directories that already exist are left alone.
    """
    layout = storage_paths(root)
    for target in layout.directories:
        target.mkdir(parents=True, exist_ok=True)
    return layout
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import shutil
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from sqlite3 import Connection
|
|
7
|
+
from uuid import uuid4
|
|
8
|
+
|
|
9
|
+
from fly_on_the_wall.audio import extract_clip
|
|
10
|
+
from fly_on_the_wall.storage import StoragePaths, storage_paths
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True)
|
|
14
|
+
class VoiceSample:
|
|
15
|
+
id: str
|
|
16
|
+
person_id: str
|
|
17
|
+
audio_path: Path
|
|
18
|
+
source_meeting_id: str | None = None
|
|
19
|
+
source_local_speaker_id: str | None = None
|
|
20
|
+
start_time: float | None = None
|
|
21
|
+
end_time: float | None = None
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def create_voice_sample_from_clip(
    connection: Connection,
    person_id: str,
    clip_path: Path,
    storage: StoragePaths | None = None,
) -> VoiceSample:
    """Copy an existing audio clip into storage and register it as a voice sample.

    The clip is copied to ``<voice_samples>/<person_id>/<new uuid><suffix>``,
    keeping the original file suffix, and a matching database row is inserted.

    Args:
        connection: Open SQLite connection used for the insert.
        person_id: Id of the person the sample belongs to; also used as the
            name of the per-person directory.
        clip_path: Path of the clip to import.
        storage: Storage layout to use; defaults to ``storage_paths()``.

    Returns:
        The newly created ``VoiceSample``.

    Raises:
        FileNotFoundError: If ``clip_path`` is not an existing file.
    """
    if not clip_path.is_file():
        raise FileNotFoundError(f"Voice sample clip does not exist: {clip_path}")

    paths = storage or storage_paths()
    sample_id = str(uuid4())
    stored_path = paths.voice_samples / person_id / f"{sample_id}{clip_path.suffix}"
    stored_path.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(clip_path, stored_path)

    sample = VoiceSample(id=sample_id, person_id=person_id, audio_path=stored_path)
    try:
        _insert_voice_sample(connection, sample)
    except Exception:
        # Without a database row nothing references the copy, so remove it
        # instead of leaving an orphaned file behind, then re-raise.
        stored_path.unlink(missing_ok=True)
        raise
    return sample
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def create_voice_sample_from_span(
    connection: Connection,
    person_id: str,
    source_audio_path: Path,
    source_meeting_id: str,
    source_local_speaker_id: str | None,
    start_time: float,
    end_time: float,
    storage: StoragePaths | None = None,
) -> VoiceSample:
    """Cut a time span out of a recording and register it as a voice sample.

    The clip is written to ``<voice_samples>/<person_id>/<new uuid>.wav`` via
    ``extract_clip`` and a matching database row is inserted, including the
    source meeting, source local speaker and time span.

    Args:
        connection: Open SQLite connection used for the insert.
        person_id: Id of the person the sample belongs to; also used as the
            name of the per-person directory.
        source_audio_path: Recording the clip is extracted from.
        source_meeting_id: Meeting the recording belongs to.
        source_local_speaker_id: Local speaker the span is attributed to, if any.
        start_time: Start of the span, forwarded to ``extract_clip``.
        end_time: End of the span, forwarded to ``extract_clip``.
        storage: Storage layout to use; defaults to ``storage_paths()``.

    Returns:
        The newly created ``VoiceSample``.
    """
    paths = storage or storage_paths()
    sample_id = str(uuid4())
    stored_path = paths.voice_samples / person_id / f"{sample_id}.wav"
    # Create the per-person directory first, as the clip-import variant does.
    # NOTE(review): extract_clip may already do this; the call is a no-op then.
    stored_path.parent.mkdir(parents=True, exist_ok=True)

    sample = VoiceSample(
        id=sample_id,
        person_id=person_id,
        audio_path=stored_path,
        source_meeting_id=source_meeting_id,
        source_local_speaker_id=source_local_speaker_id,
        start_time=start_time,
        end_time=end_time,
    )
    try:
        extract_clip(source_audio_path, stored_path, start_time, end_time)
        _insert_voice_sample(connection, sample)
    except Exception:
        # Remove a partial or unreferenced clip so failures leave no orphan file.
        stored_path.unlink(missing_ok=True)
        raise
    return sample
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def list_voice_samples(connection: Connection, person_id: str) -> list[VoiceSample]:
    """Load all stored voice samples of a person, ordered by ``created_at``.

    Args:
        connection: Open SQLite connection; rows are read by column name.
        person_id: Id of the person whose samples are listed.

    Returns:
        One ``VoiceSample`` per row, with ``audio_path`` converted to a ``Path``.
    """
    query = """
        SELECT id,
               person_id,
               audio_path,
               source_meeting_id,
               source_local_speaker_id,
               start_time,
               end_time
        FROM voice_samples
        WHERE person_id = ?
        ORDER BY created_at
        """
    samples: list[VoiceSample] = []
    for record in connection.execute(query, (person_id,)).fetchall():
        samples.append(
            VoiceSample(
                id=record["id"],
                person_id=record["person_id"],
                audio_path=Path(record["audio_path"]),
                source_meeting_id=record["source_meeting_id"],
                source_local_speaker_id=record["source_local_speaker_id"],
                start_time=record["start_time"],
                end_time=record["end_time"],
            )
        )
    return samples
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _insert_voice_sample(connection: Connection, sample: VoiceSample) -> None:
    """Persist ``sample`` as a new ``voice_samples`` row in its own transaction.

    ``audio_path`` is stored as a string. The ``with connection:`` block
    commits on success and rolls back if the insert raises.
    """
    statement = """
        INSERT INTO voice_samples(
            id,
            person_id,
            source_meeting_id,
            source_local_speaker_id,
            start_time,
            end_time,
            audio_path
        ) VALUES (?, ?, ?, ?, ?, ?, ?)
        """
    values = (
        sample.id,
        sample.person_id,
        sample.source_meeting_id,
        sample.source_local_speaker_id,
        sample.start_time,
        sample.end_time,
        str(sample.audio_path),
    )
    with connection:
        connection.execute(statement, values)
|