fow-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. fly_on_the_wall/__init__.py +3 -0
  2. fly_on_the_wall/audio.py +164 -0
  3. fly_on_the_wall/audio_metadata.py +241 -0
  4. fly_on_the_wall/cache.py +26 -0
  5. fly_on_the_wall/cleanup.py +29 -0
  6. fly_on_the_wall/cli.py +641 -0
  7. fly_on_the_wall/cli_costs.py +81 -0
  8. fly_on_the_wall/cli_menu.py +163 -0
  9. fly_on_the_wall/cli_publish.py +141 -0
  10. fly_on_the_wall/cli_speaker_review.py +315 -0
  11. fly_on_the_wall/cli_watch.py +209 -0
  12. fly_on_the_wall/config.py +92 -0
  13. fly_on_the_wall/costs.py +169 -0
  14. fly_on_the_wall/db.py +508 -0
  15. fly_on_the_wall/doctor.py +142 -0
  16. fly_on_the_wall/embeddings.py +142 -0
  17. fly_on_the_wall/exporting.py +155 -0
  18. fly_on_the_wall/glossary.py +31 -0
  19. fly_on_the_wall/meetings.py +382 -0
  20. fly_on_the_wall/normalization.py +166 -0
  21. fly_on_the_wall/people.py +82 -0
  22. fly_on_the_wall/people_embeddings.py +68 -0
  23. fly_on_the_wall/pipeline.py +120 -0
  24. fly_on_the_wall/processing.py +427 -0
  25. fly_on_the_wall/providers/__init__.py +1 -0
  26. fly_on_the_wall/providers/elevenlabs.py +145 -0
  27. fly_on_the_wall/providers/openai_analysis.py +195 -0
  28. fly_on_the_wall/providers/openai_cleanup.py +91 -0
  29. fly_on_the_wall/publishing.py +410 -0
  30. fly_on_the_wall/reanalysis.py +172 -0
  31. fly_on_the_wall/recording_quality.py +141 -0
  32. fly_on_the_wall/rendering.py +115 -0
  33. fly_on_the_wall/secrets.py +93 -0
  34. fly_on_the_wall/service_pricing.py +75 -0
  35. fly_on_the_wall/setup.py +221 -0
  36. fly_on_the_wall/speaker_identity.py +173 -0
  37. fly_on_the_wall/speaker_matching.py +134 -0
  38. fly_on_the_wall/speakers.py +221 -0
  39. fly_on_the_wall/storage.py +53 -0
  40. fly_on_the_wall/voice_samples.py +125 -0
  41. fly_on_the_wall/watch.py +347 -0
  42. fow_cli-0.1.0.dist-info/METADATA +447 -0
  43. fow_cli-0.1.0.dist-info/RECORD +46 -0
  44. fow_cli-0.1.0.dist-info/WHEEL +4 -0
  45. fow_cli-0.1.0.dist-info/entry_points.txt +2 -0
  46. fow_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,195 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Callable
4
+ from dataclasses import dataclass, field
5
+
6
+ import httpx
7
+
8
+ from fly_on_the_wall.providers.openai_cleanup import API_URL
9
+ from fly_on_the_wall.secrets import get_api_key
10
+
11
+ DEFAULT_ANALYSIS_MODEL = "gpt-5.4-mini"
12
+
13
+
14
class OpenAIAnalysisError(RuntimeError):
    """Signal that an OpenAI meeting-analysis or title request could not be completed."""
16
+
17
+
18
@dataclass(frozen=True)
class OpenAIRequestOptions:
    """Immutable per-request settings for the OpenAI helpers in this module."""

    # Model name placed in the request payload.
    model: str = DEFAULT_ANALYSIS_MODEL
    # Explicit API key; when falsy the key is looked up with get_api_key("openai").
    api_key: str | None = None
    # Caller-owned HTTP client; when None a temporary client is created and closed per request.
    client: httpx.Client | None = None
    # Optional hook that receives the full decoded response JSON.
    usage_callback: Callable[[dict], None] | None = None
24
+
25
+
26
@dataclass(frozen=True)
class AnalysisRequest:
    """Input for ``analyze_meeting``: a transcript plus optional context and request options."""

    # Transcript text sent as the user message.
    transcript_markdown: str
    # Free-form context interpolated into the system prompt ("none" when falsy).
    meeting_context: str | None = None
    # Model / key / client / usage-callback settings; a fresh default instance per request.
    options: OpenAIRequestOptions = field(default_factory=OpenAIRequestOptions)
31
+
32
+
33
@dataclass(frozen=True)
class TitleRequest:
    """Input for ``suggest_meeting_title``: transcript, its analysis, context and options."""

    # Transcript text included in the user prompt under "Transcript:".
    transcript_markdown: str
    # Analysis text included in the user prompt under "Analysis:".
    analysis_markdown: str
    # Free-form context interpolated into the system prompt ("none" when falsy).
    meeting_context: str | None = None
    # Model / key / client / usage-callback settings; a fresh default instance per request.
    options: OpenAIRequestOptions = field(default_factory=OpenAIRequestOptions)
39
+
40
+
41
@dataclass(frozen=True)
class ChatCompletionRequest:
    """Fully resolved chat-completion call: both prompts, options and a timeout."""

    # Content of the "system" message.
    system_prompt: str
    # Content of the "user" message.
    user_prompt: str
    # Model / key / client / usage-callback settings for this call.
    options: OpenAIRequestOptions
    # Timeout used when a temporary HTTP client has to be created.
    timeout_seconds: int
47
+
48
+
49
def analyze_meeting(request: AnalysisRequest) -> str:
    """Ask the model for a Markdown analysis of the transcript and return its text.

    The transcript is sent as the user message, the analysis instructions as
    the system message, with a 180 second timeout for a temporary client.
    """
    completion = ChatCompletionRequest(
        system_prompt=_system_prompt(request.meeting_context),
        user_prompt=request.transcript_markdown,
        options=request.options,
        timeout_seconds=180,
    )
    return _post_chat_completion(completion)
58
+
59
+
60
def suggest_meeting_title(request: TitleRequest) -> str:
    """Ask the model for a short meeting title and return it after cleanup.

    The user prompt carries both the transcript and its analysis; the raw
    reply is passed through ``_clean_title`` before being returned.
    """
    user_prompt = f"Transcript:\n{request.transcript_markdown}\n\nAnalysis:\n{request.analysis_markdown}"
    completion = ChatCompletionRequest(
        system_prompt=_title_system_prompt(request.meeting_context),
        user_prompt=user_prompt,
        options=request.options,
        timeout_seconds=60,
    )
    raw_title = _post_chat_completion(completion)
    return _clean_title(raw_title)
70
+
71
+
72
def _post_chat_completion(request: ChatCompletionRequest) -> str:
    """Run one chat-completion call and return the stripped message content.

    A temporary ``httpx.Client`` is created (and closed afterwards) only when
    the options carry no client. httpx errors are re-raised as
    ``OpenAIAnalysisError``.
    """
    options = request.options
    api_key = _require_api_key(options)
    # Remember ownership before substituting a temporary client.
    owns_client = options.client is None
    client = options.client or httpx.Client(timeout=request.timeout_seconds)
    try:
        payload = _send_chat_completion(client, api_key, request)
        _record_usage(options, payload)
        return _extract_content(payload)
    except httpx.HTTPStatusError as exc:
        # Must precede the generic handler: HTTPStatusError is an HTTPError.
        raise OpenAIAnalysisError(f"OpenAI HTTP {exc.response.status_code}: {exc.response.text}") from exc
    except httpx.HTTPError as exc:
        raise OpenAIAnalysisError(f"OpenAI request failed: {exc}") from exc
    finally:
        _close_client(client, owns_client)
87
+
88
+
89
+ def _require_api_key(options: OpenAIRequestOptions) -> str:
90
+ resolved_api_key = options.api_key or get_api_key("openai")
91
+ if not resolved_api_key:
92
+ raise OpenAIAnalysisError("Missing OPENAI_API_KEY.")
93
+ return resolved_api_key
94
+
95
+
96
+ def _close_client(client: httpx.Client, close_client: bool) -> None:
97
+ if close_client:
98
+ client.close()
99
+
100
+
101
def _send_chat_completion(client: httpx.Client, api_key: str, request: ChatCompletionRequest) -> dict:
    """POST the chat-completion payload and return the decoded JSON body.

    Args:
        client: HTTP client used to send the request.
        api_key: Bearer token placed in the ``Authorization`` header.
        request: Prompts and options for this call.

    Raises:
        httpx.HTTPStatusError: on a 4xx/5xx response (via ``raise_for_status``).
        httpx.HTTPError: on transport-level failures.
        OpenAIAnalysisError: when a successful response body is not valid JSON.
    """
    payload = {
        "model": request.options.model,
        "temperature": 0,
        "messages": [
            {"role": "system", "content": request.system_prompt},
            {"role": "user", "content": request.user_prompt},
        ],
    }
    response = client.post(
        API_URL,
        headers={"Authorization": f"Bearer {api_key}"},
        json=payload,
    )
    response.raise_for_status()
    try:
        return response.json()
    except ValueError as exc:
        # A JSON decode failure is a ValueError, not an httpx.HTTPError, so
        # without this it would bypass the module's error wrapping.
        raise OpenAIAnalysisError("OpenAI response was not valid JSON.") from exc
116
+
117
+
118
+ def _record_usage(options: OpenAIRequestOptions, response_json: dict) -> None:
119
+ if options.usage_callback is not None:
120
+ options.usage_callback(response_json)
121
+
122
+
123
def fallback_analysis(error: str | None = None) -> str:
    """Build a placeholder analysis document with every section empty.

    Each section reads "None identified."; when *error* is truthy the Summary
    section additionally carries " Analysis failed: <error>".
    """
    placeholder = "None identified."
    summary = placeholder
    if error:
        summary += f" Analysis failed: {error}"
    sections = (
        ("Summary", summary),
        ("Decisions", placeholder),
        ("Action Items", placeholder),
        ("Open Questions", placeholder),
        ("Important Details", placeholder),
    )
    blocks = ["# Meeting Analysis"]
    for heading, body in sections:
        blocks.append(f"## {heading}")
        blocks.append(body)
    # Blocks are separated by exactly one blank line.
    return "\n\n".join(blocks)
148
+
149
+
150
+ def _system_prompt(meeting_context: str | None) -> str:
151
+ context = meeting_context or "none"
152
+ return f"""
153
+ You analyze meeting transcripts for a personal note-taker.
154
+ Return concise Markdown with exactly these headings:
155
+ # Meeting Analysis
156
+ ## Summary
157
+ ## Decisions
158
+ ## Action Items
159
+ ## Open Questions
160
+ ## Important Details
161
+
162
+ Keep it short and prioritized. Do not invent facts.
163
+ If a section has no useful content, write "None identified."
164
+ For action items, use: - Owner: task. Due: date or Not mentioned.
165
+ Meeting context: {context}
166
+ """.strip()
167
+
168
+
169
+ def _title_system_prompt(meeting_context: str | None) -> str:
170
+ context = meeting_context or "none"
171
+ return f"""
172
+ You name meeting transcripts for a personal note-taker.
173
+ Return only one title, with no Markdown, labels, quotes, or punctuation wrapper.
174
+ Use 3 to 8 words.
175
+ Prefer concrete names, projects, organizations, and topics from the transcript.
176
+ Do not include dates unless the date is central to the meeting topic.
177
+ Do not return generic titles like "Meeting Summary" or "Team Meeting".
178
+ If the transcript has no meaningful content, return an empty string.
179
+ Meeting context: {context}
180
+ """.strip()
181
+
182
+
183
+ def _extract_content(response: dict) -> str:
184
+ try:
185
+ content = response["choices"][0]["message"]["content"]
186
+ except (KeyError, IndexError, TypeError) as exc:
187
+ raise OpenAIAnalysisError("OpenAI response did not contain message content.") from exc
188
+ return str(content).strip()
189
+
190
+
191
+ def _clean_title(value: str) -> str:
192
+ title = value.strip().strip("\"'`")
193
+ if title.lower() in {"meeting summary", "team meeting", "meeting", "untitled"}:
194
+ return ""
195
+ return " ".join(title.split())
@@ -0,0 +1,91 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Callable
4
+
5
+ import httpx
6
+
7
+ from fly_on_the_wall.secrets import get_api_key
8
+
9
+ API_URL = "https://api.openai.com/v1/chat/completions"
10
+ DEFAULT_MODEL = "gpt-5.4-mini"
11
+ DEFAULT_CLEANUP_TIMEOUT_SECONDS = 1800
12
+ CLEANUP_PROMPT_VERSION = "2026-06-04-manuscript-cleanup-v4"
13
+
14
+
15
class OpenAICleanupError(RuntimeError):
    """Signal that an OpenAI transcript-cleanup request could not be completed."""
17
+
18
+
19
def cleanup_transcript(
    transcript: str,
    glossary_terms: list[str] | None = None,
    meeting_context: str | None = None,
    model: str = DEFAULT_MODEL,
    api_key: str | None = None,
    client: httpx.Client | None = None,
    usage_callback: Callable[[dict], None] | None = None,
) -> str:
    """Send *transcript* to the chat-completions endpoint and return the cleaned text.

    Args:
        transcript: Raw transcript, sent as the user message.
        glossary_terms: Terms listed in the system prompt ("none" when empty).
        meeting_context: Context listed in the system prompt ("none" when falsy).
        model: Model name placed in the request payload.
        api_key: Explicit key; falls back to ``get_api_key("openai")``.
        client: Caller-owned HTTP client. When omitted, a temporary client with
            ``DEFAULT_CLEANUP_TIMEOUT_SECONDS`` is created and closed here.
        usage_callback: Optional hook that receives the decoded response JSON.

    Raises:
        OpenAICleanupError: on a missing key, an HTTP/transport failure, a
            non-JSON response body, or a response without message content.
    """
    resolved_api_key = api_key or get_api_key("openai")
    if not resolved_api_key:
        raise OpenAICleanupError("Missing OPENAI_API_KEY.")

    close_client = client is None
    http_client = client or httpx.Client(timeout=DEFAULT_CLEANUP_TIMEOUT_SECONDS)
    try:
        response = http_client.post(
            API_URL,
            headers={"Authorization": f"Bearer {resolved_api_key}"},
            json={
                "model": model,
                "temperature": 0,
                "messages": [
                    {"role": "system", "content": _system_prompt(glossary_terms, meeting_context)},
                    {"role": "user", "content": transcript},
                ],
            },
        )
        response.raise_for_status()
        try:
            response_json = response.json()
        except ValueError as exc:
            # A JSON decode failure is a ValueError, not an httpx.HTTPError;
            # wrap it here so callers only have to handle OpenAICleanupError.
            raise OpenAICleanupError("OpenAI response was not valid JSON.") from exc
        if usage_callback is not None:
            usage_callback(response_json)
        return _extract_content(response_json)
    except httpx.HTTPStatusError as exc:
        message = f"OpenAI HTTP {exc.response.status_code}: {exc.response.text}"
        raise OpenAICleanupError(message) from exc
    except httpx.HTTPError as exc:
        raise OpenAICleanupError(f"OpenAI request failed: {exc}") from exc
    finally:
        # Only close clients created in this call; caller-owned ones stay open.
        if close_client:
            http_client.close()
60
+
61
+
62
+ def _system_prompt(glossary_terms: list[str] | None, meeting_context: str | None) -> str:
63
+ glossary = ", ".join(glossary_terms or []) or "none"
64
+ context = meeting_context or "none"
65
+ return f"""
66
+ You clean meeting transcripts into readable manuscript-style dialogue.
67
+ Preserve speaker names, speaker order, source labels, language, and meaning.
68
+ Make the transcript pleasant to read rather than word-for-word: fix punctuation, casing,
69
+ obvious spacing, and lightly broken phrasing.
70
+ Remove verbal tics, hesitation sounds, repeated false starts, repeated words, and
71
+ filler/discourse-marker words when they only function as speaking habits rather than meaning.
72
+ For Swedish transcripts, words such as "liksom", "alltså", "såhär", "du vet", "eh" and
73
+ "äh" are usually conversational fillers. Default to removing them when the sentence still
74
+ means the same thing without them. Keep them only when they are inside quoted wording, part
75
+ of an idiom, or used with clear literal/comparative meaning, such as "på samma sätt som" or
76
+ "som om". Do not keep them for vague emphasis, hesitation, self-correction, or rhythm.
77
+ Prefer complete readable sentences over literal STT fragments, but do not summarize,
78
+ invent details, remove uncertainty markers, or add new content.
79
+ Preserve standalone acknowledgements such as yes/no/okay/mm and Swedish ja/nej/okej/mm.
80
+ Return only the cleaned manuscript.
81
+ Meeting context: {context}
82
+ Glossary terms: {glossary}
83
+ """.strip()
84
+
85
+
86
+ def _extract_content(response: dict) -> str:
87
+ try:
88
+ content = response["choices"][0]["message"]["content"]
89
+ except (KeyError, IndexError, TypeError) as exc:
90
+ raise OpenAICleanupError("OpenAI response did not contain message content.") from exc
91
+ return str(content).strip()
@@ -0,0 +1,410 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ import json
5
+ import re
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+ from sqlite3 import Connection
9
+ from uuid import uuid4
10
+
11
+
12
@dataclass(frozen=True)
class PublishTarget:
    """Immutable view of one row of the ``publish_targets`` table."""

    # Primary key (a UUID string when created by add_publish_target).
    id: str
    # Human-readable name; also accepted as a lookup identifier.
    name: str
    # Kind of destination; only "obsidian" is accepted by this module.
    target_type: str
    # Directory that published notes are written into.
    path: Path
    # Whether publish_enabled_targets should publish to this target.
    auto_publish: bool
    # Disabled targets are rejected by publish_meeting.
    enabled: bool
    # Decoded contents of the settings_json column.
    settings: dict
21
+
22
+
23
@dataclass(frozen=True)
class PublishResult:
    """Outcome of publishing one meeting to one target."""

    # Target the note was published to.
    target: PublishTarget
    # File the note was written to.
    output_path: Path
    # Hex SHA-256 digest of the written note content.
    content_sha256: str
28
+
29
+
30
def add_publish_target(
    connection: Connection,
    target_type: str,
    path: Path,
    name: str,
    auto_publish: bool = False,
    enabled: bool = True,
    settings: dict | None = None,
) -> PublishTarget:
    """Insert a new publish target row and return it as a ``PublishTarget``.

    The path is expanded and resolved before storage; settings are stored as
    JSON with sorted keys.

    Raises:
        ValueError: if *target_type* is anything other than "obsidian".
    """
    if target_type != "obsidian":
        raise ValueError(f"Unsupported publish target type: {target_type}")

    new_id = str(uuid4())
    absolute_path = path.expanduser().resolve()
    row_values = (
        new_id,
        name,
        target_type,
        str(absolute_path),
        json.dumps(settings or {}, sort_keys=True),
        1 if auto_publish else 0,
        1 if enabled else 0,
    )
    insert_sql = """
        INSERT INTO publish_targets(
            id, name, target_type, path, settings_json, auto_publish, enabled
        ) VALUES (?, ?, ?, ?, ?, ?, ?)
        """
    # The connection context manager commits on success and rolls back on error.
    with connection:
        connection.execute(insert_sql, row_values)
    return PublishTarget(
        new_id,
        name,
        target_type,
        absolute_path,
        auto_publish,
        enabled,
        settings or {},
    )
62
+
63
+
64
def list_publish_targets(connection: Connection) -> list[PublishTarget]:
    """Return every publish target, ordered by creation time and then name."""
    cursor = connection.execute(
        """
        SELECT * FROM publish_targets
        ORDER BY created_at, name
        """
    )
    targets: list[PublishTarget] = []
    for row in cursor.fetchall():
        targets.append(_target_from_row(row))
    return targets
74
+
75
+
76
def get_publish_target(connection: Connection, identifier: str) -> PublishTarget | None:
    """Look up a publish target whose id or name equals *identifier*.

    Returns ``None`` when no row matches.
    """
    lookup_sql = """
        SELECT * FROM publish_targets
        WHERE id = ? OR name = ?
        """
    match = connection.execute(lookup_sql, (identifier, identifier)).fetchone()
    if match is None:
        return None
    return _target_from_row(match)
85
+
86
+
87
def remove_publish_target(connection: Connection, identifier: str) -> PublishTarget | None:
    """Delete the target matching *identifier* and return what was removed.

    Returns ``None`` (and deletes nothing) when no target matches.
    """
    doomed = get_publish_target(connection, identifier)
    if doomed is not None:
        with connection:
            connection.execute("DELETE FROM publish_targets WHERE id = ?", (doomed.id,))
    return doomed
94
+
95
+
96
def set_publish_target_enabled(connection: Connection, identifier: str, enabled: bool) -> PublishTarget | None:
    """Persist a new ``enabled`` flag for a target and return the updated target.

    Returns ``None`` when no target matches *identifier*.
    """
    current = get_publish_target(connection, identifier)
    if current is None:
        return None

    flag = 1 if enabled else 0
    update_sql = """
        UPDATE publish_targets
        SET enabled = ?, updated_at = CURRENT_TIMESTAMP
        WHERE id = ?
        """
    with connection:
        connection.execute(update_sql, (flag, current.id))

    # PublishTarget is frozen, so build a copy carrying the new flag.
    return PublishTarget(
        id=current.id,
        name=current.name,
        target_type=current.target_type,
        path=current.path,
        auto_publish=current.auto_publish,
        enabled=enabled,
        settings=current.settings,
    )
118
+
119
+
120
def publish_meeting(connection: Connection, meeting_id_or_slug: str, target_identifier: str) -> PublishResult:
    """Render a meeting's latest markdown export as a note and write it to a target.

    Args:
        connection: Database connection used for lookups and bookkeeping.
        meeting_id_or_slug: Meeting id or slug.
        target_identifier: Publish target id or name.

    Returns:
        The target, the written file path and the SHA-256 of the note content.

    Raises:
        ValueError: if the target is missing, disabled or of an unsupported
            type, or if the meeting or its markdown export cannot be found.
    """
    target = get_publish_target(connection, target_identifier)
    if target is None:
        raise ValueError(f"Publish target not found: {target_identifier}")
    if not target.enabled:
        raise ValueError(f"Publish target is disabled: {target.name}")
    if target.target_type != "obsidian":
        raise ValueError(f"Unsupported publish target type: {target.target_type}")

    meeting = _meeting_with_metadata(connection, meeting_id_or_slug)
    export = _latest_export(connection, meeting["id"])
    transcript_path, analysis_path, manifest_path = _export_paths(export)
    # NOTE(review): export files are still read with the platform default
    # encoding; confirm which encoding the exporter writes before pinning it.
    transcript_markdown = transcript_path.read_text()
    analysis_markdown = _read_analysis_markdown(analysis_path)
    manifest = json.loads(manifest_path.read_text())
    output_path = _published_output_path(connection, meeting, target)
    content = _obsidian_note(meeting, transcript_markdown, analysis_markdown, manifest)
    content_hash = _sha256(content)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Write UTF-8 explicitly: the hash above is computed over UTF-8 bytes, and
    # the locale default can corrupt or reject non-ASCII transcript text.
    output_path.write_text(content, encoding="utf-8")
    _upsert_published_item(connection, meeting["id"], target.id, output_path, content_hash)
    return PublishResult(target, output_path, content_hash)
143
+
144
+
145
def publish_all_meetings(
    connection: Connection, target_identifier: str, only_unpublished: bool = False
) -> list[PublishResult]:
    """Publish every meeting that has a markdown export to one target.

    With *only_unpublished* set, meetings already published to the target are
    skipped.

    Raises:
        ValueError: if the target cannot be found.
    """
    target = get_publish_target(connection, target_identifier)
    if target is None:
        raise ValueError(f"Publish target not found: {target_identifier}")

    meeting_ids = _publishable_meeting_ids(connection, target.id, only_unpublished)
    return [publish_meeting(connection, meeting_id, target.id) for meeting_id in meeting_ids]
156
+
157
+
158
def publish_enabled_targets(connection: Connection, meeting_id: str) -> list[PublishResult]:
    """Publish one meeting to every target that is both enabled and auto-publishing."""
    return [
        publish_meeting(connection, meeting_id, target.id)
        for target in list_publish_targets(connection)
        if target.enabled and target.auto_publish
    ]
164
+
165
+
166
+ def _publishable_meeting_ids(connection: Connection, target_id: str, only_unpublished: bool) -> list[str]:
167
+ rows = connection.execute(
168
+ """
169
+ SELECT meetings.id
170
+ FROM meetings
171
+ WHERE EXISTS (
172
+ SELECT 1 FROM exports
173
+ WHERE exports.meeting_id = meetings.id AND exports.format = 'markdown'
174
+ )
175
+ AND (
176
+ ? = 0 OR NOT EXISTS (
177
+ SELECT 1 FROM published_items
178
+ WHERE published_items.meeting_id = meetings.id
179
+ AND published_items.target_id = ?
180
+ )
181
+ )
182
+ ORDER BY meetings.created_at
183
+ """,
184
+ (1 if only_unpublished else 0, target_id),
185
+ ).fetchall()
186
+ return [row["id"] for row in rows]
187
+
188
+
189
def _target_from_row(row) -> PublishTarget:
    """Convert a ``publish_targets`` row (indexable by column name) to a ``PublishTarget``."""
    # An empty or NULL settings_json column decodes to an empty dict.
    settings = json.loads(row["settings_json"] or "{}")
    return PublishTarget(
        row["id"],
        row["name"],
        row["target_type"],
        Path(row["path"]),
        bool(row["auto_publish"]),
        bool(row["enabled"]),
        settings,
    )
199
+
200
+
201
+ def _meeting_with_metadata(connection: Connection, meeting_id_or_slug: str) -> dict:
202
+ row = connection.execute(
203
+ """
204
+ SELECT meetings.*, audio_metadata.recorded_at, audio_metadata.recorded_at_confidence,
205
+ audio_metadata.duration_seconds, audio_metadata.device_or_software,
206
+ recording_quality.status AS recording_quality_status,
207
+ recording_quality.reason AS recording_quality_reason
208
+ FROM meetings
209
+ LEFT JOIN audio_metadata ON audio_metadata.meeting_id = meetings.id
210
+ LEFT JOIN recording_quality ON recording_quality.meeting_id = meetings.id
211
+ WHERE meetings.id = ? OR meetings.slug = ?
212
+ """,
213
+ (meeting_id_or_slug, meeting_id_or_slug),
214
+ ).fetchone()
215
+ if row is None:
216
+ raise ValueError(f"Meeting not found: {meeting_id_or_slug}")
217
+ return dict(row)
218
+
219
+
220
+ def _latest_export(connection: Connection, meeting_id: str) -> dict:
221
+ row = connection.execute(
222
+ """
223
+ SELECT exports.*, rowid
224
+ FROM exports
225
+ WHERE meeting_id = ? AND format = 'markdown'
226
+ ORDER BY created_at DESC, rowid DESC
227
+ LIMIT 1
228
+ """,
229
+ (meeting_id,),
230
+ ).fetchone()
231
+ if row is None:
232
+ raise ValueError(f"No markdown export found for meeting: {meeting_id}")
233
+ return dict(row)
234
+
235
+
236
def _export_paths(export: dict) -> tuple[Path, Path | None, Path]:
    """Resolve the transcript, analysis and manifest paths for an export row.

    Paths named in the manifest win; otherwise ``transcript.md`` and (if it
    exists) ``analysis.md`` next to the manifest are used.
    """
    manifest_path = Path(export["manifest_path"])
    export_dir = manifest_path.parent
    manifest = json.loads(manifest_path.read_text())

    declared_transcript = manifest.get("transcript_path")
    if declared_transcript:
        transcript_path = Path(declared_transcript)
    else:
        transcript_path = Path(export_dir / "transcript.md")
    analysis_path = _optional_manifest_path(manifest, "analysis_path", export_dir / "analysis.md")
    return transcript_path, analysis_path, manifest_path
242
+
243
+
244
+ def _optional_manifest_path(manifest: dict, key: str, fallback: Path) -> Path | None:
245
+ if manifest.get(key):
246
+ return Path(manifest[key])
247
+ if fallback.exists():
248
+ return fallback
249
+ return None
250
+
251
+
252
+ def _read_analysis_markdown(analysis_path: Path | None) -> str:
253
+ if analysis_path is not None and analysis_path.exists():
254
+ return analysis_path.read_text()
255
+ return "# Meeting Analysis\n\n## Summary\n\nNo analysis export found for this snapshot."
256
+
257
+
258
def _published_output_path(connection: Connection, meeting: dict, target: PublishTarget) -> Path:
    """Decide where a meeting's note lives inside a target.

    A previously recorded output path is reused; otherwise the path is
    ``<target.path>/<date> <title>.md`` with unsafe filename characters
    replaced.
    """
    lookup_sql = """
        SELECT output_path FROM published_items
        WHERE meeting_id = ? AND target_id = ?
        """
    previous = connection.execute(lookup_sql, (meeting["id"], target.id)).fetchone()
    if previous is None:
        day = _meeting_date(meeting)
        return target.path / _safe_filename(f"{day} {meeting['title']}.md")
    # Reusing the stored path keeps republished notes in the same file.
    return Path(previous["output_path"])
272
+
273
+
274
def _obsidian_note(meeting: dict, transcript_markdown: str, analysis_markdown: str, manifest: dict) -> str:
    """Render the full note: frontmatter, details, analysis and manuscript.

    Args:
        meeting: Meeting row with joined audio/quality columns.
        transcript_markdown: Exported transcript; leading headings are stripped.
        analysis_markdown: Exported analysis; its top heading is stripped.
        manifest: Export manifest; only its ``id`` is shown.

    Returns:
        The note text, ending with exactly one newline.
    """
    date, time = _date_time(_meeting_timestamp(meeting))
    frontmatter = {
        "title": meeting["title"],
        "date": date,
        "time": time,
        "source": "fly-on-the-wall",
        "meeting_id": meeting["id"],
        "slug": meeting["slug"],
        "title_source": meeting.get("title_source"),
        "recorded_at": meeting.get("recorded_at"),
        "duration_seconds": meeting.get("duration_seconds"),
        "recording_quality": meeting.get("recording_quality_status"),
        "tags": ["meetings", "fly-on-the-wall"],
    }

    details = [f"Date: {date}", f"Time: {time}"]
    if meeting.get("duration_seconds") is not None:
        details.append(f"Duration: {_format_duration(float(meeting['duration_seconds']))}")
    if meeting.get("device_or_software"):
        details.append(f"Device/Software: {meeting['device_or_software']}")
    quality_status = meeting.get("recording_quality_status")
    if quality_status:
        quality_reason = meeting.get("recording_quality_reason")
        # Show the parenthesised reason only when there is one, so a missing
        # reason does not render as "(None)".
        if quality_reason:
            details.append(f"Recording Quality: {quality_status} ({quality_reason})")
        else:
            details.append(f"Recording Quality: {quality_status}")
    details.append(f"Internal Export: {manifest.get('id', 'unknown')}")

    lines = [
        "---",
        *_yaml_lines(frontmatter),
        "---",
        "",
        "<!-- This note is managed by Fly on the Wall. Republishing may overwrite changes. -->",
        "",
        f"# {meeting['title']}",
        "",
        "## Details",
        "",
        *details,
        "",
        "## Analysis",
        "",
        _strip_top_heading(analysis_markdown, "Meeting Analysis"),
        "",
        "## Manuscript",
        "",
        _strip_transcript_heading(transcript_markdown),
    ]
    return "\n".join(lines).rstrip() + "\n"
316
+
317
+
318
+ def _yaml_lines(values: dict) -> list[str]:
319
+ lines: list[str] = []
320
+ for key, value in values.items():
321
+ if value is None:
322
+ continue
323
+ if isinstance(value, list):
324
+ lines.append(f"{key}:")
325
+ lines.extend(f" - {_yaml_scalar(item)}" for item in value)
326
+ else:
327
+ lines.append(f"{key}: {_yaml_scalar(value)}")
328
+ return lines
329
+
330
+
331
+ def _yaml_scalar(value: object) -> str:
332
+ text = str(value)
333
+ if re.search(r"[:#\n,]", text):
334
+ return json.dumps(text, ensure_ascii=False)
335
+ return text
336
+
337
+
338
+ def _strip_top_heading(markdown: str, heading: str) -> str:
339
+ lines = markdown.strip().splitlines()
340
+ if lines and lines[0].strip() == f"# {heading}":
341
+ return "\n".join(lines[1:]).strip()
342
+ return markdown.strip()
343
+
344
+
345
+ def _strip_transcript_heading(markdown: str) -> str:
346
+ lines = markdown.strip().splitlines()
347
+ if lines and lines[0].startswith("# "):
348
+ lines = lines[1:]
349
+ while lines and lines[0].strip() not in {"## Transcript", "## Manuscript"}:
350
+ lines.pop(0)
351
+ if lines and lines[0].strip() in {"## Transcript", "## Manuscript"}:
352
+ lines.pop(0)
353
+ return "\n".join(lines).strip()
354
+
355
+
356
+ def _meeting_timestamp(meeting: dict) -> str | None:
357
+ if meeting.get("recorded_at_confidence") in {"high", "medium"}:
358
+ return meeting.get("recorded_at")
359
+ return meeting.get("created_at")
360
+
361
+
362
def _meeting_date(meeting: dict) -> str:
    """Return the meeting's date for use in filenames, or "undated" when unknown."""
    day = _date_time(_meeting_timestamp(meeting))[0]
    if day == "Unknown":
        return "undated"
    return day
365
+
366
+
367
+ def _date_time(value: str | None) -> tuple[str, str]:
368
+ if not value:
369
+ return "Unknown", "Unknown"
370
+ date, _, time = value.partition(" ")
371
+ return date or "Unknown", time or "Unknown"
372
+
373
+
374
+ def _format_duration(seconds: float) -> str:
375
+ total_seconds = int(seconds)
376
+ hours, remainder = divmod(total_seconds, 3600)
377
+ minutes, seconds = divmod(remainder, 60)
378
+ if hours:
379
+ return f"{hours}h {minutes}m {seconds}s"
380
+ if minutes:
381
+ return f"{minutes}m {seconds}s"
382
+ return f"{seconds}s"
383
+
384
+
385
+ def _safe_filename(value: str) -> str:
386
+ safe = re.sub(r"[\\/:*?\"<>|]+", "-", value).strip()
387
+ return safe or "meeting.md"
388
+
389
+
390
+ def _upsert_published_item(
391
+ connection: Connection, meeting_id: str, target_id: str, output_path: Path, content_hash: str
392
+ ) -> None:
393
+ with connection:
394
+ connection.execute(
395
+ """
396
+ INSERT INTO published_items(
397
+ id, meeting_id, target_id, output_path, content_sha256
398
+ ) VALUES (?, ?, ?, ?, ?)
399
+ ON CONFLICT(meeting_id, target_id) DO UPDATE SET
400
+ output_path = excluded.output_path,
401
+ content_sha256 = excluded.content_sha256,
402
+ published_at = CURRENT_TIMESTAMP,
403
+ updated_at = CURRENT_TIMESTAMP
404
+ """,
405
+ (str(uuid4()), meeting_id, target_id, str(output_path), content_hash),
406
+ )
407
+
408
+
409
+ def _sha256(value: str) -> str:
410
+ return hashlib.sha256(value.encode()).hexdigest()