fow-cli 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fow_cli-0.2.0 → fow_cli-0.3.0}/CHANGELOG.md +9 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/PKG-INFO +29 -1
- {fow_cli-0.2.0 → fow_cli-0.3.0}/README.md +28 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/pyproject.toml +1 -1
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/__init__.py +1 -1
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/cli.py +2 -0
- fow_cli-0.3.0/src/fly_on_the_wall/cli_glossary.py +124 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/db.py +11 -1
- fow_cli-0.3.0/src/fly_on_the_wall/glossary.py +207 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/processing.py +29 -8
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/providers/elevenlabs.py +19 -4
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/providers/openai_analysis.py +22 -4
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/providers/openai_cleanup.py +12 -3
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/publishing.py +32 -3
- fow_cli-0.2.0/src/fly_on_the_wall/glossary.py +0 -31
- {fow_cli-0.2.0 → fow_cli-0.3.0}/.gitignore +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/LICENSE +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/api_keys.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/audio.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/audio_metadata.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/cache.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/cleanup.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/cli_costs.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/cli_menu.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/cli_publish.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/cli_speaker_review.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/cli_watch.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/config.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/costs.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/doctor.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/embeddings.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/exporting.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/meetings.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/normalization.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/people.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/people_embeddings.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/pipeline.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/providers/__init__.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/py.typed +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/reanalysis.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/recording_quality.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/rendering.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/secrets.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/service_pricing.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/setup.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/speaker_identity.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/speaker_matching.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/speakers.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/storage.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/voice_samples.py +0 -0
- {fow_cli-0.2.0 → fow_cli-0.3.0}/src/fly_on_the_wall/watch.py +0 -0
|
@@ -2,6 +2,15 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to Fly on the Wall are documented here.
|
|
4
4
|
|
|
5
|
+
## [0.3.0] - 2026-06-13
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
|
|
9
|
+
- Added glossary management with `fow glossary` commands.
|
|
10
|
+
- Added glossary and known-person hints for ElevenLabs transcription keyterms.
|
|
11
|
+
- Added glossary guidance to OpenAI cleanup, analysis, and title generation.
|
|
12
|
+
- Added Obsidian `participants` frontmatter links for known meeting speakers.
|
|
13
|
+
|
|
5
14
|
## [0.2.0] - 2026-06-09
|
|
6
15
|
|
|
7
16
|
### Added
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fow-cli
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Personal CLI note-taker for turning meeting audio into cleaned meeting manuscripts.
|
|
5
5
|
Project-URL: Repository, https://github.com/henriksvensson/fly-on-the-wall
|
|
6
6
|
License-Expression: MIT
|
|
@@ -50,6 +50,8 @@ Issues and suggestions are welcome via GitHub Issues, but the project is provide
|
|
|
50
50
|
|
|
51
51
|
Audio is sent to configured transcription/AI providers during processing. Optional speaker identity embeddings run locally when installed with the `identity` extra. External providers may charge usage-based fees depending on your provider account, pricing plan, and processing volume.
|
|
52
52
|
|
|
53
|
+
Glossary/keyterm hints are sent to ElevenLabs when processing new recordings. ElevenLabs currently documents this as a billable add-on to speech-to-text usage.
|
|
54
|
+
|
|
53
55
|
## Development Transparency
|
|
54
56
|
|
|
55
57
|
This project was developed as an agentic coding project using [OpenCode](https://opencode.ai/) with [OpenAI](https://openai.com/) GPT-5.5. Code quality checks were supported by CodeScene's [CodeHealth](https://codescene.com/product/code-health) analysis.
|
|
@@ -271,6 +273,32 @@ fow people embeddings status
|
|
|
271
273
|
fow people embeddings backfill
|
|
272
274
|
```
|
|
273
275
|
|
|
276
|
+
## Glossary
|
|
277
|
+
|
|
278
|
+
Use the glossary for names, company names, project names, product names, acronyms, and domain-specific phrases that transcription or cleanup models may spell incorrectly.
|
|
279
|
+
|
|
280
|
+
Add terms with optional context:
|
|
281
|
+
|
|
282
|
+
```bash
|
|
283
|
+
fow glossary add "Hejare" --description "Company name"
|
|
284
|
+
fow glossary add "Datadrivna" --description "The phrase data driven in Swedish"
|
|
285
|
+
fow glossary add "Ants" --description "Company name"
|
|
286
|
+
fow glossary add "TT" --description "Company name, short for Theodora Tech"
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
Manage terms:
|
|
290
|
+
|
|
291
|
+
```bash
|
|
292
|
+
fow glossary list
|
|
293
|
+
fow glossary show "Hejare"
|
|
294
|
+
fow glossary update "TT" --description "Company name, short for Theodora Tech"
|
|
295
|
+
fow glossary disable "Ants"
|
|
296
|
+
fow glossary enable "Ants"
|
|
297
|
+
fow glossary remove "Ants"
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
During processing, `fow` combines enabled glossary terms with known people names. The combined list is sent to ElevenLabs as transcription keyterms for new transcriptions, and to OpenAI cleanup, analysis, and title generation as spelling/context guidance. Corrections are model-mediated; `fow` does not do deterministic search-and-replace from the glossary.
|
|
301
|
+
|
|
274
302
|
## Watched Folders
|
|
275
303
|
|
|
276
304
|
Fly on the Wall can watch local folders, mounted Dropbox/rclone folders, and removable recorder folders.
|
|
@@ -20,6 +20,8 @@ Issues and suggestions are welcome via GitHub Issues, but the project is provide
|
|
|
20
20
|
|
|
21
21
|
Audio is sent to configured transcription/AI providers during processing. Optional speaker identity embeddings run locally when installed with the `identity` extra. External providers may charge usage-based fees depending on your provider account, pricing plan, and processing volume.
|
|
22
22
|
|
|
23
|
+
Glossary/keyterm hints are sent to ElevenLabs when processing new recordings. ElevenLabs currently documents this as a billable add-on to speech-to-text usage.
|
|
24
|
+
|
|
23
25
|
## Development Transparency
|
|
24
26
|
|
|
25
27
|
This project was developed as an agentic coding project using [OpenCode](https://opencode.ai/) with [OpenAI](https://openai.com/) GPT-5.5. Code quality checks were supported by CodeScene's [CodeHealth](https://codescene.com/product/code-health) analysis.
|
|
@@ -241,6 +243,32 @@ fow people embeddings status
|
|
|
241
243
|
fow people embeddings backfill
|
|
242
244
|
```
|
|
243
245
|
|
|
246
|
+
## Glossary
|
|
247
|
+
|
|
248
|
+
Use the glossary for names, company names, project names, product names, acronyms, and domain-specific phrases that transcription or cleanup models may spell incorrectly.
|
|
249
|
+
|
|
250
|
+
Add terms with optional context:
|
|
251
|
+
|
|
252
|
+
```bash
|
|
253
|
+
fow glossary add "Hejare" --description "Company name"
|
|
254
|
+
fow glossary add "Datadrivna" --description "The phrase data driven in Swedish"
|
|
255
|
+
fow glossary add "Ants" --description "Company name"
|
|
256
|
+
fow glossary add "TT" --description "Company name, short for Theodora Tech"
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
Manage terms:
|
|
260
|
+
|
|
261
|
+
```bash
|
|
262
|
+
fow glossary list
|
|
263
|
+
fow glossary show "Hejare"
|
|
264
|
+
fow glossary update "TT" --description "Company name, short for Theodora Tech"
|
|
265
|
+
fow glossary disable "Ants"
|
|
266
|
+
fow glossary enable "Ants"
|
|
267
|
+
fow glossary remove "Ants"
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
During processing, `fow` combines enabled glossary terms with known people names. The combined list is sent to ElevenLabs as transcription keyterms for new transcriptions, and to OpenAI cleanup, analysis, and title generation as spelling/context guidance. Corrections are model-mediated; `fow` does not do deterministic search-and-replace from the glossary.
|
|
271
|
+
|
|
244
272
|
## Watched Folders
|
|
245
273
|
|
|
246
274
|
Fly on the Wall can watch local folders, mounted Dropbox/rclone folders, and removable recorder folders.
|
|
@@ -9,6 +9,7 @@ from rich.table import Table
|
|
|
9
9
|
|
|
10
10
|
from fly_on_the_wall import __version__
|
|
11
11
|
from fly_on_the_wall.cli_costs import costs_app
|
|
12
|
+
from fly_on_the_wall.cli_glossary import glossary_app
|
|
12
13
|
from fly_on_the_wall.cli_publish import publish_app
|
|
13
14
|
from fly_on_the_wall.cli_speaker_review import speakers_review
|
|
14
15
|
from fly_on_the_wall.cli_watch import watch_app
|
|
@@ -78,6 +79,7 @@ app.add_typer(meetings_app, name="meetings")
|
|
|
78
79
|
meetings_app.add_typer(meeting_speakers_app, name="speakers")
|
|
79
80
|
app.add_typer(refresh_app, name="refresh")
|
|
80
81
|
app.add_typer(secrets_app, name="secrets")
|
|
82
|
+
app.add_typer(glossary_app, name="glossary")
|
|
81
83
|
app.add_typer(watch_app, name="watch")
|
|
82
84
|
app.add_typer(publish_app, name="publish")
|
|
83
85
|
app.add_typer(costs_app, name="costs")
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Annotated
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
from rich.table import Table
|
|
8
|
+
|
|
9
|
+
from fly_on_the_wall.db import database
|
|
10
|
+
from fly_on_the_wall.glossary import (
|
|
11
|
+
create_glossary_term,
|
|
12
|
+
get_glossary_term,
|
|
13
|
+
list_glossary_terms,
|
|
14
|
+
remove_glossary_term,
|
|
15
|
+
update_glossary_term,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
glossary_app = typer.Typer(help="Manage transcription and cleanup glossary terms.", no_args_is_help=True)
|
|
19
|
+
console = Console()
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@glossary_app.command("add")
|
|
23
|
+
def glossary_add(
|
|
24
|
+
term: str,
|
|
25
|
+
description: Annotated[str | None, typer.Option("--description", "-d", help="Optional context.")] = None,
|
|
26
|
+
) -> None:
|
|
27
|
+
"""Add a word or phrase to the glossary."""
|
|
28
|
+
with database() as connection:
|
|
29
|
+
try:
|
|
30
|
+
created = create_glossary_term(connection, term, description)
|
|
31
|
+
except ValueError as exc:
|
|
32
|
+
console.print(str(exc))
|
|
33
|
+
raise typer.Exit(code=1) from exc
|
|
34
|
+
console.print(f"Added glossary term: {created.term}")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@glossary_app.command("list")
|
|
38
|
+
def glossary_list(
|
|
39
|
+
all_terms: Annotated[bool, typer.Option("--all", help="Include disabled terms.")] = False,
|
|
40
|
+
) -> None:
|
|
41
|
+
"""List glossary terms."""
|
|
42
|
+
with database() as connection:
|
|
43
|
+
terms = list_glossary_terms(connection, include_disabled=all_terms)
|
|
44
|
+
if not terms:
|
|
45
|
+
console.print("No glossary terms found.")
|
|
46
|
+
return
|
|
47
|
+
|
|
48
|
+
table = Table(title="Glossary")
|
|
49
|
+
table.add_column("Term")
|
|
50
|
+
table.add_column("Description")
|
|
51
|
+
table.add_column("Enabled")
|
|
52
|
+
for term in terms:
|
|
53
|
+
table.add_row(term.term, term.description or "", "yes" if term.enabled else "no")
|
|
54
|
+
console.print(table)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@glossary_app.command("show")
|
|
58
|
+
def glossary_show(term: str) -> None:
|
|
59
|
+
"""Show one glossary term."""
|
|
60
|
+
with database() as connection:
|
|
61
|
+
found = get_glossary_term(connection, term)
|
|
62
|
+
if found is None:
|
|
63
|
+
console.print(f"Glossary term not found: {term}")
|
|
64
|
+
raise typer.Exit(code=1)
|
|
65
|
+
console.print(f"Term: {found.term}")
|
|
66
|
+
console.print(f"Description: {found.description or ''}")
|
|
67
|
+
console.print(f"Enabled: {'yes' if found.enabled else 'no'}")
|
|
68
|
+
console.print(f"ID: {found.id}")
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@glossary_app.command("update")
|
|
72
|
+
def glossary_update(
|
|
73
|
+
term: str,
|
|
74
|
+
new_term: Annotated[str | None, typer.Option("--term", help="Replace the glossary term text.")] = None,
|
|
75
|
+
description: Annotated[str | None, typer.Option("--description", "-d", help="Replace the description.")] = None,
|
|
76
|
+
) -> None:
|
|
77
|
+
"""Update a glossary term or description."""
|
|
78
|
+
with database() as connection:
|
|
79
|
+
try:
|
|
80
|
+
updated = update_glossary_term(connection, term, term=new_term, description=description)
|
|
81
|
+
except ValueError as exc:
|
|
82
|
+
console.print(str(exc))
|
|
83
|
+
raise typer.Exit(code=1) from exc
|
|
84
|
+
console.print(f"Updated glossary term: {updated.term}")
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@glossary_app.command("enable")
|
|
88
|
+
def glossary_enable(term: str) -> None:
|
|
89
|
+
"""Enable a glossary term."""
|
|
90
|
+
_set_enabled(term, True)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
@glossary_app.command("disable")
|
|
94
|
+
def glossary_disable(term: str) -> None:
|
|
95
|
+
"""Disable a glossary term without deleting it."""
|
|
96
|
+
_set_enabled(term, False)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
@glossary_app.command("remove")
|
|
100
|
+
def glossary_remove(
|
|
101
|
+
term: str,
|
|
102
|
+
yes: Annotated[bool, typer.Option("--yes", "-y", help="Remove without confirmation.")] = False,
|
|
103
|
+
) -> None:
|
|
104
|
+
"""Remove a glossary term."""
|
|
105
|
+
if not yes and not typer.confirm(f"Remove glossary term '{term}'?", default=False):
|
|
106
|
+
console.print("Cancelled.")
|
|
107
|
+
return
|
|
108
|
+
with database() as connection:
|
|
109
|
+
removed = remove_glossary_term(connection, term)
|
|
110
|
+
if not removed:
|
|
111
|
+
console.print(f"Glossary term not found: {term}")
|
|
112
|
+
raise typer.Exit(code=1)
|
|
113
|
+
console.print(f"Removed glossary term: {term}")
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _set_enabled(term: str, enabled: bool) -> None:
|
|
117
|
+
with database() as connection:
|
|
118
|
+
try:
|
|
119
|
+
updated = update_glossary_term(connection, term, enabled=enabled)
|
|
120
|
+
except ValueError as exc:
|
|
121
|
+
console.print(str(exc))
|
|
122
|
+
raise typer.Exit(code=1) from exc
|
|
123
|
+
state = "Enabled" if enabled else "Disabled"
|
|
124
|
+
console.print(f"{state} glossary term: {updated.term}")
|
|
@@ -8,7 +8,7 @@ from pathlib import Path
|
|
|
8
8
|
|
|
9
9
|
from fly_on_the_wall.storage import ensure_storage_layout, storage_paths
|
|
10
10
|
|
|
11
|
-
SCHEMA_VERSION =
|
|
11
|
+
SCHEMA_VERSION = 18
|
|
12
12
|
|
|
13
13
|
SCHEMA_STATEMENTS = (
|
|
14
14
|
"""
|
|
@@ -43,6 +43,16 @@ SCHEMA_STATEMENTS = (
|
|
|
43
43
|
)
|
|
44
44
|
""",
|
|
45
45
|
"""
|
|
46
|
+
CREATE TABLE IF NOT EXISTS glossary_terms (
|
|
47
|
+
id TEXT PRIMARY KEY,
|
|
48
|
+
term TEXT NOT NULL UNIQUE,
|
|
49
|
+
description TEXT,
|
|
50
|
+
enabled INTEGER NOT NULL DEFAULT 1,
|
|
51
|
+
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
|
52
|
+
updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
|
53
|
+
)
|
|
54
|
+
""",
|
|
55
|
+
"""
|
|
46
56
|
CREATE TABLE IF NOT EXISTS pipeline_stages (
|
|
47
57
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
48
58
|
meeting_id TEXT NOT NULL,
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from sqlite3 import Connection
|
|
6
|
+
from typing import Any
|
|
7
|
+
from uuid import uuid4
|
|
8
|
+
|
|
9
|
+
import yaml
|
|
10
|
+
|
|
11
|
+
UNSUPPORTED_KEYTERM_CHARS = set("<>{}[]\\")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(frozen=True)
|
|
15
|
+
class GlossaryTerm:
|
|
16
|
+
id: str
|
|
17
|
+
term: str
|
|
18
|
+
description: str | None
|
|
19
|
+
enabled: bool
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def create_glossary_term(connection: Connection, term: str, description: str | None = None) -> GlossaryTerm:
|
|
23
|
+
normalized = _normalize_term(term)
|
|
24
|
+
normalized_description = _normalize_optional(description)
|
|
25
|
+
term_id = str(uuid4())
|
|
26
|
+
with connection:
|
|
27
|
+
connection.execute(
|
|
28
|
+
"""
|
|
29
|
+
INSERT INTO glossary_terms(id, term, description)
|
|
30
|
+
VALUES (?, ?, ?)
|
|
31
|
+
""",
|
|
32
|
+
(term_id, normalized, normalized_description),
|
|
33
|
+
)
|
|
34
|
+
return get_glossary_term(connection, normalized) # type: ignore[return-value]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def get_glossary_term(connection: Connection, term_or_id: str) -> GlossaryTerm | None:
|
|
38
|
+
row = connection.execute(
|
|
39
|
+
"""
|
|
40
|
+
SELECT * FROM glossary_terms
|
|
41
|
+
WHERE id = ? OR term = ?
|
|
42
|
+
""",
|
|
43
|
+
(term_or_id, term_or_id),
|
|
44
|
+
).fetchone()
|
|
45
|
+
return _term_from_row(row) if row is not None else None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def list_glossary_terms(connection: Connection, include_disabled: bool = False) -> list[GlossaryTerm]:
|
|
49
|
+
query = "SELECT * FROM glossary_terms"
|
|
50
|
+
if not include_disabled:
|
|
51
|
+
query += " WHERE enabled = 1"
|
|
52
|
+
query += " ORDER BY lower(term)"
|
|
53
|
+
return [_term_from_row(row) for row in connection.execute(query).fetchall()]
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def update_glossary_term(
|
|
57
|
+
connection: Connection,
|
|
58
|
+
term_or_id: str,
|
|
59
|
+
*,
|
|
60
|
+
term: str | None = None,
|
|
61
|
+
description: str | None = None,
|
|
62
|
+
enabled: bool | None = None,
|
|
63
|
+
) -> GlossaryTerm:
|
|
64
|
+
existing = get_glossary_term(connection, term_or_id)
|
|
65
|
+
if existing is None:
|
|
66
|
+
raise ValueError(f"Glossary term not found: {term_or_id}")
|
|
67
|
+
|
|
68
|
+
updated_term = existing.term if term is None else _normalize_term(term)
|
|
69
|
+
updated_description = existing.description if description is None else _normalize_optional(description)
|
|
70
|
+
updated_enabled = existing.enabled if enabled is None else enabled
|
|
71
|
+
with connection:
|
|
72
|
+
connection.execute(
|
|
73
|
+
"""
|
|
74
|
+
UPDATE glossary_terms
|
|
75
|
+
SET term = ?,
|
|
76
|
+
description = ?,
|
|
77
|
+
enabled = ?,
|
|
78
|
+
updated_at = CURRENT_TIMESTAMP
|
|
79
|
+
WHERE id = ?
|
|
80
|
+
""",
|
|
81
|
+
(updated_term, updated_description, int(updated_enabled), existing.id),
|
|
82
|
+
)
|
|
83
|
+
return get_glossary_term(connection, existing.id) # type: ignore[return-value]
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def remove_glossary_term(connection: Connection, term_or_id: str) -> bool:
|
|
87
|
+
existing = get_glossary_term(connection, term_or_id)
|
|
88
|
+
if existing is None:
|
|
89
|
+
return False
|
|
90
|
+
with connection:
|
|
91
|
+
connection.execute("DELETE FROM glossary_terms WHERE id = ?", (existing.id,))
|
|
92
|
+
return True
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def glossary_prompt_lines(connection: Connection, legacy_glossary_path: Path | None = None) -> list[str]:
|
|
96
|
+
lines: list[str] = []
|
|
97
|
+
seen: set[str] = set()
|
|
98
|
+
|
|
99
|
+
for item in list_glossary_terms(connection):
|
|
100
|
+
key = item.term.casefold()
|
|
101
|
+
if key in seen:
|
|
102
|
+
continue
|
|
103
|
+
seen.add(key)
|
|
104
|
+
if item.description:
|
|
105
|
+
lines.append(f"{item.term}: {item.description}")
|
|
106
|
+
else:
|
|
107
|
+
lines.append(item.term)
|
|
108
|
+
|
|
109
|
+
for term in load_glossary_terms(legacy_glossary_path):
|
|
110
|
+
key = term.casefold()
|
|
111
|
+
if key not in seen:
|
|
112
|
+
seen.add(key)
|
|
113
|
+
lines.append(term)
|
|
114
|
+
|
|
115
|
+
for name in _people_names(connection):
|
|
116
|
+
key = name.casefold()
|
|
117
|
+
if key not in seen:
|
|
118
|
+
seen.add(key)
|
|
119
|
+
lines.append(name)
|
|
120
|
+
|
|
121
|
+
return lines
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def transcription_keyterms(connection: Connection, legacy_glossary_path: Path | None = None) -> list[str]:
|
|
125
|
+
terms: list[str] = []
|
|
126
|
+
seen: set[str] = set()
|
|
127
|
+
for item in list_glossary_terms(connection):
|
|
128
|
+
_append_keyterm(terms, seen, item.term)
|
|
129
|
+
for term in load_glossary_terms(legacy_glossary_path):
|
|
130
|
+
_append_keyterm(terms, seen, term)
|
|
131
|
+
for name in _people_names(connection):
|
|
132
|
+
_append_keyterm(terms, seen, name)
|
|
133
|
+
return terms[:1000]
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def load_glossary_terms(path: Path | None) -> list[str]:
|
|
137
|
+
if path is None or not path.exists():
|
|
138
|
+
return []
|
|
139
|
+
data = yaml.safe_load(path.read_text())
|
|
140
|
+
return sorted(set(_collect_terms(data)), key=str.casefold)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _append_keyterm(terms: list[str], seen: set[str], value: str) -> None:
|
|
144
|
+
normalized = " ".join(value.split())
|
|
145
|
+
key = normalized.casefold()
|
|
146
|
+
if key in seen or not _valid_keyterm(normalized):
|
|
147
|
+
return
|
|
148
|
+
seen.add(key)
|
|
149
|
+
terms.append(normalized)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _valid_keyterm(value: str) -> bool:
|
|
153
|
+
return (
|
|
154
|
+
bool(value)
|
|
155
|
+
and len(value) < 50
|
|
156
|
+
and len(value.split()) <= 5
|
|
157
|
+
and not any(char in value for char in UNSUPPORTED_KEYTERM_CHARS)
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _people_names(connection: Connection) -> list[str]:
|
|
162
|
+
return [
|
|
163
|
+
str(row["display_name"])
|
|
164
|
+
for row in connection.execute("SELECT display_name FROM people ORDER BY lower(display_name)").fetchall()
|
|
165
|
+
]
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _normalize_term(value: str) -> str:
|
|
169
|
+
normalized = " ".join(value.split())
|
|
170
|
+
if not normalized:
|
|
171
|
+
raise ValueError("Glossary term cannot be empty")
|
|
172
|
+
return normalized
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _normalize_optional(value: str | None) -> str | None:
|
|
176
|
+
if value is None:
|
|
177
|
+
return None
|
|
178
|
+
normalized = " ".join(value.split())
|
|
179
|
+
return normalized or None
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _term_from_row(row: Any) -> GlossaryTerm:
|
|
183
|
+
return GlossaryTerm(
|
|
184
|
+
id=row["id"],
|
|
185
|
+
term=row["term"],
|
|
186
|
+
description=row["description"],
|
|
187
|
+
enabled=bool(row["enabled"]),
|
|
188
|
+
)
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _collect_terms(value: Any) -> list[str]:
|
|
192
|
+
if value is None:
|
|
193
|
+
return []
|
|
194
|
+
if isinstance(value, str):
|
|
195
|
+
normalized = " ".join(value.split())
|
|
196
|
+
return [normalized] if normalized else []
|
|
197
|
+
if isinstance(value, list):
|
|
198
|
+
terms: list[str] = []
|
|
199
|
+
for item in value:
|
|
200
|
+
terms.extend(_collect_terms(item))
|
|
201
|
+
return terms
|
|
202
|
+
if isinstance(value, dict):
|
|
203
|
+
terms = []
|
|
204
|
+
for item in value.values():
|
|
205
|
+
terms.extend(_collect_terms(item))
|
|
206
|
+
return terms
|
|
207
|
+
return []
|
|
@@ -13,7 +13,7 @@ from fly_on_the_wall.config import AppConfig
|
|
|
13
13
|
from fly_on_the_wall.costs import record_openai_usage
|
|
14
14
|
from fly_on_the_wall.embeddings import EmbeddingBackend
|
|
15
15
|
from fly_on_the_wall.exporting import ExportResult, export_markdown_transcript
|
|
16
|
-
from fly_on_the_wall.glossary import
|
|
16
|
+
from fly_on_the_wall.glossary import glossary_prompt_lines, transcription_keyterms
|
|
17
17
|
from fly_on_the_wall.meetings import (
|
|
18
18
|
Meeting,
|
|
19
19
|
get_meeting,
|
|
@@ -107,8 +107,17 @@ def process_audio(
|
|
|
107
107
|
existing_provider_run = latest_completed_provider_run(connection, meeting.id)
|
|
108
108
|
if existing_provider_run is None:
|
|
109
109
|
with timed_progress.step("Transcribing audio with ElevenLabs"):
|
|
110
|
-
|
|
111
|
-
|
|
110
|
+
if transcribe_fn is not None:
|
|
111
|
+
provider_run_id = transcribe_fn(connection, meeting.id, meeting.imported_audio_path, paths)
|
|
112
|
+
else:
|
|
113
|
+
keyterms = transcription_keyterms(connection, config.glossary_path)
|
|
114
|
+
provider_run_id = _run_elevenlabs_transcription(
|
|
115
|
+
connection,
|
|
116
|
+
meeting.id,
|
|
117
|
+
meeting.imported_audio_path,
|
|
118
|
+
paths,
|
|
119
|
+
keyterms,
|
|
120
|
+
)
|
|
112
121
|
else:
|
|
113
122
|
timed_progress.message("Reusing completed ElevenLabs transcription")
|
|
114
123
|
provider_run_id = existing_provider_run["id"]
|
|
@@ -200,7 +209,7 @@ def _cleanup_transcript(context: RefreshContext, deterministic_transcript: str)
|
|
|
200
209
|
if context.config.cleanup_mode != "light" or not get_api_key("openai"):
|
|
201
210
|
return TranscriptArtifacts(deterministic_transcript, deterministic_transcript)
|
|
202
211
|
|
|
203
|
-
glossary_terms =
|
|
212
|
+
glossary_terms = glossary_prompt_lines(context.connection, context.config.glossary_path)
|
|
204
213
|
cleanup_cache_key = text_sha256(
|
|
205
214
|
"\n".join(
|
|
206
215
|
[
|
|
@@ -260,7 +269,10 @@ def _suggest_and_apply_title(
|
|
|
260
269
|
if meeting.get("title_source") == "manual":
|
|
261
270
|
return
|
|
262
271
|
|
|
263
|
-
|
|
272
|
+
glossary_terms = glossary_prompt_lines(context.connection, context.config.glossary_path)
|
|
273
|
+
title_cache_key = text_sha256(
|
|
274
|
+
"\n".join([DEFAULT_ANALYSIS_MODEL, context.description or "", "\n".join(glossary_terms), transcript, analysis])
|
|
275
|
+
)
|
|
264
276
|
title_cache_dir = context.paths.artifacts / context.meeting.id / "generated-title"
|
|
265
277
|
cached_title = read_cached_text(title_cache_dir, title_cache_key)
|
|
266
278
|
if cached_title is not None:
|
|
@@ -274,6 +286,7 @@ def _suggest_and_apply_title(
|
|
|
274
286
|
transcript,
|
|
275
287
|
analysis,
|
|
276
288
|
meeting_context=context.description,
|
|
289
|
+
glossary_terms=glossary_terms,
|
|
277
290
|
options=OpenAIRequestOptions(
|
|
278
291
|
usage_callback=lambda response: _record_openai_usage(
|
|
279
292
|
context, DEFAULT_ANALYSIS_MODEL, "title", response
|
|
@@ -291,9 +304,13 @@ def _suggest_and_apply_title(
|
|
|
291
304
|
|
|
292
305
|
|
|
293
306
|
def _run_elevenlabs_transcription(
|
|
294
|
-
connection: Connection,
|
|
307
|
+
connection: Connection,
|
|
308
|
+
meeting_id: str,
|
|
309
|
+
audio_path: Path,
|
|
310
|
+
storage: StoragePaths,
|
|
311
|
+
keyterms: list[str] | None = None,
|
|
295
312
|
) -> str:
|
|
296
|
-
return run_transcription(connection, meeting_id, audio_path, storage)
|
|
313
|
+
return run_transcription(connection, meeting_id, audio_path, storage, keyterms=keyterms)
|
|
297
314
|
|
|
298
315
|
|
|
299
316
|
def _meeting_from_database(connection: Connection, meeting_id: str) -> Meeting:
|
|
@@ -398,7 +415,10 @@ def _analyze_transcript(
|
|
|
398
415
|
if not get_api_key("openai"):
|
|
399
416
|
return fallback_analysis("OPENAI_API_KEY is missing")
|
|
400
417
|
|
|
401
|
-
|
|
418
|
+
glossary_terms = glossary_prompt_lines(context.connection, context.config.glossary_path)
|
|
419
|
+
analysis_cache_key = text_sha256(
|
|
420
|
+
"\n".join([DEFAULT_ANALYSIS_MODEL, context.description or "", "\n".join(glossary_terms), transcript])
|
|
421
|
+
)
|
|
402
422
|
analysis_cache_dir = context.paths.artifacts / context.meeting.id / "analysis"
|
|
403
423
|
cached_analysis = read_cached_text(analysis_cache_dir, analysis_cache_key)
|
|
404
424
|
if cached_analysis is not None:
|
|
@@ -411,6 +431,7 @@ def _analyze_transcript(
|
|
|
411
431
|
AnalysisRequest(
|
|
412
432
|
transcript,
|
|
413
433
|
meeting_context=context.description,
|
|
434
|
+
glossary_terms=glossary_terms,
|
|
414
435
|
options=OpenAIRequestOptions(
|
|
415
436
|
usage_callback=lambda response: _record_openai_usage(
|
|
416
437
|
context, DEFAULT_ANALYSIS_MODEL, "analysis", response
|
|
@@ -28,6 +28,7 @@ def transcribe_audio(
|
|
|
28
28
|
num_speakers: int | None = None,
|
|
29
29
|
diarization_threshold: float | None = None,
|
|
30
30
|
no_verbatim: bool = False,
|
|
31
|
+
keyterms: list[str] | None = None,
|
|
31
32
|
) -> dict[str, Any]:
|
|
32
33
|
resolved_api_key = api_key or get_api_key(PROVIDER)
|
|
33
34
|
if not resolved_api_key:
|
|
@@ -46,6 +47,8 @@ def transcribe_audio(
|
|
|
46
47
|
data["num_speakers"] = str(num_speakers)
|
|
47
48
|
if diarization_threshold is not None:
|
|
48
49
|
data["diarization_threshold"] = str(diarization_threshold)
|
|
50
|
+
if keyterms:
|
|
51
|
+
data["keyterms"] = json.dumps(keyterms, ensure_ascii=False)
|
|
49
52
|
|
|
50
53
|
close_client = client is None
|
|
51
54
|
http_client = client or httpx.Client(timeout=600)
|
|
@@ -76,15 +79,17 @@ def run_transcription(
|
|
|
76
79
|
storage: StoragePaths | None = None,
|
|
77
80
|
client: httpx.Client | None = None,
|
|
78
81
|
api_key: str | None = None,
|
|
82
|
+
keyterms: list[str] | None = None,
|
|
79
83
|
) -> str:
|
|
80
84
|
paths = storage or storage_paths()
|
|
81
85
|
provider_run_id = str(uuid4())
|
|
82
86
|
raw_response_path = paths.artifacts / meeting_id / "provider-runs" / f"{provider_run_id}.raw.json"
|
|
83
87
|
raw_response_path.parent.mkdir(parents=True, exist_ok=True)
|
|
84
88
|
|
|
85
|
-
|
|
89
|
+
settings = {"keyterms": keyterms or []}
|
|
90
|
+
_insert_provider_run(connection, provider_run_id, meeting_id, raw_response_path, "running", settings)
|
|
86
91
|
try:
|
|
87
|
-
response = transcribe_audio(audio_path, api_key=api_key, client=client)
|
|
92
|
+
response = transcribe_audio(audio_path, api_key=api_key, client=client, keyterms=keyterms)
|
|
88
93
|
raw_response_path.write_text(json.dumps(response, indent=2, ensure_ascii=False) + "\n")
|
|
89
94
|
duration = float(response.get("audio_duration_secs") or 0)
|
|
90
95
|
record_service_usage(
|
|
@@ -112,6 +117,7 @@ def _insert_provider_run(
|
|
|
112
117
|
meeting_id: str,
|
|
113
118
|
raw_response_path: Path,
|
|
114
119
|
status: str,
|
|
120
|
+
settings: dict[str, Any] | None = None,
|
|
115
121
|
) -> None:
|
|
116
122
|
with connection:
|
|
117
123
|
connection.execute(
|
|
@@ -121,11 +127,20 @@ def _insert_provider_run(
|
|
|
121
127
|
meeting_id,
|
|
122
128
|
provider,
|
|
123
129
|
model,
|
|
130
|
+
settings_json,
|
|
124
131
|
raw_response_path,
|
|
125
132
|
status
|
|
126
|
-
) VALUES (?, ?, ?, ?, ?, ?)
|
|
133
|
+
) VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
127
134
|
""",
|
|
128
|
-
(
|
|
135
|
+
(
|
|
136
|
+
provider_run_id,
|
|
137
|
+
meeting_id,
|
|
138
|
+
PROVIDER,
|
|
139
|
+
MODEL,
|
|
140
|
+
json.dumps(settings or {}, sort_keys=True),
|
|
141
|
+
str(raw_response_path),
|
|
142
|
+
status,
|
|
143
|
+
),
|
|
129
144
|
)
|
|
130
145
|
|
|
131
146
|
|
|
@@ -28,6 +28,7 @@ class OpenAIRequestOptions:
|
|
|
28
28
|
class AnalysisRequest:
|
|
29
29
|
transcript_markdown: str
|
|
30
30
|
meeting_context: str | None = None
|
|
31
|
+
glossary_terms: list[str] | None = None
|
|
31
32
|
options: OpenAIRequestOptions = field(default_factory=OpenAIRequestOptions)
|
|
32
33
|
|
|
33
34
|
|
|
@@ -36,6 +37,7 @@ class TitleRequest:
|
|
|
36
37
|
transcript_markdown: str
|
|
37
38
|
analysis_markdown: str
|
|
38
39
|
meeting_context: str | None = None
|
|
40
|
+
glossary_terms: list[str] | None = None
|
|
39
41
|
options: OpenAIRequestOptions = field(default_factory=OpenAIRequestOptions)
|
|
40
42
|
|
|
41
43
|
|
|
@@ -50,7 +52,7 @@ class ChatCompletionRequest:
|
|
|
50
52
|
def analyze_meeting(request: AnalysisRequest) -> str:
|
|
51
53
|
return _post_chat_completion(
|
|
52
54
|
ChatCompletionRequest(
|
|
53
|
-
system_prompt=_system_prompt(request.meeting_context),
|
|
55
|
+
system_prompt=_system_prompt(request.meeting_context, request.glossary_terms),
|
|
54
56
|
user_prompt=request.transcript_markdown,
|
|
55
57
|
options=request.options,
|
|
56
58
|
timeout_seconds=180,
|
|
@@ -61,7 +63,7 @@ def analyze_meeting(request: AnalysisRequest) -> str:
|
|
|
61
63
|
def suggest_meeting_title(request: TitleRequest) -> str:
|
|
62
64
|
content = _post_chat_completion(
|
|
63
65
|
ChatCompletionRequest(
|
|
64
|
-
system_prompt=_title_system_prompt(request.meeting_context),
|
|
66
|
+
system_prompt=_title_system_prompt(request.meeting_context, request.glossary_terms),
|
|
65
67
|
user_prompt=(f"Transcript:\n{request.transcript_markdown}\n\nAnalysis:\n{request.analysis_markdown}"),
|
|
66
68
|
options=request.options,
|
|
67
69
|
timeout_seconds=60,
|
|
@@ -148,8 +150,9 @@ None identified.
|
|
|
148
150
|
""".strip()
|
|
149
151
|
|
|
150
152
|
|
|
151
|
-
def _system_prompt(meeting_context: str | None) -> str:
|
|
153
|
+
def _system_prompt(meeting_context: str | None, glossary_terms: list[str] | None) -> str:
|
|
152
154
|
context = meeting_context or "none"
|
|
155
|
+
glossary = _format_glossary(glossary_terms)
|
|
153
156
|
return f"""
|
|
154
157
|
You analyze meeting transcripts for a personal note-taker.
|
|
155
158
|
Return concise Markdown with exactly these headings:
|
|
@@ -163,12 +166,17 @@ Return concise Markdown with exactly these headings:
|
|
|
163
166
|
Keep it short and prioritized. Do not invent facts.
|
|
164
167
|
If a section has no useful content, write "None identified."
|
|
165
168
|
For action items, use: - Owner: task. Due: date or Not mentioned.
|
|
169
|
+
Use the glossary spellings when the transcript appears to refer to these names or domain terms.
|
|
170
|
+
Do not insert glossary terms unless the transcript context supports them.
|
|
166
171
|
Meeting context: {context}
|
|
172
|
+
Known names and terms:
|
|
173
|
+
{glossary}
|
|
167
174
|
""".strip()
|
|
168
175
|
|
|
169
176
|
|
|
170
|
-
def _title_system_prompt(meeting_context: str | None) -> str:
|
|
177
|
+
def _title_system_prompt(meeting_context: str | None, glossary_terms: list[str] | None) -> str:
|
|
171
178
|
context = meeting_context or "none"
|
|
179
|
+
glossary = _format_glossary(glossary_terms)
|
|
172
180
|
return f"""
|
|
173
181
|
You name meeting transcripts for a personal note-taker.
|
|
174
182
|
Return only one title, with no Markdown, labels, quotes, or punctuation wrapper.
|
|
@@ -177,10 +185,20 @@ Prefer concrete names, projects, organizations, and topics from the transcript.
|
|
|
177
185
|
Do not include dates unless the date is central to the meeting topic.
|
|
178
186
|
Do not return generic titles like "Meeting Summary" or "Team Meeting".
|
|
179
187
|
If the transcript has no meaningful content, return an empty string.
|
|
188
|
+
Use the glossary spellings when the transcript appears to refer to these names or domain terms.
|
|
189
|
+
Do not insert glossary terms unless the transcript context supports them.
|
|
180
190
|
Meeting context: {context}
|
|
191
|
+
Known names and terms:
|
|
192
|
+
{glossary}
|
|
181
193
|
""".strip()
|
|
182
194
|
|
|
183
195
|
|
|
196
|
+
def _format_glossary(glossary_terms: list[str] | None) -> str:
|
|
197
|
+
if not glossary_terms:
|
|
198
|
+
return "- none"
|
|
199
|
+
return "\n".join(f"- {term}" for term in glossary_terms)
|
|
200
|
+
|
|
201
|
+
|
|
184
202
|
def _extract_content(response: dict[str, Any]) -> str:
|
|
185
203
|
try:
|
|
186
204
|
content = response["choices"][0]["message"]["content"]
|
|
@@ -10,7 +10,7 @@ from fly_on_the_wall.secrets import get_api_key
|
|
|
10
10
|
API_URL = "https://api.openai.com/v1/chat/completions"
|
|
11
11
|
DEFAULT_MODEL = "gpt-5.4-mini"
|
|
12
12
|
DEFAULT_CLEANUP_TIMEOUT_SECONDS = 1800
|
|
13
|
-
CLEANUP_PROMPT_VERSION = "2026-06-
|
|
13
|
+
CLEANUP_PROMPT_VERSION = "2026-06-13-manuscript-cleanup-glossary-v5"
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class OpenAICleanupError(RuntimeError):
|
|
@@ -61,7 +61,7 @@ def cleanup_transcript(
|
|
|
61
61
|
|
|
62
62
|
|
|
63
63
|
def _system_prompt(glossary_terms: list[str] | None, meeting_context: str | None) -> str:
|
|
64
|
-
glossary =
|
|
64
|
+
glossary = _format_glossary(glossary_terms)
|
|
65
65
|
context = meeting_context or "none"
|
|
66
66
|
return f"""
|
|
67
67
|
You clean meeting transcripts into readable manuscript-style dialogue.
|
|
@@ -78,12 +78,21 @@ of an idiom, or used with clear literal/comparative meaning, such as "på samma
|
|
|
78
78
|
Prefer complete readable sentences over literal STT fragments, but do not summarize,
|
|
79
79
|
invent details, remove uncertainty markers, or add new content.
|
|
80
80
|
Preserve standalone acknowledgements such as yes/no/okay/mm and Swedish ja/nej/okej/mm.
|
|
81
|
+
Use the glossary spellings when the transcript appears to refer to these names or domain terms.
|
|
82
|
+
Do not insert glossary terms unless the transcript context supports them.
|
|
81
83
|
Return only the cleaned manuscript.
|
|
82
84
|
Meeting context: {context}
|
|
83
|
-
|
|
85
|
+
Known names and terms:
|
|
86
|
+
{glossary}
|
|
84
87
|
""".strip()
|
|
85
88
|
|
|
86
89
|
|
|
90
|
+
def _format_glossary(glossary_terms: list[str] | None) -> str:
|
|
91
|
+
if not glossary_terms:
|
|
92
|
+
return "- none"
|
|
93
|
+
return "\n".join(f"- {term}" for term in glossary_terms)
|
|
94
|
+
|
|
95
|
+
|
|
87
96
|
def _extract_content(response: dict[str, Any]) -> str:
|
|
88
97
|
try:
|
|
89
98
|
content = response["choices"][0]["message"]["content"]
|
|
@@ -133,7 +133,8 @@ def publish_meeting(connection: Connection, meeting_id_or_slug: str, target_iden
|
|
|
133
133
|
analysis_markdown = _read_analysis_markdown(analysis_path)
|
|
134
134
|
manifest = json.loads(manifest_path.read_text())
|
|
135
135
|
output_path = _published_output_path(connection, meeting, target)
|
|
136
|
-
|
|
136
|
+
participants = _meeting_participants(connection, meeting["id"])
|
|
137
|
+
content = _obsidian_note(meeting, transcript_markdown, analysis_markdown, manifest, participants)
|
|
137
138
|
content_hash = _sha256(content)
|
|
138
139
|
|
|
139
140
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
@@ -271,7 +272,34 @@ def _published_output_path(connection: Connection, meeting: dict, target: Publis
|
|
|
271
272
|
return target.path / filename
|
|
272
273
|
|
|
273
274
|
|
|
274
|
-
def
|
|
275
|
+
def _meeting_participants(connection: Connection, meeting_id: str) -> list[str]:
|
|
276
|
+
rows = connection.execute(
|
|
277
|
+
"""
|
|
278
|
+
SELECT DISTINCT people.display_name
|
|
279
|
+
FROM speaker_assignments
|
|
280
|
+
JOIN local_speakers ON local_speakers.id = speaker_assignments.local_speaker_id
|
|
281
|
+
JOIN people ON people.id = speaker_assignments.person_id
|
|
282
|
+
WHERE local_speakers.meeting_id = ?
|
|
283
|
+
AND speaker_assignments.status = 'known'
|
|
284
|
+
ORDER BY lower(people.display_name)
|
|
285
|
+
""",
|
|
286
|
+
(meeting_id,),
|
|
287
|
+
).fetchall()
|
|
288
|
+
return [_obsidian_people_link(row["display_name"]) for row in rows]
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def _obsidian_people_link(display_name: str) -> str:
|
|
292
|
+
safe_name = display_name.replace("[[", "").replace("]]", "").replace("|", "-").strip()
|
|
293
|
+
return f"[[People/{safe_name}]]"
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def _obsidian_note(
|
|
297
|
+
meeting: dict,
|
|
298
|
+
transcript_markdown: str,
|
|
299
|
+
analysis_markdown: str,
|
|
300
|
+
manifest: dict,
|
|
301
|
+
participants: list[str] | None = None,
|
|
302
|
+
) -> str:
|
|
275
303
|
date, time = _date_time(_meeting_timestamp(meeting))
|
|
276
304
|
frontmatter = {
|
|
277
305
|
"title": meeting["title"],
|
|
@@ -284,6 +312,7 @@ def _obsidian_note(meeting: dict, transcript_markdown: str, analysis_markdown: s
|
|
|
284
312
|
"recorded_at": meeting.get("recorded_at"),
|
|
285
313
|
"duration_seconds": meeting.get("duration_seconds"),
|
|
286
314
|
"recording_quality": meeting.get("recording_quality_status"),
|
|
315
|
+
"participants": participants or None,
|
|
287
316
|
"tags": ["meetings", "fly-on-the-wall"],
|
|
288
317
|
}
|
|
289
318
|
lines = ["---", *_yaml_lines(frontmatter), "---", ""]
|
|
@@ -330,7 +359,7 @@ def _yaml_lines(values: dict) -> list[str]:
|
|
|
330
359
|
|
|
331
360
|
def _yaml_scalar(value: object) -> str:
|
|
332
361
|
text = str(value)
|
|
333
|
-
if re.search(r"[:#\n
|
|
362
|
+
if re.search(r"[:#\n,\[\]{}]", text):
|
|
334
363
|
return json.dumps(text, ensure_ascii=False)
|
|
335
364
|
return text
|
|
336
365
|
|
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
from typing import Any
|
|
5
|
-
|
|
6
|
-
import yaml
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
def load_glossary_terms(path: Path | None) -> list[str]:
|
|
10
|
-
if path is None or not path.exists():
|
|
11
|
-
return []
|
|
12
|
-
data = yaml.safe_load(path.read_text())
|
|
13
|
-
return sorted(set(_collect_terms(data)))
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def _collect_terms(value: Any) -> list[str]:
|
|
17
|
-
if value is None:
|
|
18
|
-
return []
|
|
19
|
-
if isinstance(value, str):
|
|
20
|
-
return [value]
|
|
21
|
-
if isinstance(value, list):
|
|
22
|
-
terms: list[str] = []
|
|
23
|
-
for item in value:
|
|
24
|
-
terms.extend(_collect_terms(item))
|
|
25
|
-
return terms
|
|
26
|
-
if isinstance(value, dict):
|
|
27
|
-
terms = []
|
|
28
|
-
for item in value.values():
|
|
29
|
-
terms.extend(_collect_terms(item))
|
|
30
|
-
return terms
|
|
31
|
-
return []
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|