fow-cli 0.1.0__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. fow_cli-0.2.0/CHANGELOG.md +24 -0
  2. {fow_cli-0.1.0 → fow_cli-0.2.0}/PKG-INFO +9 -2
  3. {fow_cli-0.1.0 → fow_cli-0.2.0}/README.md +8 -1
  4. {fow_cli-0.1.0 → fow_cli-0.2.0}/pyproject.toml +25 -1
  5. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/__init__.py +1 -1
  6. fow_cli-0.2.0/src/fly_on_the_wall/api_keys.py +6 -0
  7. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/audio_metadata.py +19 -6
  8. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/cli.py +9 -7
  9. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/cli_menu.py +28 -20
  10. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/cli_watch.py +38 -2
  11. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/config.py +0 -5
  12. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/db.py +10 -3
  13. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/embeddings.py +26 -5
  14. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/processing.py +17 -18
  15. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/providers/openai_analysis.py +5 -4
  16. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/providers/openai_cleanup.py +3 -2
  17. fow_cli-0.2.0/src/fly_on_the_wall/py.typed +0 -0
  18. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/secrets.py +3 -3
  19. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/setup.py +4 -2
  20. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/speaker_matching.py +9 -2
  21. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/watch.py +57 -11
  22. {fow_cli-0.1.0 → fow_cli-0.2.0}/.gitignore +0 -0
  23. {fow_cli-0.1.0 → fow_cli-0.2.0}/LICENSE +0 -0
  24. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/audio.py +0 -0
  25. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/cache.py +0 -0
  26. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/cleanup.py +0 -0
  27. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/cli_costs.py +0 -0
  28. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/cli_publish.py +0 -0
  29. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/cli_speaker_review.py +0 -0
  30. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/costs.py +0 -0
  31. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/doctor.py +0 -0
  32. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/exporting.py +0 -0
  33. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/glossary.py +0 -0
  34. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/meetings.py +0 -0
  35. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/normalization.py +0 -0
  36. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/people.py +0 -0
  37. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/people_embeddings.py +0 -0
  38. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/pipeline.py +0 -0
  39. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/providers/__init__.py +0 -0
  40. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/providers/elevenlabs.py +0 -0
  41. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/publishing.py +0 -0
  42. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/reanalysis.py +0 -0
  43. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/recording_quality.py +0 -0
  44. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/rendering.py +0 -0
  45. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/service_pricing.py +0 -0
  46. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/speaker_identity.py +0 -0
  47. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/speakers.py +0 -0
  48. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/storage.py +0 -0
  49. {fow_cli-0.1.0 → fow_cli-0.2.0}/src/fly_on_the_wall/voice_samples.py +0 -0
@@ -0,0 +1,24 @@
1
+ # Changelog
2
+
3
+ All notable changes to Fly on the Wall are documented here.
4
+
5
+ ## [0.2.0] - 2026-06-09
6
+
7
+ ### Added
8
+
9
+ - Added folder-level `--delete-originals-after-import` support for watched folders.
10
+ - Added `fow watch folders delete-originals-after-import` to toggle original cleanup for existing watch folders.
11
+ - Added a `py.typed` marker so editors and type checkers recognize the package as typed.
12
+ - Added pragmatic `basedpyright` type checking for source files and documented the code quality policy.
13
+
14
+ ### Fixed
15
+
16
+ - Avoided a tight retry loop when the watch backend fails with an error such as `Too many open files`.
17
+ - Resolved source-level `basedpyright` warnings.
18
+
19
+ ## [0.1.0] - 2026-06-09
20
+
21
+ ### Added
22
+
23
+ - Initial public release of the `fow` CLI as the `fow-cli` PyPI package.
24
+ - Published GitHub repository and release artifacts.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fow-cli
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: Personal CLI note-taker for turning meeting audio into cleaned meeting manuscripts.
5
5
  Project-URL: Repository, https://github.com/henriksvensson/fly-on-the-wall
6
6
  License-Expression: MIT
@@ -30,6 +30,10 @@ Description-Content-Type: text/markdown
30
30
 
31
31
  # Fly on the Wall
32
32
 
33
+ [![PyPI](https://img.shields.io/pypi/v/fow-cli.svg)](https://pypi.org/project/fow-cli/)
34
+ [![Python Versions](https://img.shields.io/pypi/pyversions/fow-cli.svg)](https://pypi.org/project/fow-cli/)
35
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
36
+
33
37
  Fly on the Wall is a personal CLI note-taker for meeting audio.
34
38
 
35
39
  It takes local audio recordings, transcribes them, identifies recurring speakers where possible, cleans the transcript, analyzes the meeting, exports durable Markdown artifacts, and can publish readable notes into an Obsidian vault.
@@ -419,8 +423,11 @@ Run lint and formatting checks:
419
423
  ```bash
420
424
  uv run ruff check .
421
425
  uv run ruff format --check .
426
+ uv run basedpyright
422
427
  ```
423
428
 
429
+ `basedpyright` is configured as a pragmatic source-code guardrail. It checks explicit type claims in `src/` without requiring every dynamic SQLite, JSON, or third-party boundary to be fully typed.
430
+
424
431
  Build distribution artifacts:
425
432
 
426
433
  ```bash
@@ -430,7 +437,7 @@ uv build
430
437
  Test a built wheel locally:
431
438
 
432
439
  ```bash
433
- uv tool install dist/fly_on_the_wall-0.1.0-py3-none-any.whl
440
+ uv tool install dist/fow_cli-0.2.0-py3-none-any.whl
434
441
  fow setup
435
442
  ```
436
443
 
@@ -1,5 +1,9 @@
1
1
  # Fly on the Wall
2
2
 
3
+ [![PyPI](https://img.shields.io/pypi/v/fow-cli.svg)](https://pypi.org/project/fow-cli/)
4
+ [![Python Versions](https://img.shields.io/pypi/pyversions/fow-cli.svg)](https://pypi.org/project/fow-cli/)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
6
+
3
7
  Fly on the Wall is a personal CLI note-taker for meeting audio.
4
8
 
5
9
  It takes local audio recordings, transcribes them, identifies recurring speakers where possible, cleans the transcript, analyzes the meeting, exports durable Markdown artifacts, and can publish readable notes into an Obsidian vault.
@@ -389,8 +393,11 @@ Run lint and formatting checks:
389
393
  ```bash
390
394
  uv run ruff check .
391
395
  uv run ruff format --check .
396
+ uv run basedpyright
392
397
  ```
393
398
 
399
+ `basedpyright` is configured as a pragmatic source-code guardrail. It checks explicit type claims in `src/` without requiring every dynamic SQLite, JSON, or third-party boundary to be fully typed.
400
+
394
401
  Build distribution artifacts:
395
402
 
396
403
  ```bash
@@ -400,7 +407,7 @@ uv build
400
407
  Test a built wheel locally:
401
408
 
402
409
  ```bash
403
- uv tool install dist/fly_on_the_wall-0.1.0-py3-none-any.whl
410
+ uv tool install dist/fow_cli-0.2.0-py3-none-any.whl
404
411
  fow setup
405
412
  ```
406
413
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "fow-cli"
3
- version = "0.1.0"
3
+ version = "0.2.0"
4
4
  description = "Personal CLI note-taker for turning meeting audio into cleaned meeting manuscripts."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12"
@@ -41,6 +41,7 @@ fow = "fly_on_the_wall.cli:app"
41
41
 
42
42
  [dependency-groups]
43
43
  dev = [
44
+ "basedpyright>=1.39.7",
44
45
  "pre-commit>=4.0.0",
45
46
  "pytest>=8.0.0",
46
47
  "ruff>=0.8.0",
@@ -58,6 +59,7 @@ ignore-vcs = true
58
59
  only-include = [
59
60
  "src/fly_on_the_wall",
60
61
  "README.md",
62
+ "CHANGELOG.md",
61
63
  "LICENSE",
62
64
  "pyproject.toml",
63
65
  ]
@@ -74,3 +76,25 @@ select = ["E", "F", "I", "UP", "B"]
74
76
  [tool.pytest.ini_options]
75
77
  testpaths = ["tests"]
76
78
  addopts = "-q"
79
+
80
+ [tool.basedpyright]
81
+ include = ["src"]
82
+ exclude = ["tests"]
83
+ reportAny = "none"
84
+ reportExplicitAny = "none"
85
+ reportUnknownVariableType = "none"
86
+ reportUnknownMemberType = "none"
87
+ reportUnknownArgumentType = "none"
88
+ reportUnknownParameterType = "none"
89
+ reportMissingParameterType = "none"
90
+ reportMissingTypeArgument = "none"
91
+ reportUnusedCallResult = "none"
92
+ reportUnannotatedClassAttribute = "none"
93
+ reportArgumentType = "error"
94
+ reportAssignmentType = "error"
95
+ reportReturnType = "error"
96
+ reportOperatorIssue = "error"
97
+ reportOptionalMemberAccess = "error"
98
+ reportAttributeAccessIssue = "error"
99
+ reportCallIssue = "error"
100
+ reportImportCycles = "error"
@@ -1,3 +1,3 @@
1
1
  """Fly on the Wall CLI application."""
2
2
 
3
- __version__ = "0.1.0"
3
+ __version__ = "0.2.0"
@@ -0,0 +1,6 @@
1
+ from __future__ import annotations
2
+
3
+ API_KEY_ENV_VARS: dict[str, str] = {
4
+ "elevenlabs": "ELEVENLABS_API_KEY",
5
+ "openai": "OPENAI_API_KEY",
6
+ }
@@ -6,6 +6,7 @@ from dataclasses import dataclass
6
6
  from datetime import datetime
7
7
  from pathlib import Path
8
8
  from sqlite3 import Connection
9
+ from typing import Any, cast
9
10
 
10
11
  from fly_on_the_wall.audio import AudioError, probe_metadata
11
12
  from fly_on_the_wall.storage import StoragePaths
@@ -103,9 +104,13 @@ def extract_and_store_audio_metadata(
103
104
  )
104
105
 
105
106
 
106
- def normalize_audio_metadata(raw_metadata: dict, audio_path: Path) -> NormalizedAudioMetadata:
107
+ JsonObject = dict[str, Any]
108
+
109
+
110
+ def normalize_audio_metadata(raw_metadata: JsonObject, audio_path: Path) -> NormalizedAudioMetadata:
107
111
  audio_stream = _first_audio_stream(raw_metadata)
108
- format_data = raw_metadata.get("format") if isinstance(raw_metadata.get("format"), dict) else {}
112
+ raw_format = raw_metadata.get("format")
113
+ format_data: JsonObject = cast(JsonObject, raw_format) if isinstance(raw_format, dict) else {}
109
114
  format_tags = _normalized_tags(format_data.get("tags"))
110
115
  stream_tags = _normalized_tags(audio_stream.get("tags"))
111
116
  tags = {**stream_tags, **format_tags}
@@ -133,13 +138,13 @@ def normalize_audio_metadata(raw_metadata: dict, audio_path: Path) -> Normalized
133
138
  )
134
139
 
135
140
 
136
- def _first_audio_stream(raw_metadata: dict) -> dict:
141
+ def _first_audio_stream(raw_metadata: JsonObject) -> JsonObject:
137
142
  streams = raw_metadata.get("streams")
138
143
  if not isinstance(streams, list):
139
144
  return {}
140
145
  for stream in streams:
141
146
  if isinstance(stream, dict) and stream.get("codec_type") == "audio":
142
- return stream
147
+ return cast(JsonObject, stream)
143
148
  return {}
144
149
 
145
150
 
@@ -228,14 +233,22 @@ def _optional_str(value: object) -> str | None:
228
233
 
229
234
 
230
235
  def _optional_int(value: object) -> int | None:
236
+ if value is None:
237
+ return None
231
238
  try:
232
- return int(value) if value is not None else None
239
+ if isinstance(value, int | float | str | bytes | bytearray):
240
+ return int(value)
241
+ return int(str(value))
233
242
  except (TypeError, ValueError):
234
243
  return None
235
244
 
236
245
 
237
246
  def _optional_float(value: object) -> float | None:
247
+ if value is None:
248
+ return None
238
249
  try:
239
- return float(value) if value is not None else None
250
+ if isinstance(value, int | float | str | bytes | bytearray):
251
+ return float(value)
252
+ return float(str(value))
240
253
  except (TypeError, ValueError):
241
254
  return None
@@ -93,13 +93,15 @@ def _version_callback(show_version: bool) -> None:
93
93
 
94
94
  @app.callback()
95
95
  def main(
96
- version: bool = typer.Option(
97
- False,
98
- "--version",
99
- callback=_version_callback,
100
- is_eager=True,
101
- help="Show the application version.",
102
- ),
96
+ _version: Annotated[
97
+ bool,
98
+ typer.Option(
99
+ "--version",
100
+ callback=_version_callback,
101
+ is_eager=True,
102
+ help="Show the application version.",
103
+ ),
104
+ ] = False,
103
105
  ) -> None:
104
106
  """Run Fly on the Wall commands."""
105
107
 
@@ -2,6 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import subprocess
4
4
  import threading
5
+ from collections.abc import Callable
5
6
  from dataclasses import dataclass
6
7
  from pathlib import Path
7
8
 
@@ -55,25 +56,11 @@ class InteractiveMenu:
55
56
  )
56
57
 
57
58
  def _bind_navigation_keys(self) -> None:
58
- @self.key_bindings.add("up")
59
- def _up(_event) -> None:
60
- self._move(-1)
61
-
62
- @self.key_bindings.add("down")
63
- def _down(_event) -> None:
64
- self._move(1)
65
-
66
- @self.key_bindings.add("enter")
67
- def _enter(_event) -> None:
68
- if self._playback_is_running():
69
- self._stop_playback()
70
- return
71
- self._finish(self.choices[self.selected_index])
72
-
73
- @self.key_bindings.add("escape")
74
- @self.key_bindings.add("c-c")
75
- def _cancel(_event) -> None:
76
- self._cancel()
59
+ self.key_bindings.add("up")(self._handle_up)
60
+ self.key_bindings.add("down")(self._handle_down)
61
+ self.key_bindings.add("enter")(self._handle_enter)
62
+ self.key_bindings.add("escape")(self._handle_cancel)
63
+ self.key_bindings.add("c-c")(self._handle_cancel)
77
64
 
78
65
  def _bind_shortcut_keys(self) -> None:
79
66
  bound_shortcuts: set[str] = set()
@@ -81,7 +68,28 @@ class InteractiveMenu:
81
68
  if choice.shortcut is None or choice.shortcut in bound_shortcuts:
82
69
  continue
83
70
  bound_shortcuts.add(choice.shortcut)
84
- self.key_bindings.add(choice.shortcut)(lambda _event, selected=choice: self._finish(selected))
71
+ self.key_bindings.add(choice.shortcut)(self._shortcut_handler(choice))
72
+
73
+ def _handle_up(self, _event: object) -> None:
74
+ self._move(-1)
75
+
76
+ def _handle_down(self, _event: object) -> None:
77
+ self._move(1)
78
+
79
+ def _handle_enter(self, _event: object) -> None:
80
+ if self._playback_is_running():
81
+ self._stop_playback()
82
+ return
83
+ self._finish(self.choices[self.selected_index])
84
+
85
+ def _handle_cancel(self, _event: object) -> None:
86
+ self._cancel()
87
+
88
+ def _shortcut_handler(self, choice: MenuChoice) -> Callable[[object], None]:
89
+ def handle_shortcut(_event: object) -> None:
90
+ self._finish(choice)
91
+
92
+ return handle_shortcut
85
93
 
86
94
  def _finish(self, choice: MenuChoice) -> None:
87
95
  if choice.playback_path is not None:
@@ -17,6 +17,7 @@ from fly_on_the_wall.watch import (
17
17
  list_watch_folders,
18
18
  remove_watch_folder,
19
19
  scan_watch_folders,
20
+ set_watch_folder_delete_originals_after_import,
20
21
  set_watch_folder_enabled,
21
22
  )
22
23
 
@@ -77,17 +78,26 @@ def watch_run(
77
78
  def watch_folders_add(
78
79
  path: Annotated[Path, typer.Argument(file_okay=False, dir_okay=True)],
79
80
  name: Annotated[str | None, typer.Option("--name", "-n", help="Optional folder name.")] = None,
81
+ delete_originals_after_import: Annotated[
82
+ bool,
83
+ typer.Option(
84
+ "--delete-originals-after-import",
85
+ help="Delete source audio files after this watch folder imports them successfully.",
86
+ ),
87
+ ] = False,
80
88
  ) -> None:
81
89
  """Add a folder to scan for audio files."""
82
90
  with database() as connection:
83
91
  try:
84
- folder = add_watch_folder(connection, path, name)
92
+ folder = add_watch_folder(connection, path, name, delete_originals_after_import)
85
93
  except Exception as exc:
86
94
  console.print(str(exc))
87
95
  raise typer.Exit(code=1) from exc
88
96
  console.print(f"Added watch folder {folder.path}")
89
97
  if folder.name:
90
98
  console.print(f"Name: {folder.name}")
99
+ if folder.delete_originals_after_import:
100
+ console.print("Original audio files will be deleted after successful import.")
91
101
 
92
102
 
93
103
  @watch_folders_app.command("list")
@@ -102,12 +112,14 @@ def watch_folders_list() -> None:
102
112
  table.add_column("ID")
103
113
  table.add_column("Name")
104
114
  table.add_column("Enabled")
115
+ table.add_column("Delete Originals")
105
116
  table.add_column("Path")
106
117
  for folder in folders:
107
118
  table.add_row(
108
119
  folder.id,
109
120
  folder.name or "",
110
121
  "yes" if folder.enabled else "no",
122
+ "yes" if folder.delete_originals_after_import else "no",
111
123
  str(folder.path),
112
124
  )
113
125
  console.print(table)
@@ -136,6 +148,27 @@ def watch_folders_disable(identifier: str) -> None:
136
148
  _set_watch_folder_enabled_command(identifier, False)
137
149
 
138
150
 
151
+ @watch_folders_app.command("delete-originals-after-import")
152
+ def watch_folders_delete_originals_after_import(
153
+ identifier: str,
154
+ enabled: Annotated[
155
+ bool,
156
+ typer.Option(
157
+ "--enabled/--disabled",
158
+ help="Whether this folder deletes source audio files after successful import.",
159
+ ),
160
+ ],
161
+ ) -> None:
162
+ """Configure original audio deletion after import for a watched folder."""
163
+ with database() as connection:
164
+ folder = set_watch_folder_delete_originals_after_import(connection, identifier, enabled)
165
+ if folder is None:
166
+ console.print(f"Watch folder not found: {identifier}")
167
+ raise typer.Exit(code=1)
168
+ state = "enabled" if enabled else "disabled"
169
+ console.print(f"Delete originals after import {state} for {folder.path}")
170
+
171
+
139
172
  def _watch_run_once(config, stable_age_seconds: int, interval_seconds: int) -> None:
140
173
  existing_paths = _existing_watch_paths()
141
174
  if not existing_paths:
@@ -146,7 +179,9 @@ def _watch_run_once(config, stable_age_seconds: int, interval_seconds: int) -> N
146
179
 
147
180
  changes = _watch_for_changes(existing_paths, interval_seconds)
148
181
  if changes is None:
182
+ console.print("Watch backend unavailable. Running safety scan before retry delay.")
149
183
  _scan_watch_once(config, stable_age_seconds)
184
+ sleep(interval_seconds)
150
185
  return
151
186
 
152
187
  _print_watch_changes(changes)
@@ -192,11 +227,12 @@ def _scan_watch_once(config, stable_age_seconds: int) -> None:
192
227
  stable_age_seconds=stable_age_seconds,
193
228
  progress=lambda message: console.print(f"-> {message}"),
194
229
  )
195
- console.print(
230
+ message = (
196
231
  f"Watch scan complete: {result.processed} processed, "
197
232
  f"{result.ignored} ignored, {result.skipped} skipped, "
198
233
  f"{result.failed} failed, {result.seen} seen."
199
234
  )
235
+ console.print(message)
200
236
 
201
237
 
202
238
  def _set_watch_folder_enabled_command(identifier: str, enabled: bool) -> None:
@@ -14,11 +14,6 @@ GLOSSARY_FILE_NAME = "glossary.yaml"
14
14
  ProviderName = Literal["elevenlabs", "openai"]
15
15
  CleanupMode = Literal["off", "deterministic", "light"]
16
16
 
17
- API_KEY_ENV_VARS: dict[str, str] = {
18
- "elevenlabs": "ELEVENLABS_API_KEY",
19
- "openai": "OPENAI_API_KEY",
20
- }
21
-
22
17
 
23
18
  class ConfigError(RuntimeError):
24
19
  """Raised when the application config cannot be loaded."""
@@ -2,13 +2,13 @@ from __future__ import annotations
2
2
 
3
3
  import json
4
4
  import sqlite3
5
- from collections.abc import Iterator
5
+ from collections.abc import Generator
6
6
  from contextlib import contextmanager
7
7
  from pathlib import Path
8
8
 
9
9
  from fly_on_the_wall.storage import ensure_storage_layout, storage_paths
10
10
 
11
- SCHEMA_VERSION = 16
11
+ SCHEMA_VERSION = 17
12
12
 
13
13
  SCHEMA_STATEMENTS = (
14
14
  """
@@ -218,6 +218,7 @@ SCHEMA_STATEMENTS = (
218
218
  name TEXT UNIQUE,
219
219
  path TEXT NOT NULL UNIQUE,
220
220
  enabled INTEGER NOT NULL DEFAULT 1,
221
+ delete_originals_after_import INTEGER NOT NULL DEFAULT 0,
221
222
  created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
222
223
  updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
223
224
  )
@@ -424,6 +425,12 @@ def initialize_database(connection: sqlite3.Connection) -> None:
424
425
  _ensure_column(connection, "meetings", "title_source", "TEXT NOT NULL DEFAULT 'manual'")
425
426
  _ensure_column(connection, "meetings", "generated_title", "TEXT")
426
427
  _ensure_column(connection, "people", "is_user", "INTEGER NOT NULL DEFAULT 0")
428
+ _ensure_column(
429
+ connection,
430
+ "watch_folders",
431
+ "delete_originals_after_import",
432
+ "INTEGER NOT NULL DEFAULT 0",
433
+ )
427
434
  connection.execute(
428
435
  """
429
436
  CREATE UNIQUE INDEX IF NOT EXISTS idx_meetings_audio_sha256
@@ -499,7 +506,7 @@ def bootstrap_database(database_path: Path | None = None) -> Path:
499
506
 
500
507
 
501
508
  @contextmanager
502
- def database(database_path: Path | None = None) -> Iterator[sqlite3.Connection]:
509
+ def database(database_path: Path | None = None) -> Generator[sqlite3.Connection]:
503
510
  connection = connect(database_path)
504
511
  try:
505
512
  initialize_database(connection)
@@ -2,10 +2,11 @@ from __future__ import annotations
2
2
 
3
3
  import json
4
4
  import math
5
+ from collections.abc import Iterable
5
6
  from dataclasses import dataclass
6
7
  from pathlib import Path
7
8
  from sqlite3 import Connection
8
- from typing import Protocol
9
+ from typing import Protocol, runtime_checkable
9
10
  from uuid import uuid4
10
11
 
11
12
  from fly_on_the_wall.storage import StoragePaths, storage_paths
@@ -19,6 +20,11 @@ class EmbeddingBackend(Protocol):
19
20
  def embed(self, audio_path: Path) -> list[float]: ...
20
21
 
21
22
 
23
+ @runtime_checkable
24
+ class SupportsToList(Protocol):
25
+ def tolist(self) -> object: ...
26
+
27
+
22
28
  @dataclass(frozen=True)
23
29
  class CachedEmbedding:
24
30
  model_name: str
@@ -36,13 +42,28 @@ class PyannoteEmbeddingBackend:
36
42
  raise RuntimeError("pyannote.audio is required for local speaker embeddings.") from exc
37
43
 
38
44
  model = Model.from_pretrained(self.model_name)
45
+ if model is None:
46
+ raise RuntimeError(f"Could not load embedding model: {self.model_name}")
39
47
  self._inference = Inference(model, window="whole")
40
48
 
41
49
  def embed(self, audio_path: Path) -> list[float]:
42
- embedding = self._inference(str(audio_path))
43
- if hasattr(embedding, "tolist"):
44
- return [float(value) for value in embedding.tolist()]
45
- return [float(value) for value in embedding]
50
+ return _embedding_to_vector(self._inference(str(audio_path)))
51
+
52
+
53
+ def _embedding_to_vector(embedding: object) -> list[float]:
54
+ values = embedding.tolist() if isinstance(embedding, SupportsToList) else embedding
55
+ if isinstance(values, str | bytes | bytearray) or not isinstance(values, Iterable):
56
+ raise RuntimeError("Embedding backend returned an unsupported shape.")
57
+ try:
58
+ return [_embedding_value_to_float(value) for value in values]
59
+ except (TypeError, ValueError) as exc:
60
+ raise RuntimeError("Embedding backend returned non-numeric values.") from exc
61
+
62
+
63
+ def _embedding_value_to_float(value: object) -> float:
64
+ if not isinstance(value, int | float | str | bytes | bytearray):
65
+ raise TypeError(f"Unsupported embedding value: {type(value).__name__}")
66
+ return float(value)
46
67
 
47
68
 
48
69
  def cache_voice_sample_embedding(
@@ -5,6 +5,7 @@ from collections.abc import Callable
5
5
  from dataclasses import dataclass
6
6
  from pathlib import Path
7
7
  from sqlite3 import Connection
8
+ from typing import Any
8
9
 
9
10
  from fly_on_the_wall.cache import read_cached_text, text_sha256, write_cached_text
10
11
  from fly_on_the_wall.cleanup import deterministic_cleanup
@@ -223,12 +224,8 @@ def _cleanup_transcript(context: RefreshContext, deterministic_transcript: str)
223
224
  deterministic_transcript,
224
225
  glossary_terms=glossary_terms,
225
226
  meeting_context=context.description,
226
- usage_callback=lambda response: record_openai_usage(
227
- context.connection,
228
- meeting_id=context.meeting.id,
229
- model=DEFAULT_CLEANUP_MODEL,
230
- service="cleanup",
231
- response=response,
227
+ usage_callback=lambda response: _record_openai_usage(
228
+ context, DEFAULT_CLEANUP_MODEL, "cleanup", response
232
229
  ),
233
230
  )
234
231
  write_cached_text(cleanup_cache_dir, cleanup_cache_key, cleaned_transcript)
@@ -278,12 +275,8 @@ def _suggest_and_apply_title(
278
275
  analysis,
279
276
  meeting_context=context.description,
280
277
  options=OpenAIRequestOptions(
281
- usage_callback=lambda response: record_openai_usage(
282
- context.connection,
283
- meeting_id=context.meeting.id,
284
- model=DEFAULT_ANALYSIS_MODEL,
285
- service="title",
286
- response=response,
278
+ usage_callback=lambda response: _record_openai_usage(
279
+ context, DEFAULT_ANALYSIS_MODEL, "title", response
287
280
  )
288
281
  ),
289
282
  ),
@@ -388,6 +381,16 @@ def _format_elapsed(seconds: float) -> str:
388
381
  return _format_duration(seconds)
389
382
 
390
383
 
384
+ def _record_openai_usage(context: RefreshContext, model: str, service: str, response: dict[str, Any]) -> None:
385
+ record_openai_usage(
386
+ context.connection,
387
+ meeting_id=context.meeting.id,
388
+ model=model,
389
+ service=service,
390
+ response=response,
391
+ )
392
+
393
+
391
394
  def _analyze_transcript(
392
395
  context: RefreshContext,
393
396
  transcript: str,
@@ -409,12 +412,8 @@ def _analyze_transcript(
409
412
  transcript,
410
413
  meeting_context=context.description,
411
414
  options=OpenAIRequestOptions(
412
- usage_callback=lambda response: record_openai_usage(
413
- context.connection,
414
- meeting_id=context.meeting.id,
415
- model=DEFAULT_ANALYSIS_MODEL,
416
- service="analysis",
417
- response=response,
415
+ usage_callback=lambda response: _record_openai_usage(
416
+ context, DEFAULT_ANALYSIS_MODEL, "analysis", response
418
417
  )
419
418
  ),
420
419
  ),
@@ -2,6 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  from collections.abc import Callable
4
4
  from dataclasses import dataclass, field
5
+ from typing import Any
5
6
 
6
7
  import httpx
7
8
 
@@ -20,7 +21,7 @@ class OpenAIRequestOptions:
20
21
  model: str = DEFAULT_ANALYSIS_MODEL
21
22
  api_key: str | None = None
22
23
  client: httpx.Client | None = None
23
- usage_callback: Callable[[dict], None] | None = None
24
+ usage_callback: Callable[[dict[str, Any]], None] | None = None
24
25
 
25
26
 
26
27
  @dataclass(frozen=True)
@@ -98,7 +99,7 @@ def _close_client(client: httpx.Client, close_client: bool) -> None:
98
99
  client.close()
99
100
 
100
101
 
101
- def _send_chat_completion(client: httpx.Client, api_key: str, request: ChatCompletionRequest) -> dict:
102
+ def _send_chat_completion(client: httpx.Client, api_key: str, request: ChatCompletionRequest) -> dict[str, Any]:
102
103
  response = client.post(
103
104
  API_URL,
104
105
  headers={"Authorization": f"Bearer {api_key}"},
@@ -115,7 +116,7 @@ def _send_chat_completion(client: httpx.Client, api_key: str, request: ChatCompl
115
116
  return response.json()
116
117
 
117
118
 
118
- def _record_usage(options: OpenAIRequestOptions, response_json: dict) -> None:
119
+ def _record_usage(options: OpenAIRequestOptions, response_json: dict[str, Any]) -> None:
119
120
  if options.usage_callback is not None:
120
121
  options.usage_callback(response_json)
121
122
 
@@ -180,7 +181,7 @@ Meeting context: {context}
180
181
  """.strip()
181
182
 
182
183
 
183
- def _extract_content(response: dict) -> str:
184
+ def _extract_content(response: dict[str, Any]) -> str:
184
185
  try:
185
186
  content = response["choices"][0]["message"]["content"]
186
187
  except (KeyError, IndexError, TypeError) as exc:
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from collections.abc import Callable
4
+ from typing import Any
4
5
 
5
6
  import httpx
6
7
 
@@ -23,7 +24,7 @@ def cleanup_transcript(
23
24
  model: str = DEFAULT_MODEL,
24
25
  api_key: str | None = None,
25
26
  client: httpx.Client | None = None,
26
- usage_callback: Callable[[dict], None] | None = None,
27
+ usage_callback: Callable[[dict[str, Any]], None] | None = None,
27
28
  ) -> str:
28
29
  resolved_api_key = api_key or get_api_key("openai")
29
30
  if not resolved_api_key:
@@ -83,7 +84,7 @@ Glossary terms: {glossary}
83
84
  """.strip()
84
85
 
85
86
 
86
- def _extract_content(response: dict) -> str:
87
+ def _extract_content(response: dict[str, Any]) -> str:
87
88
  try:
88
89
  content = response["choices"][0]["message"]["content"]
89
90
  except (KeyError, IndexError, TypeError) as exc:
File without changes
@@ -5,9 +5,9 @@ from dataclasses import dataclass
5
5
  from typing import Literal
6
6
 
7
7
  import keyring
8
- from keyring.errors import KeyringError
8
+ from keyring.errors import KeyringError, PasswordDeleteError
9
9
 
10
- from fly_on_the_wall.config import API_KEY_ENV_VARS
10
+ from fly_on_the_wall.api_keys import API_KEY_ENV_VARS
11
11
 
12
12
  KEYRING_SERVICE = "fly-on-the-wall"
13
13
  SecretSource = Literal["env", "keyring", "missing", "unknown"]
@@ -59,7 +59,7 @@ def remove_api_key(provider: str) -> None:
59
59
  normalized = _require_known_provider(provider)
60
60
  try:
61
61
  keyring.delete_password(KEYRING_SERVICE, normalized)
62
- except keyring.errors.PasswordDeleteError:
62
+ except PasswordDeleteError:
63
63
  return
64
64
  except KeyringError as exc:
65
65
  raise SecretError(f"Could not remove {normalized} API key from OS keyring: {exc}") from exc
@@ -183,7 +183,8 @@ def _print_watch_folders(console: Console, folders: list) -> None:
183
183
  console.print("Watched folders")
184
184
  for folder in folders:
185
185
  state = "enabled" if folder.enabled else "disabled"
186
- console.print(f"- {folder.path} ({state})")
186
+ cleanup = ", deletes originals" if folder.delete_originals_after_import else ""
187
+ console.print(f"- {folder.path} ({state}{cleanup})")
187
188
 
188
189
 
189
190
  def _prompt_watch_folder(console: Console, connection) -> None:
@@ -191,7 +192,8 @@ def _prompt_watch_folder(console: Console, connection) -> None:
191
192
  if not path_text:
192
193
  return
193
194
  name = typer.prompt("Folder name", default="").strip() or None
194
- folder = add_watch_folder(connection, Path(path_text), name)
195
+ delete_originals = typer.confirm("Delete original audio files after successful import?", default=False)
196
+ folder = add_watch_folder(connection, Path(path_text), name, delete_originals)
195
197
  console.print(f"Added watch folder: {folder.path}")
196
198
 
197
199
 
@@ -4,6 +4,7 @@ import json
4
4
  from dataclasses import dataclass
5
5
  from pathlib import Path
6
6
  from sqlite3 import Connection
7
+ from typing import TypedDict
7
8
  from uuid import uuid4
8
9
 
9
10
  from fly_on_the_wall.config import ConfidenceThresholds
@@ -19,6 +20,12 @@ class SpeakerMatch:
19
20
  margin: float | None
20
21
 
21
22
 
23
+ class SpeakerScore(TypedDict):
24
+ person_id: str
25
+ voice_sample_id: str
26
+ score: float
27
+
28
+
22
29
  def match_local_speakers(
23
30
  connection: Connection,
24
31
  provider_run_id: str,
@@ -75,7 +82,7 @@ def match_local_speaker(
75
82
  return SpeakerMatch(local_speaker_id, person_id, status, best["score"], margin)
76
83
 
77
84
 
78
- def _score_people(connection: Connection, local_vector: list[float]) -> list[dict[str, float | str]]:
85
+ def _score_people(connection: Connection, local_vector: list[float]) -> list[SpeakerScore]:
79
86
  rows = connection.execute(
80
87
  """
81
88
  SELECT person_id, id AS voice_sample_id, embedding_path
@@ -83,7 +90,7 @@ def _score_people(connection: Connection, local_vector: list[float]) -> list[dic
83
90
  WHERE embedding_path IS NOT NULL
84
91
  """
85
92
  ).fetchall()
86
- best_by_person: dict[str, dict[str, float | str]] = {}
93
+ best_by_person: dict[str, SpeakerScore] = {}
87
94
  for row in rows:
88
95
  score = cosine_similarity(local_vector, read_embedding(Path(row["embedding_path"])))
89
96
  current = best_by_person.get(row["person_id"])
@@ -27,6 +27,7 @@ class WatchFolder:
27
27
  name: str | None
28
28
  path: Path
29
29
  enabled: bool
30
+ delete_originals_after_import: bool
30
31
 
31
32
 
32
33
  @dataclass(frozen=True)
@@ -45,6 +46,7 @@ class WatchFile:
45
46
  size_bytes: int
46
47
  mtime_ns: int
47
48
  mtime: float
49
+ delete_original_after_import: bool
48
50
 
49
51
 
50
52
  @dataclass(frozen=True)
@@ -66,18 +68,23 @@ class WatchScanContext:
66
68
  progress: ProgressFn | None
67
69
 
68
70
 
69
- def add_watch_folder(connection: Connection, path: Path, name: str | None = None) -> WatchFolder:
71
+ def add_watch_folder(
72
+ connection: Connection,
73
+ path: Path,
74
+ name: str | None = None,
75
+ delete_originals_after_import: bool = False,
76
+ ) -> WatchFolder:
70
77
  resolved_path = _resolve_folder_path(path)
71
78
  folder_id = str(uuid4())
72
79
  with connection:
73
80
  connection.execute(
74
81
  """
75
- INSERT INTO watch_folders(id, name, path, enabled)
76
- VALUES (?, ?, ?, 1)
82
+ INSERT INTO watch_folders(id, name, path, enabled, delete_originals_after_import)
83
+ VALUES (?, ?, ?, 1, ?)
77
84
  """,
78
- (folder_id, name, str(resolved_path)),
85
+ (folder_id, name, str(resolved_path), 1 if delete_originals_after_import else 0),
79
86
  )
80
- return WatchFolder(folder_id, name, resolved_path, True)
87
+ return WatchFolder(folder_id, name, resolved_path, True, delete_originals_after_import)
81
88
 
82
89
 
83
90
  def list_watch_folders(connection: Connection) -> list[WatchFolder]:
@@ -85,7 +92,7 @@ def list_watch_folders(connection: Connection) -> list[WatchFolder]:
85
92
  _watch_folder_from_row(row)
86
93
  for row in connection.execute(
87
94
  """
88
- SELECT id, name, path, enabled
95
+ SELECT id, name, path, enabled, delete_originals_after_import
89
96
  FROM watch_folders
90
97
  ORDER BY created_at, path
91
98
  """
@@ -98,7 +105,7 @@ def get_watch_folder(connection: Connection, identifier: str) -> WatchFolder | N
98
105
  resolved_identifier_path = str(Path(identifier).expanduser().resolve())
99
106
  row = connection.execute(
100
107
  """
101
- SELECT id, name, path, enabled
108
+ SELECT id, name, path, enabled, delete_originals_after_import
102
109
  FROM watch_folders
103
110
  WHERE id = ? OR name = ? OR path = ? OR path = ?
104
111
  """,
@@ -129,7 +136,27 @@ def set_watch_folder_enabled(connection: Connection, identifier: str, enabled: b
129
136
  """,
130
137
  (1 if enabled else 0, folder.id),
131
138
  )
132
- return WatchFolder(folder.id, folder.name, folder.path, enabled)
139
+ return WatchFolder(folder.id, folder.name, folder.path, enabled, folder.delete_originals_after_import)
140
+
141
+
142
+ def set_watch_folder_delete_originals_after_import(
143
+ connection: Connection,
144
+ identifier: str,
145
+ delete_originals_after_import: bool,
146
+ ) -> WatchFolder | None:
147
+ folder = get_watch_folder(connection, identifier)
148
+ if folder is None:
149
+ return None
150
+ with connection:
151
+ connection.execute(
152
+ """
153
+ UPDATE watch_folders
154
+ SET delete_originals_after_import = ?, updated_at = CURRENT_TIMESTAMP
155
+ WHERE id = ?
156
+ """,
157
+ (1 if delete_originals_after_import else 0, folder.id),
158
+ )
159
+ return WatchFolder(folder.id, folder.name, folder.path, folder.enabled, delete_originals_after_import)
133
160
 
134
161
 
135
162
  def scan_watch_folders(
@@ -152,7 +179,7 @@ def scan_watch_folders(
152
179
 
153
180
  for audio_path in _audio_files(folder.path):
154
181
  seen += 1
155
- result = _scan_audio_file(context, _watch_file(folder.id, audio_path))
182
+ result = _scan_audio_file(context, _watch_file(folder, audio_path))
156
183
  processed += result.processed
157
184
  ignored += result.ignored
158
185
  skipped += result.skipped
@@ -171,6 +198,7 @@ def _watch_folder_from_row(row) -> WatchFolder:
171
198
  name=row["name"],
172
199
  path=Path(row["path"]),
173
200
  enabled=bool(row["enabled"]),
201
+ delete_originals_after_import=bool(row["delete_originals_after_import"]),
174
202
  )
175
203
 
176
204
 
@@ -187,9 +215,11 @@ def _audio_files(folder: Path) -> Iterable[Path]:
187
215
  yield path
188
216
 
189
217
 
190
- def _watch_file(folder_id: str, path: Path) -> WatchFile:
218
+ def _watch_file(folder: WatchFolder, path: Path) -> WatchFile:
191
219
  stat = path.stat()
192
- return WatchFile(folder_id, path, stat.st_size, stat.st_mtime_ns, stat.st_mtime)
220
+ return WatchFile(
221
+ folder.id, path, stat.st_size, stat.st_mtime_ns, stat.st_mtime, folder.delete_originals_after_import
222
+ )
193
223
 
194
224
 
195
225
  def _scan_audio_file(
@@ -234,9 +264,25 @@ def _process_audio_file(
234
264
  return WatchFileResult(failed=1)
235
265
 
236
266
  _mark_item_done(context.connection, item.path, result.meeting.id)
267
+ if item.delete_original_after_import:
268
+ _delete_original_audio_file(item.path, result.meeting.imported_audio_path, context.progress)
237
269
  return WatchFileResult(processed=1)
238
270
 
239
271
 
272
+ def _delete_original_audio_file(original_path: Path, imported_audio_path: Path, progress: ProgressFn | None) -> None:
273
+ if original_path.resolve() == imported_audio_path.resolve():
274
+ _report(progress, f"Keeping original because it is the imported audio file {original_path}")
275
+ return
276
+ try:
277
+ original_path.unlink()
278
+ except FileNotFoundError:
279
+ return
280
+ except OSError as exc:
281
+ _report(progress, f"Could not delete original audio file {original_path}: {exc}")
282
+ return
283
+ _report(progress, f"Deleted original audio file {original_path}")
284
+
285
+
240
286
  def _upsert_seen_item(connection: Connection, item: WatchFile) -> None:
241
287
  existing = _watch_item(connection, item.path)
242
288
  with connection:
File without changes
File without changes