@they-juanreina/compost-cli 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. package/dist/commands/agreement.d.ts +3 -0
  2. package/dist/commands/agreement.d.ts.map +1 -0
  3. package/dist/commands/agreement.js +35 -0
  4. package/dist/commands/agreement.js.map +1 -0
  5. package/dist/commands/create.d.ts +1 -0
  6. package/dist/commands/create.d.ts.map +1 -1
  7. package/dist/commands/create.js +39 -1
  8. package/dist/commands/create.js.map +1 -1
  9. package/dist/commands/export.d.ts.map +1 -1
  10. package/dist/commands/export.js +47 -4
  11. package/dist/commands/export.js.map +1 -1
  12. package/dist/commands/import.d.ts +3 -0
  13. package/dist/commands/import.d.ts.map +1 -0
  14. package/dist/commands/import.js +59 -0
  15. package/dist/commands/import.js.map +1 -0
  16. package/dist/commands/init.d.ts.map +1 -1
  17. package/dist/commands/init.js +1 -0
  18. package/dist/commands/init.js.map +1 -1
  19. package/dist/commands/jobs.d.ts +3 -0
  20. package/dist/commands/jobs.d.ts.map +1 -0
  21. package/dist/commands/jobs.js +105 -0
  22. package/dist/commands/jobs.js.map +1 -0
  23. package/dist/commands/label.d.ts +3 -0
  24. package/dist/commands/label.d.ts.map +1 -0
  25. package/dist/commands/label.js +67 -0
  26. package/dist/commands/label.js.map +1 -0
  27. package/dist/commands/models.d.ts.map +1 -1
  28. package/dist/commands/models.js +2 -1
  29. package/dist/commands/models.js.map +1 -1
  30. package/dist/commands/recode.d.ts +3 -0
  31. package/dist/commands/recode.d.ts.map +1 -0
  32. package/dist/commands/recode.js +60 -0
  33. package/dist/commands/recode.js.map +1 -0
  34. package/dist/commands/reindex.d.ts.map +1 -1
  35. package/dist/commands/reindex.js +6 -4
  36. package/dist/commands/reindex.js.map +1 -1
  37. package/dist/commands/rerun.d.ts +3 -0
  38. package/dist/commands/rerun.d.ts.map +1 -0
  39. package/dist/commands/rerun.js +91 -0
  40. package/dist/commands/rerun.js.map +1 -0
  41. package/dist/commands/search.d.ts.map +1 -1
  42. package/dist/commands/search.js +2 -1
  43. package/dist/commands/search.js.map +1 -1
  44. package/dist/commands/secrets.d.ts +3 -0
  45. package/dist/commands/secrets.d.ts.map +1 -0
  46. package/dist/commands/secrets.js +143 -0
  47. package/dist/commands/secrets.js.map +1 -0
  48. package/dist/commands/setup.d.ts.map +1 -1
  49. package/dist/commands/setup.js +90 -1
  50. package/dist/commands/setup.js.map +1 -1
  51. package/dist/commands/status.d.ts.map +1 -1
  52. package/dist/commands/status.js +2 -1
  53. package/dist/commands/status.js.map +1 -1
  54. package/dist/commands/transcribe.d.ts.map +1 -1
  55. package/dist/commands/transcribe.js +13 -2
  56. package/dist/commands/transcribe.js.map +1 -1
  57. package/dist/commands/validate.d.ts.map +1 -1
  58. package/dist/commands/validate.js +29 -1
  59. package/dist/commands/validate.js.map +1 -1
  60. package/dist/engine.d.ts +23 -0
  61. package/dist/engine.d.ts.map +1 -0
  62. package/dist/engine.js +32 -0
  63. package/dist/engine.js.map +1 -0
  64. package/dist/exporters/prov.d.ts +11 -0
  65. package/dist/exporters/prov.d.ts.map +1 -0
  66. package/dist/exporters/prov.js +151 -0
  67. package/dist/exporters/prov.js.map +1 -0
  68. package/dist/index.d.ts.map +1 -1
  69. package/dist/index.js +6 -0
  70. package/dist/index.js.map +1 -1
  71. package/dist/lib/agreement.d.ts +77 -0
  72. package/dist/lib/agreement.d.ts.map +1 -0
  73. package/dist/lib/agreement.js +261 -0
  74. package/dist/lib/agreement.js.map +1 -0
  75. package/dist/lib/artifacts.d.ts +32 -1
  76. package/dist/lib/artifacts.d.ts.map +1 -1
  77. package/dist/lib/artifacts.js +156 -22
  78. package/dist/lib/artifacts.js.map +1 -1
  79. package/dist/lib/blame.d.ts.map +1 -1
  80. package/dist/lib/blame.js +3 -2
  81. package/dist/lib/blame.js.map +1 -1
  82. package/dist/lib/config.d.ts +3 -0
  83. package/dist/lib/config.d.ts.map +1 -1
  84. package/dist/lib/config.js.map +1 -1
  85. package/dist/lib/doctor.d.ts +3 -0
  86. package/dist/lib/doctor.d.ts.map +1 -1
  87. package/dist/lib/doctor.js +24 -1
  88. package/dist/lib/doctor.js.map +1 -1
  89. package/dist/lib/events.d.ts +34 -1
  90. package/dist/lib/events.d.ts.map +1 -1
  91. package/dist/lib/events.js +35 -1
  92. package/dist/lib/events.js.map +1 -1
  93. package/dist/lib/importTranscript.d.ts +16 -0
  94. package/dist/lib/importTranscript.d.ts.map +1 -0
  95. package/dist/lib/importTranscript.js +94 -0
  96. package/dist/lib/importTranscript.js.map +1 -0
  97. package/dist/lib/ingest.d.ts.map +1 -1
  98. package/dist/lib/ingest.js +12 -6
  99. package/dist/lib/ingest.js.map +1 -1
  100. package/dist/lib/journal.d.ts +13 -0
  101. package/dist/lib/journal.d.ts.map +1 -1
  102. package/dist/lib/journal.js +58 -2
  103. package/dist/lib/journal.js.map +1 -1
  104. package/dist/lib/legacyNative.d.ts +24 -0
  105. package/dist/lib/legacyNative.d.ts.map +1 -0
  106. package/dist/lib/legacyNative.js +51 -0
  107. package/dist/lib/legacyNative.js.map +1 -0
  108. package/dist/lib/migrate.d.ts.map +1 -1
  109. package/dist/lib/migrate.js +1 -0
  110. package/dist/lib/migrate.js.map +1 -1
  111. package/dist/lib/nativeRuntime.d.ts +6 -3
  112. package/dist/lib/nativeRuntime.d.ts.map +1 -1
  113. package/dist/lib/nativeRuntime.js +6 -3
  114. package/dist/lib/nativeRuntime.js.map +1 -1
  115. package/dist/lib/provisionNative.js +1 -1
  116. package/dist/lib/provisionNative.js.map +1 -1
  117. package/dist/lib/queue.d.ts +25 -0
  118. package/dist/lib/queue.d.ts.map +1 -1
  119. package/dist/lib/queue.js +70 -3
  120. package/dist/lib/queue.js.map +1 -1
  121. package/dist/lib/reads.d.ts +24 -0
  122. package/dist/lib/reads.d.ts.map +1 -0
  123. package/dist/lib/reads.js +115 -0
  124. package/dist/lib/reads.js.map +1 -0
  125. package/dist/lib/recode.d.ts +19 -0
  126. package/dist/lib/recode.d.ts.map +1 -0
  127. package/dist/lib/recode.js +43 -0
  128. package/dist/lib/recode.js.map +1 -0
  129. package/dist/lib/rerun.d.ts +51 -0
  130. package/dist/lib/rerun.d.ts.map +1 -0
  131. package/dist/lib/rerun.js +166 -0
  132. package/dist/lib/rerun.js.map +1 -0
  133. package/dist/lib/retrieve.d.ts +8 -4
  134. package/dist/lib/retrieve.d.ts.map +1 -1
  135. package/dist/lib/retrieve.js +12 -10
  136. package/dist/lib/retrieve.js.map +1 -1
  137. package/dist/lib/schemas.generated.d.ts.map +1 -1
  138. package/dist/lib/schemas.generated.js +28 -0
  139. package/dist/lib/schemas.generated.js.map +1 -1
  140. package/dist/lib/secrets.d.ts +158 -0
  141. package/dist/lib/secrets.d.ts.map +1 -0
  142. package/dist/lib/secrets.js +507 -0
  143. package/dist/lib/secrets.js.map +1 -0
  144. package/dist/lib/seed.d.ts +5 -0
  145. package/dist/lib/seed.d.ts.map +1 -1
  146. package/dist/lib/seed.js +15 -2
  147. package/dist/lib/seed.js.map +1 -1
  148. package/dist/lib/seedResolve.d.ts.map +1 -1
  149. package/dist/lib/seedResolve.js +1 -0
  150. package/dist/lib/seedResolve.js.map +1 -1
  151. package/dist/lib/session.d.ts +14 -0
  152. package/dist/lib/session.d.ts.map +1 -1
  153. package/dist/lib/session.js +47 -0
  154. package/dist/lib/session.js.map +1 -1
  155. package/dist/lib/setup.d.ts +5 -0
  156. package/dist/lib/setup.d.ts.map +1 -1
  157. package/dist/lib/setup.js +78 -14
  158. package/dist/lib/setup.js.map +1 -1
  159. package/dist/lib/setupWizard.d.ts +51 -0
  160. package/dist/lib/setupWizard.d.ts.map +1 -0
  161. package/dist/lib/setupWizard.js +223 -0
  162. package/dist/lib/setupWizard.js.map +1 -0
  163. package/dist/lib/snap.d.ts.map +1 -1
  164. package/dist/lib/snap.js +2 -5
  165. package/dist/lib/snap.js.map +1 -1
  166. package/dist/lib/speakers.d.ts +41 -0
  167. package/dist/lib/speakers.d.ts.map +1 -0
  168. package/dist/lib/speakers.js +78 -0
  169. package/dist/lib/speakers.js.map +1 -0
  170. package/dist/lib/status.d.ts.map +1 -1
  171. package/dist/lib/status.js +21 -0
  172. package/dist/lib/status.js.map +1 -1
  173. package/dist/lib/userConfig.d.ts +22 -0
  174. package/dist/lib/userConfig.d.ts.map +1 -0
  175. package/dist/lib/userConfig.js +67 -0
  176. package/dist/lib/userConfig.js.map +1 -0
  177. package/dist/lib/validate.d.ts +18 -0
  178. package/dist/lib/validate.d.ts.map +1 -1
  179. package/dist/lib/validate.js +70 -1
  180. package/dist/lib/validate.js.map +1 -1
  181. package/dist/lib/version.d.ts +30 -0
  182. package/dist/lib/version.d.ts.map +1 -0
  183. package/dist/lib/version.js +73 -0
  184. package/dist/lib/version.js.map +1 -0
  185. package/dist/llm/adapter.d.ts.map +1 -1
  186. package/dist/llm/adapter.js +2 -0
  187. package/dist/llm/adapter.js.map +1 -1
  188. package/dist/llm/providers/ollama.d.ts.map +1 -1
  189. package/dist/llm/providers/ollama.js +6 -0
  190. package/dist/llm/providers/ollama.js.map +1 -1
  191. package/dist/loops/ingest_watcher.d.ts.map +1 -1
  192. package/dist/loops/ingest_watcher.js +6 -3
  193. package/dist/loops/ingest_watcher.js.map +1 -1
  194. package/dist/loops/legacy_worker.d.ts +28 -1
  195. package/dist/loops/legacy_worker.d.ts.map +1 -1
  196. package/dist/loops/legacy_worker.js +81 -9
  197. package/dist/loops/legacy_worker.js.map +1 -1
  198. package/dist/loops/supervisor.d.ts +3 -0
  199. package/dist/loops/supervisor.d.ts.map +1 -1
  200. package/dist/loops/supervisor.js +12 -0
  201. package/dist/loops/supervisor.js.map +1 -1
  202. package/dist/loops/synthesis.d.ts.map +1 -1
  203. package/dist/loops/synthesis.js +15 -0
  204. package/dist/loops/synthesis.js.map +1 -1
  205. package/dist/loops/transcribe_worker.d.ts.map +1 -1
  206. package/dist/loops/transcribe_worker.js +2 -4
  207. package/dist/loops/transcribe_worker.js.map +1 -1
  208. package/dist/output.d.ts +13 -1
  209. package/dist/output.d.ts.map +1 -1
  210. package/dist/output.js +22 -2
  211. package/dist/output.js.map +1 -1
  212. package/dist/render/human.d.ts +20 -0
  213. package/dist/render/human.d.ts.map +1 -0
  214. package/dist/render/human.js +54 -0
  215. package/dist/render/human.js.map +1 -0
  216. package/dist/router.d.ts.map +1 -1
  217. package/dist/router.js +17 -2
  218. package/dist/router.js.map +1 -1
  219. package/package.json +18 -5
  220. package/templates/config.toml +6 -1
  221. package/transcriber/app/__init__.py +3 -0
  222. package/transcriber/app/asr.py +198 -0
  223. package/transcriber/app/asr_parakeet.py +174 -0
  224. package/transcriber/app/cue_parser.py +110 -0
  225. package/transcriber/app/diarization.py +330 -0
  226. package/transcriber/app/frame_annotation.py +77 -0
  227. package/transcriber/app/frames.py +130 -0
  228. package/transcriber/app/health.py +70 -0
  229. package/transcriber/app/legacy.py +355 -0
  230. package/transcriber/app/legacy_cli.py +90 -0
  231. package/transcriber/app/main.py +30 -0
  232. package/transcriber/app/pipeline.py +210 -0
  233. package/transcriber/app/pptx_export.py +42 -0
  234. package/transcriber/app/prosody.py +128 -0
  235. package/transcriber/app/routes/__init__.py +1 -0
  236. package/transcriber/app/routes/legacy.py +117 -0
  237. package/transcriber/app/routes/transcribe.py +133 -0
  238. package/transcriber/app/shot_change.py +74 -0
  239. package/transcriber/app/silence_typer.py +144 -0
  240. package/transcriber/app/transcribe_cli.py +82 -0
  241. package/transcriber/app/vad.py +216 -0
  242. package/transcriber/pyproject.toml +56 -0
@@ -0,0 +1,210 @@
1
+ """Transcription pipeline orchestrator (#v0.1-01).
2
+
3
+ Composes the already-tested deterministic stages into a single transcript.json:
4
+
5
+ duration probe → VAD speech/silences → ASR → diarization align →
6
+ cue parser → silence typer → prosody → final transcript
7
+
8
+ Each stage accepts injectable backends so the route, the worker, and the tests
9
+ all share one orchestration codepath. The route in `routes/transcribe.py`
10
+ provides real backends; tests pass fakes.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ import subprocess
17
+ from dataclasses import dataclass
18
+ from pathlib import Path
19
+ from typing import Any
20
+
21
+ from .asr import ASRConfig, Transcriber, WhisperBackend
22
+ from .cue_parser import parse_transcript_cues
23
+ from .diarization import DiarizationBackend, Diarizer, align
24
+ from .prosody import annotate_prosody
25
+ from .silence_typer import type_all_silences
26
+ from .vad import VAD, VADBackend, silences_to_schema, utterance_energies
27
+
28
# Written to transcript["schema_version"] by run_pipeline.
SCHEMA_VERSION = "1.0"
# Default for PipelineConfig.transcriber_version.
DEFAULT_TRANSCRIBER_VERSION = "compost-transcriber@0.1.0"


@dataclass
class PipelineConfig:
    """Settings for one pipeline run.

    Holds the ASR configuration plus the version/model tags that run_pipeline
    copies into the transcript's ``provenance`` block.
    """

    # Passed to Transcriber(config=...); its ``language`` is also the fallback
    # language hint when ASR reports none.
    asr: ASRConfig
    # Recorded as provenance["transcriber"].
    transcriber_version: str = DEFAULT_TRANSCRIBER_VERSION
    # Recorded as provenance["asr_model"].
    asr_model_tag: str = "whisper-large-v3-turbo-event-tags"
    # Recorded as provenance["diarizer"].
    diarizer_tag: str = "pyannote-audio@3.3"
    # Combined with " + whisper-events" to form provenance["audio_cues"].
    vad_tag: str = "silero-vad@5.0"
39
+
40
+
41
@dataclass
class PipelineBackends:
    """Inject concrete or fake backends. Route wires real ones; tests inject fakes.

    Every field defaults to None and is handed unchanged to the matching stage
    wrapper inside run_pipeline.
    """

    # Passed to VAD(backend=...).
    vad: VADBackend | None = None
    # Passed to Transcriber(backend=...).
    asr: WhisperBackend | None = None
    # Passed to Diarizer(backend=...).
    diarization: DiarizationBackend | None = None
48
+
49
+
50
def probe_duration_ms(source_path: str) -> int:
    """Return the duration of an audio/video file in milliseconds via ffprobe.

    Falls back to 0 whenever the duration cannot be determined: ffprobe is
    missing or cannot be executed, it times out (30 s), it exits non-zero, or
    its output is not a finite number. The caller can decide whether to error
    or proceed (silence segmentation against duration=0 produces no trailing
    silence, which is fine).

    Args:
        source_path: Path of the media file to probe.

    Returns:
        Duration in whole milliseconds, or 0 on any probe failure.
    """
    command = [
        "ffprobe",
        "-v",
        "error",
        "-show_entries",
        "format=duration",
        "-of",
        "default=noprint_wrappers=1:nokey=1",
        source_path,
    ]
    try:
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=30,
            check=False,
        )
    except (OSError, subprocess.TimeoutExpired):
        # OSError covers a missing binary (FileNotFoundError) as well as one
        # that exists but cannot be executed (PermissionError).
        return 0

    if result.returncode != 0:
        return 0

    try:
        return int(float(result.stdout.strip()) * 1000)
    except (ValueError, OverflowError):
        # ValueError: empty / "N/A" / NaN output. OverflowError: "inf".
        return 0
79
+
80
+
81
+ def _speakers_from_utterances(utterances: list[dict[str, Any]]) -> list[dict[str, Any]]:
82
+ """Distinct speakers seen in the utterances; first speaker tagged as moderator,
83
+ the rest as participants (researcher overrides this in the UI for now).
84
+ """
85
+ seen: dict[str, dict[str, Any]] = {}
86
+ for u in utterances:
87
+ sid = u.get("speaker_id", "S?")
88
+ if sid in seen:
89
+ continue
90
+ seen[sid] = {"id": sid, "name": sid, "type": "participant"}
91
+ # First seen → moderator by convention. Researcher can override post-hoc.
92
+ if seen:
93
+ first = next(iter(seen))
94
+ seen[first]["type"] = "moderator"
95
+ return list(seen.values())
96
+
97
+
98
+ def _detect_language(asr_lang: str | None, configured: str | None) -> str:
99
+ """Prefer ASR-detected, then configured hint, then 'und' (undetermined)."""
100
+ if asr_lang:
101
+ return asr_lang
102
+ if configured:
103
+ return configured
104
+ return "und"
105
+
106
+
107
def run_pipeline(
    seed_path: str,
    session_id: str,
    source_path: str,
    config: PipelineConfig,
    backends: PipelineBackends,
) -> dict[str, Any]:
    """Run every stage and return the final transcript dict.

    Side-effect-free except for backends' own model loading. The route writes
    the result to disk separately so this function is testable as pure
    transformation given the backends.

    Args:
        seed_path: Seed location; used here only to compute the transcript's
            ``source`` field.
        session_id: Stored verbatim as ``session_id`` in the transcript.
        source_path: Media file fed to the duration probe, VAD, ASR and
            diarization.
        config: ASR settings plus the tags recorded under ``provenance``.
        backends: Backends injected into the VAD, ASR and diarization stages.

    Returns:
        The transcript dict after all stages have run.

    Raises:
        FileNotFoundError: If ``source_path`` does not exist.
    """
    if not Path(source_path).exists():
        raise FileNotFoundError(f"source not found: {source_path}")

    # 0 when ffprobe cannot determine the duration.
    duration_ms = probe_duration_ms(source_path)

    # 1. VAD — speech segments (carry per-segment RMS energy) + first-class silences
    vad = VAD(backend=backends.vad)
    speech, silences = vad.segment(source_path, duration_ms)

    # 2. ASR — utterances with word timings, may contain event tags inline
    asr = Transcriber(config=config.asr, backend=backends.asr)
    asr_result = asr.transcribe(source_path)

    # 3. Initial transcript shell
    transcript: dict[str, Any] = {
        "schema_version": SCHEMA_VERSION,
        "kind": "session",
        "session_id": session_id,
        "source": _relative_source(seed_path, source_path),
        "language": _detect_language(asr_result.language, config.asr.language),
        "duration_ms": duration_ms,
        "modality": _modality(source_path),
        "speakers": [],
        "utterances": asr_result.utterances,
        "silences": silences_to_schema(silences),
        "cues": [],
        "frames": [],
        "glossary_refs": [],
        # frame_capture / frame_annotation are omitted (not null): the schema
        # types provenance fields as strings and the convention is "absent when
        # not applicable". run_pipeline does no frame capture/annotation; those
        # stages (frames.py / frame_annotation.py) add their own provenance when
        # they run.
        "provenance": {
            "transcriber": config.transcriber_version,
            "asr_model": config.asr_model_tag,
            "diarizer": config.diarizer_tag,
            "audio_cues": f"{config.vad_tag} + whisper-events",
        },
    }

    # 4. Diarization — assign speaker_id per utterance + overlap cues
    # (align's return value is unused; presumably it mutates `transcript` in
    # place — confirm against diarization.align.)
    diarizer = Diarizer(backend=backends.diarization)
    turns = diarizer.diarize(source_path)
    align(transcript, turns)

    # 5. Speakers list, derived from the diarized utterances
    transcript["speakers"] = _speakers_from_utterances(transcript["utterances"])

    # 6. Cue parser — strip [laughter]/[sigh]/etc from utterance text into cues[]
    parse_transcript_cues(transcript)

    # 7. Silence semantic typing (after_question / thinking / interruption / …)
    type_all_silences(transcript)

    # 8. Prosody hints per utterance (deterministic, cheap). Volume bucketing
    # needs the per-utterance VAD RMS energy signal mapped from the speech
    # segments; without it volume would default to "normal" for every utterance.
    energies = utterance_energies(speech, transcript["utterances"])
    annotate_prosody(transcript, energies)

    return transcript
182
+
183
+
184
+ def _relative_source(seed_path: str, source_path: str) -> str:
185
+ """Return a seed-relative path for transcript.source if the source lives
186
+ inside the seed; otherwise return the absolute path unchanged.
187
+ """
188
+ try:
189
+ return str(Path(source_path).relative_to(Path(seed_path).parent))
190
+ except ValueError:
191
+ return source_path
192
+
193
+
194
+ def _modality(source_path: str) -> list[str]:
195
+ """Coarse modality flag from file extension. Video files imply both audio
196
+ and video tracks (the player will only render video if present).
197
+ """
198
+ ext = Path(source_path).suffix.lower()
199
+ if ext in {".mp4", ".mov", ".mkv", ".webm", ".avi", ".m4v"}:
200
+ return ["audio", "video"]
201
+ return ["audio"]
202
+
203
+
204
def write_transcript(seed_path: str, session_id: str, transcript: dict[str, Any]) -> str:
    """Serialize the transcript to ``<seed_path>/sessions/<session_id>/transcript.json``.

    Missing directories are created. The file is UTF-8, indented by two
    spaces, keeps non-ASCII characters unescaped and ends with a newline.

    Returns:
        The path of the written file, as a string.
    """
    target = Path(seed_path).joinpath("sessions", session_id, "transcript.json")
    target.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(transcript, indent=2, ensure_ascii=False)
    target.write_text(payload + "\n", encoding="utf-8")
    return str(target)
@@ -0,0 +1,42 @@
1
+ """PPTX deck export (#66).
2
+
3
+ Turns a report deck-spec (built by cli/src/exporters/report.ts → buildDeckSpec)
4
+ into a .pptx: one slide per entry, bullets as body, citations as slide notes.
5
+ Branding (title color) is accepted per seed but not applied yet. python-pptx is lazily imported.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Any
11
+
12
+
13
def export_deck(spec: list[dict[str, Any]], out_path: str, branding: dict[str, Any] | None = None) -> str:
    """Render a deck-spec to a .pptx file and return the output path.

    Each entry of ``spec`` becomes one slide. The keys read from an entry are
    ``"title"`` (slide title; missing or None becomes an empty title),
    ``"bullets"`` (one textbox paragraph per item, each passed through
    ``str``) and ``"notes"`` (written to the slide notes when non-empty).

    Args:
        spec: Slide entries, in presentation order.
        out_path: Destination path for the .pptx file.
        branding: Accepted for interface compatibility; not applied yet.

    Returns:
        ``out_path``, unchanged.

    Raises:
        RuntimeError: If python-pptx is not installed.
    """
    try:
        from pptx import Presentation  # type: ignore
        from pptx.util import Pt  # type: ignore
    except ImportError as e:
        raise RuntimeError("python-pptx not installed (pip install -e '.[legacy]')") from e

    prs = Presentation()
    # Index 5 of the default template is the "Title Only" layout; bullets go
    # into a textbox added below rather than a body placeholder.
    title_only = prs.slide_layouts[5]

    for slide_spec in spec:
        slide = prs.slides.add_slide(title_only)
        # An explicit None title must not reach the text setter.
        title = slide_spec.get("title")
        slide.shapes.title.text = "" if title is None else str(title)

        # bullets in a textbox
        frame = slide.shapes.add_textbox(Pt(40), Pt(120), Pt(640), Pt(360)).text_frame
        frame.word_wrap = True
        for index, bullet in enumerate(slide_spec.get("bullets", [])):
            # A new text frame already has one empty paragraph; reuse it first.
            paragraph = frame.paragraphs[0] if index == 0 else frame.add_paragraph()
            paragraph.text = str(bullet)

        # citations → slide notes
        notes = slide_spec.get("notes", "")
        if notes:
            slide.notes_slide.notes_text_frame.text = notes

    prs.save(out_path)
    return out_path
@@ -0,0 +1,128 @@
1
+ """Prosody hint extractor (#13).
2
+
3
+ Deterministic per-utterance hints derived from word timings, optional VAD
4
+ energy, and speech rate. No ML model — cheap, reproducible context.
5
+
6
+ Output shape (matches transcript.schema.json #/$defs/prosody):
7
+ {"volume": "low|normal|high", "pace": "slow|normal|fast", "hesitations": int}
8
+
9
+ Thresholds are module constants, documented here for reproducibility:
10
+
11
+ pace (words per second over the utterance span):
12
+ < 2.0 → slow
13
+ > 3.3 → fast
14
+ else → normal
15
+
16
+ volume (mean VAD RMS energy, normalized 0..1; requires the energy signal
17
+ from Silero VAD, issue #9). When energy is unavailable we report "normal"
18
+ rather than guess:
19
+ < 0.33 → low
20
+ > 0.66 → high
21
+ else → normal
22
+
23
+ hesitations = filler tokens + immediate word repetitions + long
24
+ intra-utterance gaps (> 400 ms between consecutive words).
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import re
30
+ from typing import Any
31
+
32
+ PACE_SLOW_WPS = 2.0
33
+ PACE_FAST_WPS = 3.3
34
+ # Volume thresholds applied to the normalized 0..1 energy. These are GLOBAL
35
+ # constants, not per-speaker. The energy signal is session-normalized in
36
+ # vad.utterance_energies(); a soft speaker's loudest moment can still read below
37
+ # a loud speaker's baseline. TODO(#13): per-speaker normalization would make
38
+ # these buckets speaker-relative — see vad.utterance_energies for the plan.
39
+ VOLUME_LOW = 0.33
40
+ VOLUME_HIGH = 0.66
41
+ HESITATION_GAP_MS = 400
42
+
43
+ # Multilingual (es-CO + en) filler set.
44
+ _FILLERS = {
45
+ "uh",
46
+ "um",
47
+ "eh",
48
+ "em",
49
+ "este",
50
+ "esto",
51
+ "mmm",
52
+ "hmm",
53
+ "like",
54
+ "pues",
55
+ }
56
+ _FILLER_PHRASES = ("o sea", "you know", "es decir")
57
+
58
+ _WORD_RE = re.compile(r"[^\W\d_]+", re.UNICODE)
59
+
60
+
61
def _pace(text: str, start_ms: int, end_ms: int) -> str:
    """Bucket speech rate into "slow", "normal" or "fast".

    The rate is the number of letter-only words in ``text`` divided by the
    utterance span in seconds; the span is floored at 1e-6 s so a zero-length
    or inverted span cannot divide by zero.
    """
    span_s = (end_ms - start_ms) / 1000.0
    if span_s < 1e-6:
        span_s = 1e-6
    rate = len(_WORD_RE.findall(text)) / span_s

    if rate < PACE_SLOW_WPS:
        return "slow"
    return "fast" if rate > PACE_FAST_WPS else "normal"
70
+
71
+
72
def _volume(energy: float | None) -> str:
    """Bucket an energy value into "low", "normal" or "high".

    A missing energy (None) is reported as "normal" rather than guessed.
    """
    if energy is not None:
        if energy < VOLUME_LOW:
            return "low"
        if energy > VOLUME_HIGH:
            return "high"
    return "normal"
80
+
81
+
82
def _count_hesitations(text: str, words: list[dict[str, Any]] | None) -> int:
    """Count hesitation markers in one utterance.

    The total is the sum of:
      * single-token fillers (members of ``_FILLERS``),
      * filler phrases from ``_FILLER_PHRASES``, matched on word boundaries so
        that "no sea" is not counted as "o sea",
      * immediate repetitions of a token longer than one character,
      * gaps longer than ``HESITATION_GAP_MS`` between consecutive entries of
        ``words``.

    Args:
        text: The utterance text.
        words: Optional word timings; each entry is read for ``"s"`` and
            ``"e"`` (presumably start/end in ms — confirm against the ASR
            output). Pairs with a missing timing are skipped.

    Returns:
        The number of hesitation markers found.
    """
    tokens = [token.lower() for token in _WORD_RE.findall(text)]

    # filler single tokens
    count = sum(1 for token in tokens if token in _FILLERS)

    # filler phrases — whole-phrase matches only, never inside other words
    lowered = text.lower()
    for phrase in _FILLER_PHRASES:
        pattern = rf"(?<!\w){re.escape(phrase)}(?!\w)"
        count += len(re.findall(pattern, lowered))

    # immediate repetitions ("yo yo", "the the")
    count += sum(
        1
        for current, following in zip(tokens, tokens[1:], strict=False)
        if current == following and len(current) > 1
    )

    # long gaps between consecutive words
    if words:
        for prev, nxt in zip(words, words[1:], strict=False):
            gap_start = prev.get("e")
            gap_end = nxt.get("s")
            if gap_start is None or gap_end is None:
                # Without both timings there is no measurable gap.
                continue
            if gap_end - gap_start > HESITATION_GAP_MS:
                count += 1

    return count
106
+
107
+
108
def extract_prosody(utterance: dict[str, Any], energy: float | None = None) -> dict[str, Any]:
    """Build the ``{volume, pace, hesitations}`` hints for one utterance.

    ``utterance`` must carry ``start_ms`` and ``end_ms``; ``text`` defaults to
    an empty string and ``words`` is optional. ``energy`` feeds the volume
    bucket only.
    """
    text = utterance.get("text", "")
    volume = _volume(energy)
    pace = _pace(text, utterance["start_ms"], utterance["end_ms"])
    hesitations = _count_hesitations(text, utterance.get("words"))
    return {"volume": volume, "pace": pace, "hesitations": hesitations}
116
+
117
+
118
def annotate_prosody(
    transcript: dict[str, Any],
    energies: dict[str, float] | None = None,
) -> dict[str, Any]:
    """Set ``prosody`` on each utterance of the transcript, in place.

    ``energies`` maps utterance id → energy value; utterances without an entry
    (or all of them, when ``energies`` is None or empty) get no energy.

    Returns:
        The same ``transcript`` object that was passed in.
    """
    energy_by_id = energies if energies else {}
    for utterance in transcript.get("utterances", []):
        energy = energy_by_id.get(utterance.get("id"))
        utterance["prosody"] = extract_prosody(utterance, energy)
    return transcript
@@ -0,0 +1 @@
1
+ """HTTP route modules. Each subsystem (#9-#15) mounts its own router."""
@@ -0,0 +1,117 @@
1
+ """POST /legacy-ingest — normalize a legacy document into a transcript.json.
2
+
3
+ The Node-side legacy-worker (cli/src/loops/legacy_worker.ts) pulls
4
+ `legacy-ingest` jobs from the queue and POSTs each here. The route dispatches
5
+ by file extension to the pure ingestors in `app/legacy.py`, then writes the
6
+ normalized JSON to `<seed>/legacy/<basename>.json`.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
+ from fastapi import APIRouter, HTTPException, status
16
+ from pydantic import BaseModel, Field
17
+
18
+ from ..legacy import ingest as ingest_legacy
19
+
20
+ router = APIRouter()
21
+
22
+
23
class LegacyIngestRequest(BaseModel):
    """JSON body for POST /legacy-ingest.

    The three optional fields are forwarded to the ingestor only when they are
    not None.
    """

    seed_path: str = Field(..., description="Absolute path to the seed root.")
    source_path: str = Field(..., description="Absolute path to the asset to ingest.")
    # CSV/XLSX column mapping — if text_col is None, the ingestor auto-detects
    # from the header (text → transcript → content → utterance → quote →
    # message → body, then first-column fallback). Node-side workers may also
    # consult a `<source_path>.compost.json` sidecar that takes precedence.
    text_col: str | None = Field(
        None,
        description="Column holding the utterance text (CSV/XLSX). Auto-detected if None.",
    )
    speaker_col: str | None = Field(None, description="Optional column for speaker label.")
    sheet: str | None = Field(None, description="Optional XLSX sheet name (defaults to active).")
36
+
37
+
38
class LegacyIngestResponse(BaseModel):
    """Response body for POST /legacy-ingest.

    The route itself only returns status "ok" (at least one utterance) or
    "empty"; its error paths raise HTTP errors instead of returning a body.
    """

    source_path: str  # echoed from the request
    normalized_path: str  # where the normalized JSON was written
    utterance_count: int
    status: str  # ok | empty | failed
    text_col_resolved: str | None = None  # which column was actually used (CSV/XLSX)
    warnings: list[str] = []  # surfaced UX hints (e.g. xlsx un-evaluated formulas)
45
+
46
+
47
@router.post(
    "/legacy-ingest",
    response_model=LegacyIngestResponse,
    status_code=status.HTTP_200_OK,
    summary="Normalize a PDF/DOCX/PPTX/CSV/XLSX/TXT/MD into a transcript-shaped JSON.",
)
def post_legacy_ingest(req: LegacyIngestRequest) -> LegacyIngestResponse:
    """Ingest one legacy document and write ``<seed>/legacy/<stem>.json``.

    Responds 404 when the source or the seed path does not exist, 422 when the
    ingestor raises ValueError (unsupported extension or missing column) and
    503 when it raises RuntimeError (missing optional dependency). On success
    the normalized document is written to disk and summarized in the response.
    """
    source = Path(req.source_path)
    seed_root = Path(req.seed_path)

    # Source is checked first, then the seed.
    for label, location, raw in (
        ("source", source, req.source_path),
        ("seed", seed_root, req.seed_path),
    ):
        if not location.exists():
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"{label} not found: {raw}",
            )

    # Forward only the options the caller actually set.
    requested: dict[str, Any] = {
        "text_col": req.text_col,
        "speaker_col": req.speaker_col,
        "sheet": req.sheet,
    }
    ingest_kwargs = {key: value for key, value in requested.items() if value is not None}

    try:
        doc = ingest_legacy(source, **ingest_kwargs)
    except ValueError as e:
        # Unsupported ext or missing column — surface as 422 so the worker
        # can mark the job failed and the CLI can show the researcher what's wrong.
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=f"invalid_input: {e}",
        ) from e
    except RuntimeError as e:
        # Missing optional dep (python-docx, openpyxl, etc.) — 503 so the
        # CLI can route to `compost setup --fix`.
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail=f"dep_missing: {e}",
        ) from e

    # Write normalized JSON under <seed>/legacy/<basename>.json
    destination = seed_root / "legacy" / f"{source.stem}.json"
    destination.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(doc, indent=2, ensure_ascii=False)
    destination.write_text(serialized + "\n", encoding="utf-8")

    n_utterances = len(doc.get("utterances", []))
    provenance = doc.get("provenance", {})

    hints: list[str] = []
    skipped = provenance.get("xlsx_rows_skipped_empty_text", 0)
    if skipped > 0:
        hints.append(
            f"{skipped} XLSX row(s) had data in other columns but an empty text cell — "
            "likely an un-evaluated formula. Open the file in Excel once, or export to CSV."
        )

    return LegacyIngestResponse(
        source_path=req.source_path,
        normalized_path=str(destination),
        utterance_count=n_utterances,
        status="ok" if n_utterances > 0 else "empty",
        text_col_resolved=provenance.get("text_col_resolved"),
        warnings=hints,
    )
115
+
116
+
117
+ __all__ = ["router", "LegacyIngestRequest", "LegacyIngestResponse"]
@@ -0,0 +1,133 @@
1
+ """POST /transcribe — orchestrate the full descriptive pipeline (v0.1-01).
2
+
3
+ Body shape mirrors the CLI's `TranscriberClient.transcribe()` contract: the
4
+ client passes the seed root, the session id, and the absolute source path
5
+ (already moved into `sessions/<sid>/source.<ext>` by the inbox watcher).
6
+
7
+ The route returns the transcript path and a status code the worker uses to
8
+ either commit the job, requeue for retry, or surface needs_speaker_labels to
9
+ the researcher.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import os
15
+ from pathlib import Path
16
+ from typing import Annotated
17
+
18
+ from fastapi import APIRouter, Depends, HTTPException, status
19
+ from pydantic import BaseModel, Field
20
+
21
+ from ..asr import ASRConfig
22
+ from ..pipeline import (
23
+ PipelineBackends,
24
+ PipelineConfig,
25
+ run_pipeline,
26
+ write_transcript,
27
+ )
28
+
29
+ router = APIRouter()
30
+
31
+
32
+ class TranscribeRequest(BaseModel):
33
+ """JSON body for POST /transcribe."""
34
+
35
+ seed_path: str = Field(..., description="Absolute path to the seed root (Seeds/<name>/).")
36
+ session_id: str = Field(..., pattern=r"^[A-Za-z0-9_-]+$")
37
+ source_path: str = Field(..., description="Absolute path to the audio/video file.")
38
+ language: str | None = Field(None, description="Optional language hint (e.g. 'es-CO').")
39
+ model_name: str = Field("large-v3-turbo", description="Whisper model id.")
40
+ device: str = Field("auto", description="Device: auto | cpu | cuda | mps.")
41
+ compute_type: str = Field("int8", description="Compute precision (int8|float16|float32).")
42
+
43
+
44
class TranscribeResponse(BaseModel):
    """Response shape mirroring `TranscriberClient.TranscribeResponse`."""

    session_id: str  # echoed from the request
    transcript_path: str  # path returned by write_transcript
    # Taken from the transcript's "status" key, defaulting to "ok".
    status: str  # ok | needs_speaker_labels | failed_transcription
50
+
51
+
52
def _build_backends() -> PipelineBackends:
    """Resolve real backends from the environment.

    Each backend is lazy-loaded by its own module; this function just decides
    *which* backend to inject. In production all three are None → each module
    falls back to its real implementation (WhisperX / pyannote / Silero). In
    tests we override via FastAPI's `app.dependency_overrides`.

    Returns:
        A PipelineBackends with ``vad``, ``asr`` and ``diarization`` all None.
    """
    return PipelineBackends(vad=None, asr=None, diarization=None)
61
+
62
+
63
def _build_pipeline_config(req: TranscribeRequest) -> PipelineConfig:
    """Translate a transcribe request into a PipelineConfig.

    The request's model, device, compute type and language populate the ASR
    config, with event tags always enabled; the remaining PipelineConfig
    fields keep their defaults.
    """
    return PipelineConfig(
        asr=ASRConfig(
            model_name=req.model_name,
            device=req.device,
            compute_type=req.compute_type,
            language=req.language,
            event_tags=True,
        )
    )
72
+
73
+
74
@router.post(
    "/transcribe",
    response_model=TranscribeResponse,
    status_code=status.HTTP_200_OK,
    summary="Run the descriptive transcription pipeline on a session's source media.",
)
def post_transcribe(
    req: TranscribeRequest,
    backends: Annotated[PipelineBackends, Depends(_build_backends)],
) -> TranscribeResponse:
    """Run the pipeline for one session and persist its transcript.

    Responds 404 when the source or the seed path does not exist. A
    RuntimeError from the pipeline becomes 503 ``model_missing`` when its
    message mentions "asr extra" (any case) or "HUGGINGFACE_TOKEN", and 500
    ``failed_transcription`` otherwise. On success the transcript is written
    to disk and its path returned.
    """
    # Source is checked first, then the seed.
    for label, raw_path in (("source", req.source_path), ("seed", req.seed_path)):
        if not Path(raw_path).exists():
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"{label} not found: {raw_path}",
            )

    # Built outside the try so only pipeline errors are translated below.
    pipeline_config = _build_pipeline_config(req)

    try:
        transcript = run_pipeline(
            seed_path=req.seed_path,
            session_id=req.session_id,
            source_path=req.source_path,
            config=pipeline_config,
            backends=backends,
        )
    except RuntimeError as e:
        # Backend missing weights → distinguishable from generic failure so the
        # CLI can suggest `compost setup --fix`.
        message = str(e)
        model_missing = "asr extra" in message.lower() or "HUGGINGFACE_TOKEN" in message
        if model_missing:
            raise HTTPException(
                status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
                detail=f"model_missing: {e}",
            ) from e
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"failed_transcription: {e}",
        ) from e

    saved_to = write_transcript(req.seed_path, req.session_id, transcript)

    return TranscribeResponse(
        session_id=req.session_id,
        transcript_path=saved_to,
        status=transcript.get("status", "ok"),
    )
125
+
126
+
127
def hf_token_present() -> bool:
    """Report whether a HuggingFace token is set in the environment.

    Checks ``HUGGINGFACE_TOKEN`` and ``HF_TOKEN`` for a non-empty value. Used
    by the /compost-setup doctor; it does NOT validate that the token works
    against pyannote.
    """
    token_vars = ("HUGGINGFACE_TOKEN", "HF_TOKEN")
    return any(os.environ.get(name) for name in token_vars)
131
+
132
+
133
+ __all__ = ["router", "TranscribeRequest", "TranscribeResponse", "hf_token_present"]