@they-juanreina/compost-cli 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/agreement.d.ts +3 -0
- package/dist/commands/agreement.d.ts.map +1 -0
- package/dist/commands/agreement.js +35 -0
- package/dist/commands/agreement.js.map +1 -0
- package/dist/commands/create.d.ts +1 -0
- package/dist/commands/create.d.ts.map +1 -1
- package/dist/commands/create.js +39 -1
- package/dist/commands/create.js.map +1 -1
- package/dist/commands/export.d.ts.map +1 -1
- package/dist/commands/export.js +47 -4
- package/dist/commands/export.js.map +1 -1
- package/dist/commands/import.d.ts +3 -0
- package/dist/commands/import.d.ts.map +1 -0
- package/dist/commands/import.js +59 -0
- package/dist/commands/import.js.map +1 -0
- package/dist/commands/init.d.ts.map +1 -1
- package/dist/commands/init.js +1 -0
- package/dist/commands/init.js.map +1 -1
- package/dist/commands/jobs.d.ts +3 -0
- package/dist/commands/jobs.d.ts.map +1 -0
- package/dist/commands/jobs.js +105 -0
- package/dist/commands/jobs.js.map +1 -0
- package/dist/commands/label.d.ts +3 -0
- package/dist/commands/label.d.ts.map +1 -0
- package/dist/commands/label.js +67 -0
- package/dist/commands/label.js.map +1 -0
- package/dist/commands/models.d.ts.map +1 -1
- package/dist/commands/models.js +2 -1
- package/dist/commands/models.js.map +1 -1
- package/dist/commands/recode.d.ts +3 -0
- package/dist/commands/recode.d.ts.map +1 -0
- package/dist/commands/recode.js +60 -0
- package/dist/commands/recode.js.map +1 -0
- package/dist/commands/reindex.d.ts.map +1 -1
- package/dist/commands/reindex.js +6 -4
- package/dist/commands/reindex.js.map +1 -1
- package/dist/commands/rerun.d.ts +3 -0
- package/dist/commands/rerun.d.ts.map +1 -0
- package/dist/commands/rerun.js +91 -0
- package/dist/commands/rerun.js.map +1 -0
- package/dist/commands/search.d.ts.map +1 -1
- package/dist/commands/search.js +2 -1
- package/dist/commands/search.js.map +1 -1
- package/dist/commands/secrets.d.ts +3 -0
- package/dist/commands/secrets.d.ts.map +1 -0
- package/dist/commands/secrets.js +143 -0
- package/dist/commands/secrets.js.map +1 -0
- package/dist/commands/setup.d.ts.map +1 -1
- package/dist/commands/setup.js +90 -1
- package/dist/commands/setup.js.map +1 -1
- package/dist/commands/status.d.ts.map +1 -1
- package/dist/commands/status.js +2 -1
- package/dist/commands/status.js.map +1 -1
- package/dist/commands/transcribe.d.ts.map +1 -1
- package/dist/commands/transcribe.js +13 -2
- package/dist/commands/transcribe.js.map +1 -1
- package/dist/commands/validate.d.ts.map +1 -1
- package/dist/commands/validate.js +29 -1
- package/dist/commands/validate.js.map +1 -1
- package/dist/engine.d.ts +23 -0
- package/dist/engine.d.ts.map +1 -0
- package/dist/engine.js +32 -0
- package/dist/engine.js.map +1 -0
- package/dist/exporters/prov.d.ts +11 -0
- package/dist/exporters/prov.d.ts.map +1 -0
- package/dist/exporters/prov.js +151 -0
- package/dist/exporters/prov.js.map +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +6 -0
- package/dist/index.js.map +1 -1
- package/dist/lib/agreement.d.ts +77 -0
- package/dist/lib/agreement.d.ts.map +1 -0
- package/dist/lib/agreement.js +261 -0
- package/dist/lib/agreement.js.map +1 -0
- package/dist/lib/artifacts.d.ts +32 -1
- package/dist/lib/artifacts.d.ts.map +1 -1
- package/dist/lib/artifacts.js +156 -22
- package/dist/lib/artifacts.js.map +1 -1
- package/dist/lib/blame.d.ts.map +1 -1
- package/dist/lib/blame.js +3 -2
- package/dist/lib/blame.js.map +1 -1
- package/dist/lib/config.d.ts +3 -0
- package/dist/lib/config.d.ts.map +1 -1
- package/dist/lib/config.js.map +1 -1
- package/dist/lib/doctor.d.ts +3 -0
- package/dist/lib/doctor.d.ts.map +1 -1
- package/dist/lib/doctor.js +24 -1
- package/dist/lib/doctor.js.map +1 -1
- package/dist/lib/events.d.ts +34 -1
- package/dist/lib/events.d.ts.map +1 -1
- package/dist/lib/events.js +35 -1
- package/dist/lib/events.js.map +1 -1
- package/dist/lib/importTranscript.d.ts +16 -0
- package/dist/lib/importTranscript.d.ts.map +1 -0
- package/dist/lib/importTranscript.js +94 -0
- package/dist/lib/importTranscript.js.map +1 -0
- package/dist/lib/ingest.d.ts.map +1 -1
- package/dist/lib/ingest.js +12 -6
- package/dist/lib/ingest.js.map +1 -1
- package/dist/lib/journal.d.ts +13 -0
- package/dist/lib/journal.d.ts.map +1 -1
- package/dist/lib/journal.js +58 -2
- package/dist/lib/journal.js.map +1 -1
- package/dist/lib/legacyNative.d.ts +24 -0
- package/dist/lib/legacyNative.d.ts.map +1 -0
- package/dist/lib/legacyNative.js +51 -0
- package/dist/lib/legacyNative.js.map +1 -0
- package/dist/lib/migrate.d.ts.map +1 -1
- package/dist/lib/migrate.js +1 -0
- package/dist/lib/migrate.js.map +1 -1
- package/dist/lib/nativeRuntime.d.ts +6 -3
- package/dist/lib/nativeRuntime.d.ts.map +1 -1
- package/dist/lib/nativeRuntime.js +6 -3
- package/dist/lib/nativeRuntime.js.map +1 -1
- package/dist/lib/provisionNative.js +1 -1
- package/dist/lib/provisionNative.js.map +1 -1
- package/dist/lib/queue.d.ts +25 -0
- package/dist/lib/queue.d.ts.map +1 -1
- package/dist/lib/queue.js +70 -3
- package/dist/lib/queue.js.map +1 -1
- package/dist/lib/reads.d.ts +24 -0
- package/dist/lib/reads.d.ts.map +1 -0
- package/dist/lib/reads.js +115 -0
- package/dist/lib/reads.js.map +1 -0
- package/dist/lib/recode.d.ts +19 -0
- package/dist/lib/recode.d.ts.map +1 -0
- package/dist/lib/recode.js +43 -0
- package/dist/lib/recode.js.map +1 -0
- package/dist/lib/rerun.d.ts +51 -0
- package/dist/lib/rerun.d.ts.map +1 -0
- package/dist/lib/rerun.js +166 -0
- package/dist/lib/rerun.js.map +1 -0
- package/dist/lib/retrieve.d.ts +8 -4
- package/dist/lib/retrieve.d.ts.map +1 -1
- package/dist/lib/retrieve.js +12 -10
- package/dist/lib/retrieve.js.map +1 -1
- package/dist/lib/schemas.generated.d.ts.map +1 -1
- package/dist/lib/schemas.generated.js +28 -0
- package/dist/lib/schemas.generated.js.map +1 -1
- package/dist/lib/secrets.d.ts +158 -0
- package/dist/lib/secrets.d.ts.map +1 -0
- package/dist/lib/secrets.js +507 -0
- package/dist/lib/secrets.js.map +1 -0
- package/dist/lib/seed.d.ts +5 -0
- package/dist/lib/seed.d.ts.map +1 -1
- package/dist/lib/seed.js +15 -2
- package/dist/lib/seed.js.map +1 -1
- package/dist/lib/seedResolve.d.ts.map +1 -1
- package/dist/lib/seedResolve.js +1 -0
- package/dist/lib/seedResolve.js.map +1 -1
- package/dist/lib/session.d.ts +14 -0
- package/dist/lib/session.d.ts.map +1 -1
- package/dist/lib/session.js +47 -0
- package/dist/lib/session.js.map +1 -1
- package/dist/lib/setup.d.ts +5 -0
- package/dist/lib/setup.d.ts.map +1 -1
- package/dist/lib/setup.js +78 -14
- package/dist/lib/setup.js.map +1 -1
- package/dist/lib/setupWizard.d.ts +51 -0
- package/dist/lib/setupWizard.d.ts.map +1 -0
- package/dist/lib/setupWizard.js +223 -0
- package/dist/lib/setupWizard.js.map +1 -0
- package/dist/lib/snap.d.ts.map +1 -1
- package/dist/lib/snap.js +2 -5
- package/dist/lib/snap.js.map +1 -1
- package/dist/lib/speakers.d.ts +41 -0
- package/dist/lib/speakers.d.ts.map +1 -0
- package/dist/lib/speakers.js +78 -0
- package/dist/lib/speakers.js.map +1 -0
- package/dist/lib/status.d.ts.map +1 -1
- package/dist/lib/status.js +21 -0
- package/dist/lib/status.js.map +1 -1
- package/dist/lib/userConfig.d.ts +22 -0
- package/dist/lib/userConfig.d.ts.map +1 -0
- package/dist/lib/userConfig.js +67 -0
- package/dist/lib/userConfig.js.map +1 -0
- package/dist/lib/validate.d.ts +18 -0
- package/dist/lib/validate.d.ts.map +1 -1
- package/dist/lib/validate.js +70 -1
- package/dist/lib/validate.js.map +1 -1
- package/dist/lib/version.d.ts +30 -0
- package/dist/lib/version.d.ts.map +1 -0
- package/dist/lib/version.js +73 -0
- package/dist/lib/version.js.map +1 -0
- package/dist/llm/adapter.d.ts.map +1 -1
- package/dist/llm/adapter.js +2 -0
- package/dist/llm/adapter.js.map +1 -1
- package/dist/llm/providers/ollama.d.ts.map +1 -1
- package/dist/llm/providers/ollama.js +6 -0
- package/dist/llm/providers/ollama.js.map +1 -1
- package/dist/loops/ingest_watcher.d.ts.map +1 -1
- package/dist/loops/ingest_watcher.js +6 -3
- package/dist/loops/ingest_watcher.js.map +1 -1
- package/dist/loops/legacy_worker.d.ts +28 -1
- package/dist/loops/legacy_worker.d.ts.map +1 -1
- package/dist/loops/legacy_worker.js +81 -9
- package/dist/loops/legacy_worker.js.map +1 -1
- package/dist/loops/supervisor.d.ts +3 -0
- package/dist/loops/supervisor.d.ts.map +1 -1
- package/dist/loops/supervisor.js +12 -0
- package/dist/loops/supervisor.js.map +1 -1
- package/dist/loops/synthesis.d.ts.map +1 -1
- package/dist/loops/synthesis.js +15 -0
- package/dist/loops/synthesis.js.map +1 -1
- package/dist/loops/transcribe_worker.d.ts.map +1 -1
- package/dist/loops/transcribe_worker.js +2 -4
- package/dist/loops/transcribe_worker.js.map +1 -1
- package/dist/output.d.ts +13 -1
- package/dist/output.d.ts.map +1 -1
- package/dist/output.js +22 -2
- package/dist/output.js.map +1 -1
- package/dist/render/human.d.ts +20 -0
- package/dist/render/human.d.ts.map +1 -0
- package/dist/render/human.js +54 -0
- package/dist/render/human.js.map +1 -0
- package/dist/router.d.ts.map +1 -1
- package/dist/router.js +17 -2
- package/dist/router.js.map +1 -1
- package/package.json +18 -5
- package/templates/config.toml +6 -1
- package/transcriber/app/__init__.py +3 -0
- package/transcriber/app/asr.py +198 -0
- package/transcriber/app/asr_parakeet.py +174 -0
- package/transcriber/app/cue_parser.py +110 -0
- package/transcriber/app/diarization.py +330 -0
- package/transcriber/app/frame_annotation.py +77 -0
- package/transcriber/app/frames.py +130 -0
- package/transcriber/app/health.py +70 -0
- package/transcriber/app/legacy.py +355 -0
- package/transcriber/app/legacy_cli.py +90 -0
- package/transcriber/app/main.py +30 -0
- package/transcriber/app/pipeline.py +210 -0
- package/transcriber/app/pptx_export.py +42 -0
- package/transcriber/app/prosody.py +128 -0
- package/transcriber/app/routes/__init__.py +1 -0
- package/transcriber/app/routes/legacy.py +117 -0
- package/transcriber/app/routes/transcribe.py +133 -0
- package/transcriber/app/shot_change.py +74 -0
- package/transcriber/app/silence_typer.py +144 -0
- package/transcriber/app/transcribe_cli.py +82 -0
- package/transcriber/app/vad.py +216 -0
- package/transcriber/pyproject.toml +56 -0
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
"""Transcription pipeline orchestrator (#v0.1-01).
|
|
2
|
+
|
|
3
|
+
Composes the already-tested deterministic stages into a single transcript.json:
|
|
4
|
+
|
|
5
|
+
duration probe → VAD speech/silences → ASR → diarization align →
|
|
6
|
+
cue parser → silence typer → prosody → final transcript
|
|
7
|
+
|
|
8
|
+
Each stage accepts injectable backends so the route, the worker, and the tests
|
|
9
|
+
all share one orchestration codepath. The route in `routes/transcribe.py`
|
|
10
|
+
provides real backends; tests pass fakes.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
import subprocess
|
|
17
|
+
from dataclasses import dataclass
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
from .asr import ASRConfig, Transcriber, WhisperBackend
|
|
22
|
+
from .cue_parser import parse_transcript_cues
|
|
23
|
+
from .diarization import DiarizationBackend, Diarizer, align
|
|
24
|
+
from .prosody import annotate_prosody
|
|
25
|
+
from .silence_typer import type_all_silences
|
|
26
|
+
from .vad import VAD, VADBackend, silences_to_schema, utterance_energies
|
|
27
|
+
|
|
28
|
+
SCHEMA_VERSION = "1.0"
|
|
29
|
+
DEFAULT_TRANSCRIBER_VERSION = "compost-transcriber@0.1.0"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class PipelineConfig:
    """Static settings for one pipeline run.

    ``asr`` configures the transcriber; the remaining string tags are copied
    into the ``provenance`` section of the transcript built by ``run_pipeline``.
    """

    # Passed to Transcriber(config=...); its `language` doubles as the language hint.
    asr: ASRConfig
    # Stored as provenance["transcriber"].
    transcriber_version: str = DEFAULT_TRANSCRIBER_VERSION
    # Stored as provenance["asr_model"].
    asr_model_tag: str = "whisper-large-v3-turbo-event-tags"
    # Stored as provenance["diarizer"].
    diarizer_tag: str = "pyannote-audio@3.3"
    # Stored, suffixed with " + whisper-events", as provenance["audio_cues"].
    vad_tag: str = "silero-vad@5.0"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class PipelineBackends:
    """Inject concrete or fake backends. Route wires real ones; tests inject fakes."""

    # Each value is forwarded unchanged as the `backend=` argument of the
    # matching stage wrapper in run_pipeline (VAD, Transcriber, Diarizer).
    # NOTE(review): what a wrapper does when it receives None is not visible
    # here — presumably it falls back to its default backend; confirm.
    vad: VADBackend | None = None
    asr: WhisperBackend | None = None
    diarization: DiarizationBackend | None = None
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def probe_duration_ms(source_path: str) -> int:
    """Return the duration of an audio/video file in milliseconds via ffprobe.

    Best-effort: returns 0 whenever no usable duration can be obtained, so the
    caller can decide whether to error or proceed (silence segmentation against
    duration=0 produces no trailing silence, which is fine).

    Args:
        source_path: Path of the media file handed to ffprobe.

    Returns:
        Duration in whole milliseconds, or 0 if ffprobe is missing or cannot be
        executed, times out (30 s), exits non-zero, or prints something that is
        not a finite number.
    """
    command = [
        "ffprobe",
        "-v",
        "error",
        "-show_entries",
        "format=duration",
        "-of",
        "default=noprint_wrappers=1:nokey=1",
        source_path,
    ]
    try:
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=30,
            check=False,
        )
    except (OSError, subprocess.TimeoutExpired):
        # OSError covers a missing binary (FileNotFoundError) as well as one
        # that exists but cannot be executed (e.g. PermissionError).
        return 0

    if result.returncode != 0:
        return 0

    try:
        return int(float(result.stdout.strip()) * 1000)
    except (ValueError, OverflowError):
        # ValueError: empty / non-numeric output or NaN.
        # OverflowError: an infinite value cannot be converted to int.
        return 0
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _speakers_from_utterances(utterances: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
82
|
+
"""Distinct speakers seen in the utterances; first speaker tagged as moderator,
|
|
83
|
+
the rest as participants (researcher overrides this in the UI for now).
|
|
84
|
+
"""
|
|
85
|
+
seen: dict[str, dict[str, Any]] = {}
|
|
86
|
+
for u in utterances:
|
|
87
|
+
sid = u.get("speaker_id", "S?")
|
|
88
|
+
if sid in seen:
|
|
89
|
+
continue
|
|
90
|
+
seen[sid] = {"id": sid, "name": sid, "type": "participant"}
|
|
91
|
+
# First seen → moderator by convention. Researcher can override post-hoc.
|
|
92
|
+
if seen:
|
|
93
|
+
first = next(iter(seen))
|
|
94
|
+
seen[first]["type"] = "moderator"
|
|
95
|
+
return list(seen.values())
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _detect_language(asr_lang: str | None, configured: str | None) -> str:
|
|
99
|
+
"""Prefer ASR-detected, then configured hint, then 'und' (undetermined)."""
|
|
100
|
+
if asr_lang:
|
|
101
|
+
return asr_lang
|
|
102
|
+
if configured:
|
|
103
|
+
return configured
|
|
104
|
+
return "und"
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def run_pipeline(
    seed_path: str,
    session_id: str,
    source_path: str,
    config: PipelineConfig,
    backends: PipelineBackends,
) -> dict[str, Any]:
    """Run every stage and return the final transcript dict.

    Side-effect-free except for backends' own model loading. The route writes
    the result to disk separately so this function is testable as pure
    transformation given the backends.

    Args:
        seed_path: Seed location; only used to compute the stored ``source`` path.
        session_id: Copied into the transcript as ``session_id``.
        source_path: Media file fed to the duration probe, VAD, ASR and diarizer.
        config: ASR settings plus the provenance tags.
        backends: Backends forwarded to the VAD, Transcriber and Diarizer wrappers.

    Returns:
        The transcript dict (schema version ``SCHEMA_VERSION``).

    Raises:
        FileNotFoundError: If ``source_path`` does not exist.
    """
    if not Path(source_path).exists():
        raise FileNotFoundError(f"source not found: {source_path}")

    # Best-effort probe: 0 means the duration could not be determined.
    duration_ms = probe_duration_ms(source_path)

    # 1. VAD — speech segments (carry per-segment RMS energy) + first-class silences
    vad = VAD(backend=backends.vad)
    speech, silences = vad.segment(source_path, duration_ms)

    # 2. ASR — utterances with word timings, may contain event tags inline
    asr = Transcriber(config=config.asr, backend=backends.asr)
    asr_result = asr.transcribe(source_path)

    # 3. Initial transcript shell
    transcript: dict[str, Any] = {
        "schema_version": SCHEMA_VERSION,
        "kind": "session",
        "session_id": session_id,
        "source": _relative_source(seed_path, source_path),
        "language": _detect_language(asr_result.language, config.asr.language),
        "duration_ms": duration_ms,
        "modality": _modality(source_path),
        "speakers": [],
        "utterances": asr_result.utterances,
        "silences": silences_to_schema(silences),
        "cues": [],
        "frames": [],
        "glossary_refs": [],
        # frame_capture / frame_annotation are omitted (not null): the schema
        # types provenance fields as strings and the convention is "absent when
        # not applicable". run_pipeline does no frame capture/annotation; those
        # stages (frames.py / frame_annotation.py) add their own provenance when
        # they run.
        "provenance": {
            "transcriber": config.transcriber_version,
            "asr_model": config.asr_model_tag,
            "diarizer": config.diarizer_tag,
            "audio_cues": f"{config.vad_tag} + whisper-events",
        },
    }

    # Stages 4, 6, 7 and 8 are called for their effect on `transcript`; their
    # return values are discarded (presumably they mutate it in place, as
    # annotate_prosody does — confirm for align / parse_transcript_cues /
    # type_all_silences).

    # 4. Diarization — assign speaker_id per utterance + overlap cues
    diarizer = Diarizer(backend=backends.diarization)
    turns = diarizer.diarize(source_path)
    align(transcript, turns)

    # 5. Speakers list, derived from the diarized utterances
    transcript["speakers"] = _speakers_from_utterances(transcript["utterances"])

    # 6. Cue parser — strip [laughter]/[sigh]/etc from utterance text into cues[]
    parse_transcript_cues(transcript)

    # 7. Silence semantic typing (after_question / thinking / interruption / …)
    type_all_silences(transcript)

    # 8. Prosody hints per utterance (deterministic, cheap). Volume bucketing
    # needs the per-utterance VAD RMS energy signal mapped from the speech
    # segments; without it volume would default to "normal" for every utterance.
    energies = utterance_energies(speech, transcript["utterances"])
    annotate_prosody(transcript, energies)

    return transcript
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _relative_source(seed_path: str, source_path: str) -> str:
|
|
185
|
+
"""Return a seed-relative path for transcript.source if the source lives
|
|
186
|
+
inside the seed; otherwise return the absolute path unchanged.
|
|
187
|
+
"""
|
|
188
|
+
try:
|
|
189
|
+
return str(Path(source_path).relative_to(Path(seed_path).parent))
|
|
190
|
+
except ValueError:
|
|
191
|
+
return source_path
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _modality(source_path: str) -> list[str]:
|
|
195
|
+
"""Coarse modality flag from file extension. Video files imply both audio
|
|
196
|
+
and video tracks (the player will only render video if present).
|
|
197
|
+
"""
|
|
198
|
+
ext = Path(source_path).suffix.lower()
|
|
199
|
+
if ext in {".mp4", ".mov", ".mkv", ".webm", ".avi", ".m4v"}:
|
|
200
|
+
return ["audio", "video"]
|
|
201
|
+
return ["audio"]
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def write_transcript(seed_path: str, session_id: str, transcript: dict[str, Any]) -> str:
    """Persist a transcript as ``<seed_path>/sessions/<session_id>/transcript.json``.

    The session directory is created if needed. The JSON is written as UTF-8
    with two-space indentation, non-ASCII characters kept verbatim, and a
    trailing newline. Returns the path of the written file as a string.
    """
    session_dir = Path(seed_path).joinpath("sessions", session_id)
    session_dir.mkdir(parents=True, exist_ok=True)
    target = session_dir / "transcript.json"
    # Serialize before opening so a failed dump never truncates an existing file.
    payload = json.dumps(transcript, indent=2, ensure_ascii=False)
    with target.open("w", encoding="utf-8") as handle:
        handle.write(payload + "\n")
    return str(target)
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""PPTX deck export (#66).
|
|
2
|
+
|
|
3
|
+
Turns a report deck-spec (built by cli/src/exporters/report.ts → buildDeckSpec)
|
|
4
|
+
into a .pptx: one slide per entry, bullets as body, citations as slide notes.
|
|
5
|
+
A per-seed `branding` mapping (e.g. title color) is accepted but not yet applied to the slides. python-pptx is lazily imported.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def export_deck(spec: list[dict[str, Any]], out_path: str, branding: dict[str, Any] | None = None) -> str:
    """Render a deck-spec to a .pptx file.

    One slide is produced per spec entry: ``title`` becomes the slide title,
    each item of ``bullets`` becomes a paragraph in a text box, and a non-empty
    ``notes`` value becomes the slide's speaker notes.

    Args:
        spec: Slide entries; the keys read are ``title``, ``bullets`` and ``notes``.
        out_path: Destination path of the .pptx file.
        branding: Accepted for interface compatibility; not applied to the
            slides yet.

    Returns:
        ``out_path``, after the presentation has been saved.

    Raises:
        RuntimeError: If python-pptx is not installed.
    """
    try:
        from pptx import Presentation  # type: ignore
        from pptx.util import Pt  # type: ignore
    except ImportError as e:
        raise RuntimeError("python-pptx not installed (pip install -e '.[legacy]')") from e

    prs = Presentation()
    title_only = prs.slide_layouts[5]  # "Title Only" in python-pptx's default template

    for slide_spec in spec:
        slide = prs.slides.add_slide(title_only)

        title_shape = slide.shapes.title
        if title_shape is not None:
            # An explicit `"title": None` must not reach python-pptx as None.
            title_shape.text = slide_spec.get("title") or ""

        # bullets in a textbox, one paragraph per bullet
        tb = slide.shapes.add_textbox(Pt(40), Pt(120), Pt(640), Pt(360)).text_frame
        tb.word_wrap = True
        for i, bullet in enumerate(slide_spec.get("bullets") or []):
            # A fresh text frame already holds one empty paragraph; reuse it first.
            p = tb.paragraphs[0] if i == 0 else tb.add_paragraph()
            p.text = str(bullet)

        # citations → slide notes
        notes = slide_spec.get("notes", "")
        if notes:
            slide.notes_slide.notes_text_frame.text = notes

    prs.save(out_path)
    return out_path
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""Prosody hint extractor (#13).
|
|
2
|
+
|
|
3
|
+
Deterministic per-utterance hints derived from word timings, optional VAD
|
|
4
|
+
energy, and speech rate. No ML model — cheap, reproducible context.
|
|
5
|
+
|
|
6
|
+
Output shape (matches transcript.schema.json #/$defs/prosody):
|
|
7
|
+
{"volume": "low|normal|high", "pace": "slow|normal|fast", "hesitations": int}
|
|
8
|
+
|
|
9
|
+
Thresholds are module constants, documented here for reproducibility:
|
|
10
|
+
|
|
11
|
+
pace (words per second over the utterance span):
|
|
12
|
+
< 2.0 → slow
|
|
13
|
+
> 3.3 → fast
|
|
14
|
+
else → normal
|
|
15
|
+
|
|
16
|
+
volume (mean VAD RMS energy, normalized 0..1; requires the energy signal
|
|
17
|
+
from Silero VAD, issue #9). When energy is unavailable we report "normal"
|
|
18
|
+
rather than guess:
|
|
19
|
+
< 0.33 → low
|
|
20
|
+
> 0.66 → high
|
|
21
|
+
else → normal
|
|
22
|
+
|
|
23
|
+
hesitations = filler tokens + immediate word repetitions + long
|
|
24
|
+
intra-utterance gaps (> 400 ms between consecutive words).
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
import re
|
|
30
|
+
from typing import Any
|
|
31
|
+
|
|
32
|
+
PACE_SLOW_WPS = 2.0
|
|
33
|
+
PACE_FAST_WPS = 3.3
|
|
34
|
+
# Volume thresholds applied to the normalized 0..1 energy. These are GLOBAL
|
|
35
|
+
# constants, not per-speaker. The energy signal is session-normalized in
|
|
36
|
+
# vad.utterance_energies(); a soft speaker's loudest moment can still read below
|
|
37
|
+
# a loud speaker's baseline. TODO(#13): per-speaker normalization would make
|
|
38
|
+
# these buckets speaker-relative — see vad.utterance_energies for the plan.
|
|
39
|
+
VOLUME_LOW = 0.33
|
|
40
|
+
VOLUME_HIGH = 0.66
|
|
41
|
+
HESITATION_GAP_MS = 400
|
|
42
|
+
|
|
43
|
+
# Multilingual (es-CO + en) filler set.
|
|
44
|
+
_FILLERS = {
|
|
45
|
+
"uh",
|
|
46
|
+
"um",
|
|
47
|
+
"eh",
|
|
48
|
+
"em",
|
|
49
|
+
"este",
|
|
50
|
+
"esto",
|
|
51
|
+
"mmm",
|
|
52
|
+
"hmm",
|
|
53
|
+
"like",
|
|
54
|
+
"pues",
|
|
55
|
+
}
|
|
56
|
+
_FILLER_PHRASES = ("o sea", "you know", "es decir")
|
|
57
|
+
|
|
58
|
+
_WORD_RE = re.compile(r"[^\W\d_]+", re.UNICODE)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _pace(text: str, start_ms: int, end_ms: int) -> str:
    """Bucket speech rate into ``"slow"``, ``"normal"`` or ``"fast"``.

    The rate is the number of word tokens in ``text`` divided by the utterance
    span in seconds; the span is floored at one microsecond so a zero-length
    or inverted span cannot divide by zero.
    """
    span_s = (end_ms - start_ms) / 1000.0
    if span_s < 1e-6:
        span_s = 1e-6
    words_per_second = len(_WORD_RE.findall(text)) / span_s

    if words_per_second < PACE_SLOW_WPS:
        return "slow"
    return "fast" if words_per_second > PACE_FAST_WPS else "normal"
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _volume(energy: float | None) -> str:
    """Bucket an energy value into ``"low"``, ``"normal"`` or ``"high"``.

    A missing energy (``None``) is reported as ``"normal"`` rather than guessed.
    """
    if energy is not None:
        if energy < VOLUME_LOW:
            return "low"
        if energy > VOLUME_HIGH:
            return "high"
    return "normal"
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _count_hesitations(text: str, words: list[dict[str, Any]] | None) -> int:
    """Count hesitation markers in one utterance.

    The total is the sum of:
      * single-token fillers (members of ``_FILLERS``),
      * multi-word filler phrases (``_FILLER_PHRASES``), matched as whole words,
      * immediate repetitions of a token longer than one character,
      * gaps above ``HESITATION_GAP_MS`` between consecutive timed words.

    Args:
        text: Utterance text.
        words: Optional word timings; each entry's ``"s"`` and ``"e"`` keys are
            read as start/end (a missing key counts as 0). Skipped when
            empty or ``None``.

    Returns:
        The number of hesitation markers found.
    """
    tokens = [t.lower() for t in _WORD_RE.findall(text)]

    # filler single tokens
    count = sum(1 for t in tokens if t in _FILLERS)

    # filler phrases — anchored on word boundaries so a phrase is not found
    # inside longer words ("como sea" must not count as "o sea", nor
    # "quieres decir" as "es decir").
    lowered = text.lower()
    for phrase in _FILLER_PHRASES:
        pattern = rf"(?<!\w){re.escape(phrase)}(?!\w)"
        count += len(re.findall(pattern, lowered))

    # immediate repetitions ("yo yo", "the the")
    for a, b in zip(tokens, tokens[1:], strict=False):
        if a == b and len(a) > 1:
            count += 1

    # long gaps between consecutive words
    if words:
        for prev, nxt in zip(words, words[1:], strict=False):
            if nxt.get("s", 0) - prev.get("e", 0) > HESITATION_GAP_MS:
                count += 1

    return count
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def extract_prosody(utterance: dict[str, Any], energy: float | None = None) -> dict[str, Any]:
    """Return the ``{volume, pace, hesitations}`` hints for one utterance.

    Reads ``text`` (default ``""``), the required ``start_ms`` / ``end_ms``
    keys, and the optional ``words`` list from ``utterance``; ``energy`` feeds
    the volume bucket.
    """
    text = utterance.get("text", "")
    volume = _volume(energy)
    pace = _pace(text, utterance["start_ms"], utterance["end_ms"])
    hesitations = _count_hesitations(text, utterance.get("words"))
    return {"volume": volume, "pace": pace, "hesitations": hesitations}
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def annotate_prosody(
    transcript: dict[str, Any],
    energies: dict[str, float] | None = None,
) -> dict[str, Any]:
    """Set a ``prosody`` entry on every utterance of the transcript.

    ``energies`` maps an utterance id to its energy value; utterances without
    an entry (or all of them, when ``energies`` is empty or ``None``) are
    processed with no energy. The transcript is modified in place and also
    returned.
    """
    energy_by_id = energies if energies else {}
    for utt in transcript.get("utterances", []):
        utt_energy = energy_by_id.get(utt.get("id"))
        utt["prosody"] = extract_prosody(utt, utt_energy)
    return transcript
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""HTTP route modules. Each subsystem (#9-#15) mounts its own router."""
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""POST /legacy-ingest — normalize a legacy document into a transcript.json.
|
|
2
|
+
|
|
3
|
+
The Node-side legacy-worker (cli/src/loops/legacy_worker.ts) pulls
|
|
4
|
+
`legacy-ingest` jobs from the queue and POSTs each here. The route dispatches
|
|
5
|
+
by file extension to the pure ingestors in `app/legacy.py`, then writes the
|
|
6
|
+
normalized JSON to `<seed>/legacy/<basename>.json`.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
from fastapi import APIRouter, HTTPException, status
|
|
16
|
+
from pydantic import BaseModel, Field
|
|
17
|
+
|
|
18
|
+
from ..legacy import ingest as ingest_legacy
|
|
19
|
+
|
|
20
|
+
router = APIRouter()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class LegacyIngestRequest(BaseModel):
    # Request body of POST /legacy-ingest. The two paths are required; the
    # remaining fields are optional and are only forwarded to the ingestor
    # when they are not None.
    seed_path: str = Field(..., description="Absolute path to the seed root.")
    source_path: str = Field(..., description="Absolute path to the asset to ingest.")
    # CSV/XLSX column mapping — if text_col is None, the ingestor auto-detects
    # from the header (text → transcript → content → utterance → quote →
    # message → body, then first-column fallback). Node-side workers may also
    # consult a `<source_path>.compost.json` sidecar that takes precedence.
    text_col: str | None = Field(
        None,
        description="Column holding the utterance text (CSV/XLSX). Auto-detected if None.",
    )
    speaker_col: str | None = Field(None, description="Optional column for speaker label.")
    sheet: str | None = Field(None, description="Optional XLSX sheet name (defaults to active).")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class LegacyIngestResponse(BaseModel):
    # Response body of POST /legacy-ingest.
    # Echo of the request's source_path.
    source_path: str
    # Path of the normalized JSON written under <seed>/legacy/.
    normalized_path: str
    # Number of entries in the normalized document's "utterances" list.
    utterance_count: int
    status: str  # ok | empty | failed
    text_col_resolved: str | None = None  # which column was actually used (CSV/XLSX)
    warnings: list[str] = []  # surfaced UX hints (e.g. xlsx un-evaluated formulas)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@router.post(
    "/legacy-ingest",
    response_model=LegacyIngestResponse,
    status_code=status.HTTP_200_OK,
    summary="Normalize a PDF/DOCX/PPTX/CSV/XLSX/TXT/MD into a transcript-shaped JSON.",
)
def post_legacy_ingest(req: LegacyIngestRequest) -> LegacyIngestResponse:
    # Normalizes one legacy document: validates both paths (404 if missing),
    # runs the ingestor (ValueError → 422, RuntimeError → 503), writes the
    # result to <seed>/legacy/<source stem>.json and reports what was produced.
    source = Path(req.source_path)
    seed_root = Path(req.seed_path)

    def not_found(detail: str) -> HTTPException:
        return HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=detail)

    if not source.exists():
        raise not_found(f"source not found: {req.source_path}")
    if not seed_root.exists():
        raise not_found(f"seed not found: {req.seed_path}")

    # Forward only the mapping options the caller actually provided.
    optional = {
        "text_col": req.text_col,
        "speaker_col": req.speaker_col,
        "sheet": req.sheet,
    }
    ingest_options: dict[str, Any] = {
        key: value for key, value in optional.items() if value is not None
    }

    try:
        doc = ingest_legacy(source, **ingest_options)
    except ValueError as e:
        # Unsupported ext or missing column — surface as 422 so the worker
        # can mark the job failed and the CLI can show the researcher what's wrong.
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=f"invalid_input: {e}",
        ) from e
    except RuntimeError as e:
        # Missing optional dep (python-docx, openpyxl, etc.) — 503 so the
        # CLI can route to `compost setup --fix`.
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail=f"dep_missing: {e}",
        ) from e

    # Write normalized JSON under <seed>/legacy/<basename>.json
    target_dir = seed_root / "legacy"
    target_dir.mkdir(parents=True, exist_ok=True)
    target = target_dir / f"{source.stem}.json"
    serialized = json.dumps(doc, indent=2, ensure_ascii=False) + "\n"
    target.write_text(serialized, encoding="utf-8")

    provenance = doc.get("provenance", {})
    n_utterances = len(doc.get("utterances", []))
    n_skipped = provenance.get("xlsx_rows_skipped_empty_text", 0)

    hints: list[str] = []
    if n_skipped > 0:
        hints.append(
            f"{n_skipped} XLSX row(s) had data in other columns but an empty text cell — "
            "likely an un-evaluated formula. Open the file in Excel once, or export to CSV."
        )

    return LegacyIngestResponse(
        source_path=req.source_path,
        normalized_path=str(target),
        utterance_count=n_utterances,
        status="ok" if n_utterances > 0 else "empty",
        text_col_resolved=provenance.get("text_col_resolved"),
        warnings=hints,
    )
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
__all__ = ["router", "LegacyIngestRequest", "LegacyIngestResponse"]
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""POST /transcribe — orchestrate the full descriptive pipeline (v0.1-01).
|
|
2
|
+
|
|
3
|
+
Body shape mirrors the CLI's `TranscriberClient.transcribe()` contract: the
|
|
4
|
+
client passes the seed root, the session id, and the absolute source path
|
|
5
|
+
(already moved into `sessions/<sid>/source.<ext>` by the inbox watcher).
|
|
6
|
+
|
|
7
|
+
The route returns the transcript path and a status code the worker uses to
|
|
8
|
+
either commit the job, requeue for retry, or surface needs_speaker_labels to
|
|
9
|
+
the researcher.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import os
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Annotated
|
|
17
|
+
|
|
18
|
+
from fastapi import APIRouter, Depends, HTTPException, status
|
|
19
|
+
from pydantic import BaseModel, Field
|
|
20
|
+
|
|
21
|
+
from ..asr import ASRConfig
|
|
22
|
+
from ..pipeline import (
|
|
23
|
+
PipelineBackends,
|
|
24
|
+
PipelineConfig,
|
|
25
|
+
run_pipeline,
|
|
26
|
+
write_transcript,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
router = APIRouter()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class TranscribeRequest(BaseModel):
    """Request payload accepted by POST /transcribe.

    The three path/id fields are required; the remaining fields tune the ASR
    stage and fall back to the defaults declared here.
    """

    # --- required: where the work lives -------------------------------------
    seed_path: str = Field(
        description="Absolute path to the seed root (Seeds/<name>/).",
    )
    # Restricted to letters, digits, underscore and hyphen by the pattern.
    session_id: str = Field(
        pattern=r"^[A-Za-z0-9_-]+$",
    )
    source_path: str = Field(
        description="Absolute path to the audio/video file.",
    )

    # --- optional: ASR tuning -----------------------------------------------
    language: str | None = Field(
        default=None,
        description="Optional language hint (e.g. 'es-CO').",
    )
    model_name: str = Field(
        default="large-v3-turbo",
        description="Whisper model id.",
    )
    device: str = Field(
        default="auto",
        description="Device: auto | cpu | cuda | mps.",
    )
    compute_type: str = Field(
        default="int8",
        description="Compute precision (int8|float16|float32).",
    )
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class TranscribeResponse(BaseModel):
    """Body returned by POST /transcribe.

    Kept in the same shape as `TranscriberClient.TranscribeResponse` on the
    CLI side.
    """

    # Echo of the session id supplied in the request.
    session_id: str
    # Location of the transcript written for this session.
    transcript_path: str
    # One of: ok | needs_speaker_labels | failed_transcription
    status: str
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _build_backends() -> PipelineBackends:
    """Choose which pipeline backends to inject for a request.

    This function only selects backends; loading is left to each backend's
    own module, lazily. Production passes None for all three slots, so every
    module uses its real implementation (WhisperX / pyannote / Silero). Tests
    swap this dependency out through FastAPI's `app.dependency_overrides`.
    """
    # None in a slot means "no override here".
    no_override = None
    return PipelineBackends(
        vad=no_override,
        asr=no_override,
        diarization=no_override,
    )
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _build_pipeline_config(req: TranscribeRequest) -> PipelineConfig:
    """Translate the request's ASR fields into a `PipelineConfig`.

    The model id, device, compute type and language hint are copied from
    *req* unchanged; `event_tags` is always switched on.
    """
    return PipelineConfig(
        asr=ASRConfig(
            model_name=req.model_name,
            device=req.device,
            compute_type=req.compute_type,
            language=req.language,
            event_tags=True,
        ),
    )
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@router.post(
    "/transcribe",
    response_model=TranscribeResponse,
    status_code=status.HTTP_200_OK,
    summary="Run the descriptive transcription pipeline on a session's source media.",
)
def post_transcribe(
    req: TranscribeRequest,
    backends: Annotated[PipelineBackends, Depends(_build_backends)],
) -> TranscribeResponse:
    """Run the pipeline for one session and write its transcript.

    Args:
        req: Validated request body (seed root, session id, source path and
            ASR settings).
        backends: Backend overrides resolved by `_build_backends` (or by a
            test's dependency override).

    Returns:
        The session id, the path of the written transcript, and the
        transcript's ``status`` value (``"ok"`` when the transcript carries
        none).

    Raises:
        HTTPException: 404 when the source path or the seed path does not
            exist; 503 (``model_missing: ...``) when the pipeline raises a
            ``RuntimeError`` mentioning the ASR extra or ``HUGGINGFACE_TOKEN``;
            500 (``failed_transcription: ...``) for any other ``RuntimeError``.
    """
    # Reject requests that point at missing paths before doing any heavy work.
    if not Path(req.source_path).exists():
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"source not found: {req.source_path}",
        )
    if not Path(req.seed_path).exists():
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"seed not found: {req.seed_path}",
        )

    config = _build_pipeline_config(req)

    try:
        transcript = run_pipeline(
            seed_path=req.seed_path,
            session_id=req.session_id,
            source_path=req.source_path,
            config=config,
            backends=backends,
        )
    except RuntimeError as e:
        message = str(e)
        # Backend missing weights → distinguishable from generic failure so the
        # CLI can suggest `compost setup --fix`.
        if "asr extra" in message.lower() or "HUGGINGFACE_TOKEN" in message:
            raise HTTPException(
                status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
                detail=f"model_missing: {e}",
            ) from e
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"failed_transcription: {e}",
        ) from e

    transcript_path = write_transcript(req.seed_path, req.session_id, transcript)

    return TranscribeResponse(
        session_id=req.session_id,
        # The response field is a plain `str`; coerce so a path-like return
        # value from write_transcript cannot fail response validation.
        transcript_path=str(transcript_path),
        status=transcript.get("status", "ok"),
    )
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def hf_token_present() -> bool:
    """Report whether a HuggingFace token is set in the environment.

    Helper exposed for the /compost-setup doctor. Returns True when either
    ``HUGGINGFACE_TOKEN`` or ``HF_TOKEN`` holds a non-empty value. It does NOT
    validate that the token works against pyannote.
    """
    candidate_vars = ("HUGGINGFACE_TOKEN", "HF_TOKEN")
    return any(os.environ.get(var_name) for var_name in candidate_vars)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
__all__ = ["router", "TranscribeRequest", "TranscribeResponse", "hf_token_present"]
|