@they-juanreina/compost-cli 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/agreement.d.ts +3 -0
- package/dist/commands/agreement.d.ts.map +1 -0
- package/dist/commands/agreement.js +35 -0
- package/dist/commands/agreement.js.map +1 -0
- package/dist/commands/create.d.ts +1 -0
- package/dist/commands/create.d.ts.map +1 -1
- package/dist/commands/create.js +39 -1
- package/dist/commands/create.js.map +1 -1
- package/dist/commands/export.d.ts.map +1 -1
- package/dist/commands/export.js +47 -4
- package/dist/commands/export.js.map +1 -1
- package/dist/commands/import.d.ts +3 -0
- package/dist/commands/import.d.ts.map +1 -0
- package/dist/commands/import.js +59 -0
- package/dist/commands/import.js.map +1 -0
- package/dist/commands/init.d.ts.map +1 -1
- package/dist/commands/init.js +1 -0
- package/dist/commands/init.js.map +1 -1
- package/dist/commands/jobs.d.ts +3 -0
- package/dist/commands/jobs.d.ts.map +1 -0
- package/dist/commands/jobs.js +105 -0
- package/dist/commands/jobs.js.map +1 -0
- package/dist/commands/label.d.ts +3 -0
- package/dist/commands/label.d.ts.map +1 -0
- package/dist/commands/label.js +67 -0
- package/dist/commands/label.js.map +1 -0
- package/dist/commands/models.d.ts.map +1 -1
- package/dist/commands/models.js +2 -1
- package/dist/commands/models.js.map +1 -1
- package/dist/commands/recode.d.ts +3 -0
- package/dist/commands/recode.d.ts.map +1 -0
- package/dist/commands/recode.js +60 -0
- package/dist/commands/recode.js.map +1 -0
- package/dist/commands/reindex.d.ts.map +1 -1
- package/dist/commands/reindex.js +6 -4
- package/dist/commands/reindex.js.map +1 -1
- package/dist/commands/rerun.d.ts +3 -0
- package/dist/commands/rerun.d.ts.map +1 -0
- package/dist/commands/rerun.js +91 -0
- package/dist/commands/rerun.js.map +1 -0
- package/dist/commands/search.d.ts.map +1 -1
- package/dist/commands/search.js +2 -1
- package/dist/commands/search.js.map +1 -1
- package/dist/commands/secrets.d.ts +3 -0
- package/dist/commands/secrets.d.ts.map +1 -0
- package/dist/commands/secrets.js +143 -0
- package/dist/commands/secrets.js.map +1 -0
- package/dist/commands/setup.d.ts.map +1 -1
- package/dist/commands/setup.js +90 -1
- package/dist/commands/setup.js.map +1 -1
- package/dist/commands/status.d.ts.map +1 -1
- package/dist/commands/status.js +2 -1
- package/dist/commands/status.js.map +1 -1
- package/dist/commands/transcribe.d.ts.map +1 -1
- package/dist/commands/transcribe.js +13 -2
- package/dist/commands/transcribe.js.map +1 -1
- package/dist/commands/validate.d.ts.map +1 -1
- package/dist/commands/validate.js +29 -1
- package/dist/commands/validate.js.map +1 -1
- package/dist/engine.d.ts +23 -0
- package/dist/engine.d.ts.map +1 -0
- package/dist/engine.js +32 -0
- package/dist/engine.js.map +1 -0
- package/dist/exporters/prov.d.ts +11 -0
- package/dist/exporters/prov.d.ts.map +1 -0
- package/dist/exporters/prov.js +151 -0
- package/dist/exporters/prov.js.map +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +6 -0
- package/dist/index.js.map +1 -1
- package/dist/lib/agreement.d.ts +77 -0
- package/dist/lib/agreement.d.ts.map +1 -0
- package/dist/lib/agreement.js +261 -0
- package/dist/lib/agreement.js.map +1 -0
- package/dist/lib/artifacts.d.ts +32 -1
- package/dist/lib/artifacts.d.ts.map +1 -1
- package/dist/lib/artifacts.js +156 -22
- package/dist/lib/artifacts.js.map +1 -1
- package/dist/lib/config.d.ts +3 -0
- package/dist/lib/config.d.ts.map +1 -1
- package/dist/lib/config.js.map +1 -1
- package/dist/lib/doctor.d.ts +3 -0
- package/dist/lib/doctor.d.ts.map +1 -1
- package/dist/lib/doctor.js +24 -1
- package/dist/lib/doctor.js.map +1 -1
- package/dist/lib/events.d.ts +34 -1
- package/dist/lib/events.d.ts.map +1 -1
- package/dist/lib/events.js +35 -1
- package/dist/lib/events.js.map +1 -1
- package/dist/lib/importTranscript.d.ts +16 -0
- package/dist/lib/importTranscript.d.ts.map +1 -0
- package/dist/lib/importTranscript.js +94 -0
- package/dist/lib/importTranscript.js.map +1 -0
- package/dist/lib/ingest.d.ts.map +1 -1
- package/dist/lib/ingest.js +12 -6
- package/dist/lib/ingest.js.map +1 -1
- package/dist/lib/journal.d.ts +13 -0
- package/dist/lib/journal.d.ts.map +1 -1
- package/dist/lib/journal.js +49 -2
- package/dist/lib/journal.js.map +1 -1
- package/dist/lib/legacyNative.d.ts +24 -0
- package/dist/lib/legacyNative.d.ts.map +1 -0
- package/dist/lib/legacyNative.js +51 -0
- package/dist/lib/legacyNative.js.map +1 -0
- package/dist/lib/provisionNative.js +1 -1
- package/dist/lib/provisionNative.js.map +1 -1
- package/dist/lib/queue.d.ts +25 -0
- package/dist/lib/queue.d.ts.map +1 -1
- package/dist/lib/queue.js +70 -3
- package/dist/lib/queue.js.map +1 -1
- package/dist/lib/reads.d.ts +24 -0
- package/dist/lib/reads.d.ts.map +1 -0
- package/dist/lib/reads.js +115 -0
- package/dist/lib/reads.js.map +1 -0
- package/dist/lib/recode.d.ts +19 -0
- package/dist/lib/recode.d.ts.map +1 -0
- package/dist/lib/recode.js +43 -0
- package/dist/lib/recode.js.map +1 -0
- package/dist/lib/rerun.d.ts +51 -0
- package/dist/lib/rerun.d.ts.map +1 -0
- package/dist/lib/rerun.js +166 -0
- package/dist/lib/rerun.js.map +1 -0
- package/dist/lib/retrieve.d.ts +8 -4
- package/dist/lib/retrieve.d.ts.map +1 -1
- package/dist/lib/retrieve.js +12 -2
- package/dist/lib/retrieve.js.map +1 -1
- package/dist/lib/schemas.generated.d.ts.map +1 -1
- package/dist/lib/schemas.generated.js +28 -0
- package/dist/lib/schemas.generated.js.map +1 -1
- package/dist/lib/secrets.d.ts +158 -0
- package/dist/lib/secrets.d.ts.map +1 -0
- package/dist/lib/secrets.js +507 -0
- package/dist/lib/secrets.js.map +1 -0
- package/dist/lib/seed.d.ts +5 -0
- package/dist/lib/seed.d.ts.map +1 -1
- package/dist/lib/seed.js +15 -2
- package/dist/lib/seed.js.map +1 -1
- package/dist/lib/session.d.ts +14 -0
- package/dist/lib/session.d.ts.map +1 -1
- package/dist/lib/session.js +47 -0
- package/dist/lib/session.js.map +1 -1
- package/dist/lib/setup.d.ts +5 -0
- package/dist/lib/setup.d.ts.map +1 -1
- package/dist/lib/setup.js +69 -6
- package/dist/lib/setup.js.map +1 -1
- package/dist/lib/setupWizard.d.ts +51 -0
- package/dist/lib/setupWizard.d.ts.map +1 -0
- package/dist/lib/setupWizard.js +223 -0
- package/dist/lib/setupWizard.js.map +1 -0
- package/dist/lib/speakers.d.ts +41 -0
- package/dist/lib/speakers.d.ts.map +1 -0
- package/dist/lib/speakers.js +78 -0
- package/dist/lib/speakers.js.map +1 -0
- package/dist/lib/status.d.ts.map +1 -1
- package/dist/lib/status.js +21 -0
- package/dist/lib/status.js.map +1 -1
- package/dist/lib/userConfig.d.ts +22 -0
- package/dist/lib/userConfig.d.ts.map +1 -0
- package/dist/lib/userConfig.js +67 -0
- package/dist/lib/userConfig.js.map +1 -0
- package/dist/lib/validate.d.ts +18 -0
- package/dist/lib/validate.d.ts.map +1 -1
- package/dist/lib/validate.js +70 -1
- package/dist/lib/validate.js.map +1 -1
- package/dist/lib/version.d.ts +30 -0
- package/dist/lib/version.d.ts.map +1 -0
- package/dist/lib/version.js +73 -0
- package/dist/lib/version.js.map +1 -0
- package/dist/llm/adapter.d.ts.map +1 -1
- package/dist/llm/adapter.js +2 -0
- package/dist/llm/adapter.js.map +1 -1
- package/dist/llm/providers/ollama.d.ts.map +1 -1
- package/dist/llm/providers/ollama.js +6 -0
- package/dist/llm/providers/ollama.js.map +1 -1
- package/dist/loops/ingest_watcher.d.ts.map +1 -1
- package/dist/loops/ingest_watcher.js +6 -3
- package/dist/loops/ingest_watcher.js.map +1 -1
- package/dist/loops/legacy_worker.d.ts +28 -1
- package/dist/loops/legacy_worker.d.ts.map +1 -1
- package/dist/loops/legacy_worker.js +81 -9
- package/dist/loops/legacy_worker.js.map +1 -1
- package/dist/loops/supervisor.d.ts +3 -0
- package/dist/loops/supervisor.d.ts.map +1 -1
- package/dist/loops/supervisor.js +11 -0
- package/dist/loops/supervisor.js.map +1 -1
- package/dist/loops/synthesis.d.ts.map +1 -1
- package/dist/loops/synthesis.js +15 -0
- package/dist/loops/synthesis.js.map +1 -1
- package/dist/loops/transcribe_worker.d.ts.map +1 -1
- package/dist/loops/transcribe_worker.js +2 -3
- package/dist/loops/transcribe_worker.js.map +1 -1
- package/dist/output.d.ts +13 -1
- package/dist/output.d.ts.map +1 -1
- package/dist/output.js +22 -2
- package/dist/output.js.map +1 -1
- package/dist/render/human.d.ts +20 -0
- package/dist/render/human.d.ts.map +1 -0
- package/dist/render/human.js +54 -0
- package/dist/render/human.js.map +1 -0
- package/dist/router.d.ts.map +1 -1
- package/dist/router.js +17 -2
- package/dist/router.js.map +1 -1
- package/package.json +12 -5
- package/templates/config.toml +6 -1
- package/transcriber/app/diarization.py +36 -6
- package/transcriber/app/legacy_cli.py +90 -0
- package/transcriber/app/pipeline.py +13 -7
- package/transcriber/app/prosody.py +5 -0
- package/transcriber/app/vad.py +82 -11
package/dist/router.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { Command } from 'commander';
|
|
2
|
+
import { registerAgreement } from './commands/agreement.js';
|
|
2
3
|
import { registerBlame } from './commands/blame.js';
|
|
3
4
|
import { registerChat } from './commands/chat.js';
|
|
4
5
|
import { registerCode } from './commands/code.js';
|
|
@@ -7,15 +8,21 @@ import { registerCreate } from './commands/create.js';
|
|
|
7
8
|
import { registerEndorse } from './commands/endorse.js';
|
|
8
9
|
import { registerEvals } from './commands/evals.js';
|
|
9
10
|
import { registerExport } from './commands/export.js';
|
|
11
|
+
import { registerImport } from './commands/import.js';
|
|
10
12
|
import { registerIngest } from './commands/ingest.js';
|
|
11
13
|
import { registerInit } from './commands/init.js';
|
|
14
|
+
import { registerJobs } from './commands/jobs.js';
|
|
15
|
+
import { registerLabel } from './commands/label.js';
|
|
12
16
|
import { registerMigrate } from './commands/migrate.js';
|
|
13
17
|
import { registerModels } from './commands/models.js';
|
|
14
18
|
import { registerQuery } from './commands/query.js';
|
|
19
|
+
import { registerRecode } from './commands/recode.js';
|
|
15
20
|
import { registerReindex } from './commands/reindex.js';
|
|
21
|
+
import { registerRerun } from './commands/rerun.js';
|
|
16
22
|
import { registerRescan } from './commands/rescan.js';
|
|
17
23
|
import { registerSaturate } from './commands/saturate.js';
|
|
18
24
|
import { registerSearch } from './commands/search.js';
|
|
25
|
+
import { registerSecrets } from './commands/secrets.js';
|
|
19
26
|
import { registerServe } from './commands/serve.js';
|
|
20
27
|
import { registerSession } from './commands/session.js';
|
|
21
28
|
import { registerSetup } from './commands/setup.js';
|
|
@@ -26,20 +33,24 @@ import { registerTag } from './commands/tag.js';
|
|
|
26
33
|
import { registerTranscribe } from './commands/transcribe.js';
|
|
27
34
|
import { registerValidate } from './commands/validate.js';
|
|
28
35
|
import { registerWatch } from './commands/watch.js';
|
|
29
|
-
const VERSION = '0.1.
|
|
36
|
+
const VERSION = '0.1.3';
|
|
30
37
|
export function buildProgram() {
|
|
31
38
|
const program = new Command();
|
|
32
39
|
program
|
|
33
40
|
.name('compost')
|
|
34
41
|
.description('Local-first, AI-first research analysis harness for coding agents and humans.')
|
|
35
42
|
.version(VERSION, '-V, --version')
|
|
36
|
-
.option('--human', '
|
|
43
|
+
.option('--human', 'Force human-readable output (auto-on at a TTY; JSON when piped or called by an agent).')
|
|
44
|
+
.option('--json', 'Force machine-readable JSON output (overrides TTY auto-detection).')
|
|
37
45
|
.showHelpAfterError()
|
|
38
46
|
.configureHelp({ sortSubcommands: true });
|
|
39
47
|
registerInit(program);
|
|
40
48
|
registerIngest(program);
|
|
49
|
+
registerImport(program);
|
|
41
50
|
registerTranscribe(program);
|
|
51
|
+
registerLabel(program);
|
|
42
52
|
registerWatch(program);
|
|
53
|
+
registerJobs(program);
|
|
43
54
|
registerSnap(program);
|
|
44
55
|
registerStatus(program);
|
|
45
56
|
registerBlame(program);
|
|
@@ -51,8 +62,12 @@ export function buildProgram() {
|
|
|
51
62
|
registerValidate(program);
|
|
52
63
|
registerTag(program);
|
|
53
64
|
registerCode(program);
|
|
65
|
+
registerRecode(program);
|
|
66
|
+
registerAgreement(program);
|
|
67
|
+
registerRerun(program);
|
|
54
68
|
registerSynthesize(program);
|
|
55
69
|
registerSearch(program);
|
|
70
|
+
registerSecrets(program);
|
|
56
71
|
registerSession(program);
|
|
57
72
|
registerCreate(program);
|
|
58
73
|
registerEndorse(program);
|
package/dist/router.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"router.js","sourceRoot":"","sources":["../src/router.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AAEnC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAA;AACnD,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AACjD,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AACjD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAA;AACrD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAA;AACrD,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAA;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAA;AACnD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAA;AACrD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAA;AACrD,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AACjD,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAA;AACvD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAA;AACrD,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAA;AACnD,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAA;AACvD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAA;AACrD,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAA;AACzD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAA;AACrD,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAA;AACnD,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAA;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAA;AACnD,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AACjD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAA;AACrD,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAA;AAC7D,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAA;AAC/C,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAA;AAC7D,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAA;AACzD,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAA;AAEnD,MAAM,OAAO,GAAG,OAAO,CAAA;AAEvB,MAAM,UAAU,YAAY;IAC1B,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAA;IAE7B,OAAO;SACJ,IAAI,CAAC,SAAS,CAAC;SACf,WAAW,CAAC,+EAA+E,CAAC;SAC5F,OAAO,CAAC,OAAO,EAAE,eAAe,CAAC;SACjC,MAAM,CACL,SAAS,EACT,
|
|
1
|
+
{"version":3,"file":"router.js","sourceRoot":"","sources":["../src/router.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AAEnC,OAAO,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAA;AAC3D,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAA;AACnD,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AACjD,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AACjD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAA;AACrD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAA;AACrD,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAA;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAA;AACnD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAA;AACrD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAA;AACrD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAA;AACrD,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AACjD,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AACjD,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAA;AACnD,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAA;AACvD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAA;AACrD,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAA;AACnD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAA;AACrD,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAA;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAA;AACnD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAA;AACrD,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAA;AACzD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAA;AACrD,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAA;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAA;AACnD,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAA;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAA;AACnD,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AACjD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAA;AACrD,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAA;AAC7D,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAA;AAC/C,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAA;AAC7D,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAA;AACzD,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAA;AAEnD,MAAM,OAAO,GAAG,OAAO,CAAA;AAEvB,MAAM,UAAU,YAAY;IAC1B,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAA;IAE7B,OAAO;SACJ,IAAI,CAAC,SAAS,CAAC;SACf,WAAW,CAAC,+EAA+E,CAAC;SAC5F,OAAO,CAAC,OAAO,EAAE,eAAe,CAAC;SACjC,MAAM,CACL,SAAS,EACT,wFAAwF,CACzF;SACA,MAAM,CAAC,QAAQ,EAAE,oEAAoE,CAAC;SACtF,kBAAkB,EAAE;SACpB,aAAa,CAAC,EAAE,eAAe,EAAE,IAAI,EAAE,CAAC,CAAA;IAE3C,YAAY,CAAC,OAAO,CAAC,CAAA;IACrB,cAAc,CAAC,OA
AO,CAAC,CAAA;IACvB,cAAc,CAAC,OAAO,CAAC,CAAA;IACvB,kBAAkB,CAAC,OAAO,CAAC,CAAA;IAC3B,aAAa,CAAC,OAAO,CAAC,CAAA;IACtB,aAAa,CAAC,OAAO,CAAC,CAAA;IACtB,YAAY,CAAC,OAAO,CAAC,CAAA;IACrB,YAAY,CAAC,OAAO,CAAC,CAAA;IACrB,cAAc,CAAC,OAAO,CAAC,CAAA;IACvB,aAAa,CAAC,OAAO,CAAC,CAAA;IACtB,eAAe,CAAC,OAAO,CAAC,CAAA;IACxB,cAAc,CAAC,OAAO,CAAC,CAAA;IACvB,eAAe,CAAC,OAAO,CAAC,CAAA;IACxB,cAAc,CAAC,OAAO,CAAC,CAAA;IACvB,gBAAgB,CAAC,OAAO,CAAC,CAAA;IACzB,gBAAgB,CAAC,OAAO,CAAC,CAAA;IACzB,WAAW,CAAC,OAAO,CAAC,CAAA;IACpB,YAAY,CAAC,OAAO,CAAC,CAAA;IACrB,cAAc,CAAC,OAAO,CAAC,CAAA;IACvB,iBAAiB,CAAC,OAAO,CAAC,CAAA;IAC1B,aAAa,CAAC,OAAO,CAAC,CAAA;IACtB,kBAAkB,CAAC,OAAO,CAAC,CAAA;IAC3B,cAAc,CAAC,OAAO,CAAC,CAAA;IACvB,eAAe,CAAC,OAAO,CAAC,CAAA;IACxB,eAAe,CAAC,OAAO,CAAC,CAAA;IACxB,cAAc,CAAC,OAAO,CAAC,CAAA;IACvB,eAAe,CAAC,OAAO,CAAC,CAAA;IACxB,aAAa,CAAC,OAAO,CAAC,CAAA;IACtB,aAAa,CAAC,OAAO,CAAC,CAAA;IACtB,YAAY,CAAC,OAAO,CAAC,CAAA;IACrB,aAAa,CAAC,OAAO,CAAC,CAAA;IACtB,cAAc,CAAC,OAAO,CAAC,CAAA;IACvB,aAAa,CAAC,OAAO,CAAC,CAAA;IACtB,cAAc,CAAC,OAAO,CAAC,CAAA;IAEvB,OAAO,OAAO,CAAA;AAChB,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@they-juanreina/compost-cli",
|
|
3
|
-
"version": "0.1.2",
|
|
3
|
+
"version": "0.1.3",
|
|
4
4
|
"description": "Compost CLI engine. Subcommand router, JSON-out, harness loop runner.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"repository": {
|
|
@@ -16,7 +16,14 @@
|
|
|
16
16
|
"compost": "./bin/compost.js"
|
|
17
17
|
},
|
|
18
18
|
"exports": {
|
|
19
|
-
".":
|
|
19
|
+
".": {
|
|
20
|
+
"types": "./dist/index.d.ts",
|
|
21
|
+
"default": "./dist/index.js"
|
|
22
|
+
},
|
|
23
|
+
"./engine": {
|
|
24
|
+
"types": "./dist/engine.d.ts",
|
|
25
|
+
"default": "./dist/engine.js"
|
|
26
|
+
}
|
|
20
27
|
},
|
|
21
28
|
"files": [
|
|
22
29
|
"bin",
|
|
@@ -32,9 +39,9 @@
|
|
|
32
39
|
"chokidar": "^4.0.0",
|
|
33
40
|
"commander": "^12.1.0",
|
|
34
41
|
"smol-toml": "^1.3.0",
|
|
35
|
-
"@they-juanreina/compost-evals": "0.1.
|
|
36
|
-
"@they-juanreina/compost-provenance": "0.1.
|
|
37
|
-
"@they-juanreina/compost-retrieval": "0.1.
|
|
42
|
+
"@they-juanreina/compost-evals": "0.1.3",
|
|
43
|
+
"@they-juanreina/compost-provenance": "0.1.3",
|
|
44
|
+
"@they-juanreina/compost-retrieval": "0.1.3"
|
|
38
45
|
},
|
|
39
46
|
"devDependencies": {
|
|
40
47
|
"@types/better-sqlite3": "^7.6.11",
|
package/templates/config.toml
CHANGED
|
@@ -38,9 +38,14 @@ include_drafts = "marked"
|
|
|
38
38
|
[evals]
|
|
39
39
|
storage = "sqlite"
|
|
40
40
|
|
|
41
|
-
# LLM providers.
|
|
41
|
+
# LLM providers. `api_key_env` is the NAME of an environment variable, never the
|
|
42
|
+
# key itself — compost reads the value from the environment (or, transparently,
|
|
43
|
+
# from the OS keychain / ~/.compost/secrets.env via `compost secrets set`).
|
|
44
|
+
# NEVER paste a raw key here: this file lives in the (shared) seed. See
|
|
45
|
+
# SECURITY.md → "Storing your tokens".
|
|
42
46
|
[providers.ollama]
|
|
43
47
|
base_url = "http://localhost:11434"
|
|
48
|
+
# timeout_ms = 300000 # uncomment for very large local models (slow first load)
|
|
44
49
|
|
|
45
50
|
[providers.lmstudio]
|
|
46
51
|
base_url = "http://localhost:1234/v1"
|
|
@@ -8,6 +8,7 @@ and gating low-confidence sessions — is pure and fully unit-tested.
|
|
|
8
8
|
|
|
9
9
|
from __future__ import annotations
|
|
10
10
|
|
|
11
|
+
import re
|
|
11
12
|
from dataclasses import dataclass
|
|
12
13
|
from functools import lru_cache
|
|
13
14
|
from typing import Any, Protocol
|
|
@@ -129,6 +130,23 @@ def _load_pyannote(token_present: bool) -> DiarizationBackend: # pragma: no cov
|
|
|
129
130
|
return PyannoteBackend()
|
|
130
131
|
|
|
131
132
|
|
|
133
|
+
# pyannote cluster labels look like ``SPEAKER_00``; the captured group is the
# numeric cluster index (possibly zero-padded).
_PYANNOTE_LABEL_RE = re.compile(r"^SPEAKER_(\d+)$")


def normalize_speaker_label(label: str) -> str:
    """Canonicalize a diarization speaker label to the schema's ``^S[0-9]+$`` form.

    pyannote emits cluster labels like ``SPEAKER_00`` / ``SPEAKER_01``; the
    transcript schema (schema/transcript.schema.json $defs.speaker.id and
    $defs.utterance.speaker_id) requires ``S{n}`` — e.g. ``S0``, ``S1``.

    Args:
        label: Speaker label as produced by the diarization stage.

    Returns:
        ``S{n}`` when ``label`` has the pyannote ``SPEAKER_<digits>`` shape,
        with leading zeros dropped (``SPEAKER_00`` → ``S0``). Any other label
        is returned unchanged, so already-canonical labels (``S1``) and the
        ``S?`` orphan sentinel pass through. This makes the function
        idempotent and safe to apply at the single write point in ``align()``.
    """
    m = _PYANNOTE_LABEL_RE.match(label)
    # int() strips the zero padding: "00" -> 0, "012" -> 12.
    return f"S{int(m.group(1))}" if m else label
|
|
148
|
+
|
|
149
|
+
|
|
132
150
|
def _overlap_ms(a_start: int, a_end: int, b_start: int, b_end: int) -> int:
    """Return the length of the intersection of intervals ``a`` and ``b``.

    The intersection runs from the later start to the earlier end; when the
    intervals do not intersect that difference is negative and is clamped to 0.
    """
    return max(0, min(a_end, b_end) - max(a_start, b_start))
|
|
134
152
|
|
|
@@ -220,11 +238,20 @@ def assign_speaker(utterance: dict[str, Any], turns: list[Turn]) -> tuple[str, f
|
|
|
220
238
|
return winner[0], min(winner[1] / u_dur, 1.0)
|
|
221
239
|
|
|
222
240
|
|
|
223
|
-
def detect_overlaps(
|
|
224
|
-
|
|
241
|
+
def detect_overlaps(
|
|
242
|
+
turns: list[Turn], min_overlap_ms: int = 200, start_index: int = 1
|
|
243
|
+
) -> list[dict[str, Any]]:
|
|
244
|
+
"""Find regions where two turns overlap; emit `overlap` cues.
|
|
245
|
+
|
|
246
|
+
Cue ids use the schema's uniform ``CUE-[0-9]{3,}`` space (the cue ``kind``
|
|
247
|
+
already distinguishes overlap cues from ASR-tag cues, so a typed ``CUE-OV-``
|
|
248
|
+
prefix would both duplicate that and violate the id pattern). ``start_index``
|
|
249
|
+
lets the caller continue numbering past any cues already in cues[] so the
|
|
250
|
+
overlap and tag-derived cues share one collision-free id sequence.
|
|
251
|
+
"""
|
|
225
252
|
cues: list[dict[str, Any]] = []
|
|
226
253
|
ordered = sorted(turns, key=lambda t: t.start_ms)
|
|
227
|
-
idx =
|
|
254
|
+
idx = start_index
|
|
228
255
|
for i in range(len(ordered)):
|
|
229
256
|
for j in range(i + 1, len(ordered)):
|
|
230
257
|
a, b = ordered[i], ordered[j]
|
|
@@ -237,7 +264,7 @@ def detect_overlaps(turns: list[Turn], min_overlap_ms: int = 200) -> list[dict[s
|
|
|
237
264
|
if ov_end - ov_start >= min_overlap_ms:
|
|
238
265
|
cues.append(
|
|
239
266
|
{
|
|
240
|
-
"id": f"CUE-
|
|
267
|
+
"id": f"CUE-{idx:03d}",
|
|
241
268
|
"kind": "overlap",
|
|
242
269
|
"start_ms": ov_start,
|
|
243
270
|
"end_ms": ov_end,
|
|
@@ -267,12 +294,15 @@ def align(transcript: dict[str, Any], turns: list[Turn]) -> dict[str, Any]:
|
|
|
267
294
|
rescued = _nearest_turn_speaker(utt["start_ms"], utt["end_ms"], turns)
|
|
268
295
|
if rescued is not None:
|
|
269
296
|
speaker = rescued # confidence stays 0.0 (fallback marker)
|
|
270
|
-
|
|
297
|
+
# Canonicalize pyannote's SPEAKER_NN labels to the schema's S{n} form at
|
|
298
|
+
# the single write point so speakers[].id (derived from these) and every
|
|
299
|
+
# utterances[].speaker_id agree with ^S[0-9]+$.
|
|
300
|
+
utt["speaker_id"] = normalize_speaker_label(speaker)
|
|
271
301
|
utt.setdefault("diarization", {})["confidence"] = round(conf, 3)
|
|
272
302
|
confidences.append(conf)
|
|
273
303
|
|
|
274
304
|
cues = transcript.setdefault("cues", [])
|
|
275
|
-
cues.extend(detect_overlaps(turns))
|
|
305
|
+
cues.extend(detect_overlaps(turns, start_index=len(cues) + 1))
|
|
276
306
|
|
|
277
307
|
mean_conf = sum(confidences) / len(confidences) if confidences else 0.0
|
|
278
308
|
if mean_conf < DIARIZATION_CONFIDENCE_FLOOR:
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""Native (host) legacy-ingest entrypoint (#184).
|
|
2
|
+
|
|
3
|
+
Mirrors `app.transcribe_cli` for documents: runs the pure ingestors in
|
|
4
|
+
`app.legacy.ingest` in a host Python venv so PDF/DOCX/PPTX/CSV/XLSX/TXT ingest
|
|
5
|
+
works WITHOUT the Docker transcriber (demoted to a fallback). Shares the exact
|
|
6
|
+
write + response shape as the `/legacy-ingest` route so the Node legacy-worker
|
|
7
|
+
treats native and Docker results identically.
|
|
8
|
+
|
|
9
|
+
Usage:
|
|
10
|
+
python -m app.legacy_cli --seed-path <seed> --source-path <file> \
|
|
11
|
+
[--text-col COL] [--speaker-col COL] [--sheet NAME]
|
|
12
|
+
Prints exactly one JSON line; exit 0 on ok/empty, 1 on failure.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import argparse
|
|
18
|
+
import json
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
from .legacy import ingest as ingest_legacy
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def main(argv: list[str] | None = None) -> int:
    """Ingest one legacy document on the host and report the result as JSON.

    Parses the command line, runs ``ingest_legacy`` on the source file, writes
    the normalized document to ``<seed>/legacy/<source stem>.json`` and prints
    a single JSON line describing the outcome.

    Args:
        argv: Argument vector to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        ``0`` when the document was written (status ``ok``, or ``empty`` when
        it produced no utterances); ``1`` when the source or seed path is
        missing, or when ingest raised ``ValueError`` / ``RuntimeError``.
    """
    p = argparse.ArgumentParser(prog="compost-legacy-native")
    p.add_argument("--seed-path", required=True)
    p.add_argument("--source-path", required=True)
    p.add_argument("--text-col", default=None)
    p.add_argument("--speaker-col", default=None)
    p.add_argument("--sheet", default=None)
    args = p.parse_args(argv)

    src = Path(args.source_path)
    seed = Path(args.seed_path)
    if not src.exists():
        return _emit_failure("invalid_input", f"source not found: {src}")
    if not seed.exists():
        return _emit_failure("invalid_input", f"seed not found: {seed}")

    # Forward only the options the caller actually supplied, so the ingestor's
    # own defaults apply to everything else.
    optional = {
        "text_col": args.text_col,
        "speaker_col": args.speaker_col,
        "sheet": args.sheet,
    }
    kwargs: dict[str, str] = {k: v for k, v in optional.items() if v is not None}

    try:
        doc = ingest_legacy(src, **kwargs)
    except ValueError as e:  # unsupported ext / missing column
        return _emit_failure("invalid_input", str(e))
    except RuntimeError as e:  # missing optional dep (python-docx, openpyxl, …)
        return _emit_failure("dep_missing", str(e))

    # NOTE(review): an OSError from mkdir/write_text below still propagates as
    # a traceback rather than a JSON failure line — confirm which `kind` the
    # Node legacy-worker expects before mapping it.
    legacy_dir = seed / "legacy"
    legacy_dir.mkdir(parents=True, exist_ok=True)
    out_path = legacy_dir / f"{src.stem}.json"
    out_path.write_text(json.dumps(doc, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")

    # `or` fallbacks: a key that is present but null must behave like a
    # missing key instead of raising TypeError in len() / the comparison.
    utt_count = len(doc.get("utterances") or [])
    prov = doc.get("provenance") or {}
    warnings: list[str] = []
    skipped = prov.get("xlsx_rows_skipped_empty_text") or 0
    if skipped > 0:
        warnings.append(
            f"{skipped} XLSX row(s) had data in other columns but an empty text cell — "
            "likely an un-evaluated formula. Open the file in Excel once, or export to CSV."
        )

    print(
        json.dumps(
            {
                "status": "ok" if utt_count > 0 else "empty",
                "source_path": str(src),
                "normalized_path": str(out_path),
                "utterance_count": utt_count,
                "text_col_resolved": prov.get("text_col_resolved"),
                "warnings": warnings,
            }
        )
    )
    return 0


def _emit_failure(kind: str, error: str) -> int:
    """Print the one-line JSON failure payload and return exit status 1."""
    print(json.dumps({"status": "failed", "kind": kind, "error": error}))
    return 1
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
if __name__ == "__main__":
|
|
90
|
+
raise SystemExit(main())
|
|
@@ -23,7 +23,7 @@ from .cue_parser import parse_transcript_cues
|
|
|
23
23
|
from .diarization import DiarizationBackend, Diarizer, align
|
|
24
24
|
from .prosody import annotate_prosody
|
|
25
25
|
from .silence_typer import type_all_silences
|
|
26
|
-
from .vad import VAD, VADBackend, silences_to_schema
|
|
26
|
+
from .vad import VAD, VADBackend, silences_to_schema, utterance_energies
|
|
27
27
|
|
|
28
28
|
SCHEMA_VERSION = "1.0"
|
|
29
29
|
DEFAULT_TRANSCRIBER_VERSION = "compost-transcriber@0.1.0"
|
|
@@ -122,9 +122,9 @@ def run_pipeline(
|
|
|
122
122
|
|
|
123
123
|
duration_ms = probe_duration_ms(source_path)
|
|
124
124
|
|
|
125
|
-
# 1. VAD — speech segments + first-class silences
|
|
125
|
+
# 1. VAD — speech segments (carry per-segment RMS energy) + first-class silences
|
|
126
126
|
vad = VAD(backend=backends.vad)
|
|
127
|
-
|
|
127
|
+
speech, silences = vad.segment(source_path, duration_ms)
|
|
128
128
|
|
|
129
129
|
# 2. ASR — utterances with word timings, may contain event tags inline
|
|
130
130
|
asr = Transcriber(config=config.asr, backend=backends.asr)
|
|
@@ -145,13 +145,16 @@ def run_pipeline(
|
|
|
145
145
|
"cues": [],
|
|
146
146
|
"frames": [],
|
|
147
147
|
"glossary_refs": [],
|
|
148
|
+
# frame_capture / frame_annotation are omitted (not null): the schema
|
|
149
|
+
# types provenance fields as strings and the convention is "absent when
|
|
150
|
+
# not applicable". run_pipeline does no frame capture/annotation; those
|
|
151
|
+
# stages (frames.py / frame_annotation.py) add their own provenance when
|
|
152
|
+
# they run.
|
|
148
153
|
"provenance": {
|
|
149
154
|
"transcriber": config.transcriber_version,
|
|
150
155
|
"asr_model": config.asr_model_tag,
|
|
151
156
|
"diarizer": config.diarizer_tag,
|
|
152
157
|
"audio_cues": f"{config.vad_tag} + whisper-events",
|
|
153
|
-
"frame_capture": None,
|
|
154
|
-
"frame_annotation": None,
|
|
155
158
|
},
|
|
156
159
|
}
|
|
157
160
|
|
|
@@ -169,8 +172,11 @@ def run_pipeline(
|
|
|
169
172
|
# 7. Silence semantic typing (after_question / thinking / interruption / …)
|
|
170
173
|
type_all_silences(transcript)
|
|
171
174
|
|
|
172
|
-
# 8. Prosody hints per utterance (deterministic, cheap)
|
|
173
|
-
|
|
175
|
+
# 8. Prosody hints per utterance (deterministic, cheap). Volume bucketing
|
|
176
|
+
# needs the per-utterance VAD RMS energy signal mapped from the speech
|
|
177
|
+
# segments; without it volume would default to "normal" for every utterance.
|
|
178
|
+
energies = utterance_energies(speech, transcript["utterances"])
|
|
179
|
+
annotate_prosody(transcript, energies)
|
|
174
180
|
|
|
175
181
|
return transcript
|
|
176
182
|
|
|
@@ -31,6 +31,11 @@ from typing import Any
|
|
|
31
31
|
|
|
32
32
|
PACE_SLOW_WPS = 2.0
|
|
33
33
|
PACE_FAST_WPS = 3.3
|
|
34
|
+
# Volume thresholds applied to the normalized 0..1 energy. These are GLOBAL
|
|
35
|
+
# constants, not per-speaker. The energy signal is session-normalized in
|
|
36
|
+
# vad.utterance_energies(); a soft speaker's loudest moment can still read below
|
|
37
|
+
# a loud speaker's baseline. TODO(#13): per-speaker normalization would make
|
|
38
|
+
# these buckets speaker-relative — see vad.utterance_energies for the plan.
|
|
34
39
|
VOLUME_LOW = 0.33
|
|
35
40
|
VOLUME_HIGH = 0.66
|
|
36
41
|
HESITATION_GAP_MS = 400
|
package/transcriber/app/vad.py
CHANGED
|
@@ -23,6 +23,10 @@ MIN_FIRST_CLASS_SILENCE_MS = 1500
|
|
|
23
23
|
class Segment:
|
|
24
24
|
start_ms: int
|
|
25
25
|
end_ms: int
|
|
26
|
+
# Mean RMS energy over the segment's waveform (raw amplitude, ~0..1; speech
|
|
27
|
+
# peaks well below 1.0). `None` when the backend doesn't report it — the
|
|
28
|
+
# prosody stage then leaves volume at "normal" rather than guessing.
|
|
29
|
+
energy: float | None = None
|
|
26
30
|
|
|
27
31
|
@property
|
|
28
32
|
def duration_ms(self) -> int:
|
|
@@ -30,7 +34,10 @@ class Segment:
|
|
|
30
34
|
|
|
31
35
|
|
|
32
36
|
class VADBackend(Protocol):
    """Structural interface for a voice-activity-detection backend."""

    def speech_timestamps(self, audio_path: str) -> list[dict[str, Any]]:
        """Return the speech segments found in ``audio_path``.

        Each segment is a ``{"start_ms", "end_ms"}`` dict, optionally with a
        float ``"energy"`` (mean RMS over the segment).
        """
        ...
|
|
34
41
|
|
|
35
42
|
|
|
36
43
|
SILERO_SAMPLE_RATE = 16000
|
|
@@ -60,7 +67,7 @@ class SileroBackend: # pragma: no cover - needs torch + weights
|
|
|
60
67
|
self._read_audio = read_audio
|
|
61
68
|
self._get_speech_timestamps = get_speech_timestamps
|
|
62
69
|
|
|
63
|
-
def speech_timestamps(self, audio_path: str) -> list[dict[str,
|
|
70
|
+
def speech_timestamps(self, audio_path: str) -> list[dict[str, Any]]:
|
|
64
71
|
wav = self._read_audio(audio_path, sampling_rate=SILERO_SAMPLE_RATE)
|
|
65
72
|
raw = self._get_speech_timestamps(
|
|
66
73
|
wav,
|
|
@@ -68,15 +75,23 @@ class SileroBackend: # pragma: no cover - needs torch + weights
|
|
|
68
75
|
sampling_rate=SILERO_SAMPLE_RATE,
|
|
69
76
|
return_seconds=False,
|
|
70
77
|
)
|
|
71
|
-
# `raw` is a list of {start, end} in samples; convert to ms
|
|
78
|
+
# `raw` is a list of {start, end} in samples; convert to ms and compute
|
|
79
|
+
# the mean RMS energy over each segment's waveform window (#9 → #13).
|
|
80
|
+
# `wav` is a mono float tensor in [-1, 1]; RMS = sqrt(mean(x^2)).
|
|
72
81
|
ms_per_sample = 1000 / SILERO_SAMPLE_RATE
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
82
|
+
out: list[dict[str, Any]] = []
|
|
83
|
+
for seg in raw:
|
|
84
|
+
start_sample, end_sample = int(seg["start"]), int(seg["end"])
|
|
85
|
+
window = wav[start_sample:end_sample]
|
|
86
|
+
rms = float(window.pow(2).mean().sqrt()) if window.numel() else 0.0
|
|
87
|
+
out.append(
|
|
88
|
+
{
|
|
89
|
+
"start_ms": int(seg["start"] * ms_per_sample),
|
|
90
|
+
"end_ms": int(seg["end"] * ms_per_sample),
|
|
91
|
+
"energy": rms,
|
|
92
|
+
}
|
|
93
|
+
)
|
|
94
|
+
return out
|
|
80
95
|
|
|
81
96
|
|
|
82
97
|
@lru_cache(maxsize=1)
|
|
@@ -130,6 +145,55 @@ def silences_to_schema(silences: list[Segment]) -> list[dict[str, Any]]:
|
|
|
130
145
|
return out
|
|
131
146
|
|
|
132
147
|
|
|
148
|
+
def utterance_energies(
    speech: list[Segment],
    utterances: list[dict[str, Any]],
) -> dict[str, float]:
    """Map utterance id → mean VAD RMS energy, normalized 0..1 across the session.

    Pure. Feeds ``prosody.annotate_prosody(transcript, energies=...)`` so the
    low|normal|high volume bucketing actually runs (without this signal volume
    defaults to "normal" for every utterance).

    Each utterance's raw energy is the overlap-duration-weighted mean RMS of the
    speech segments it spans. Segments with no energy reading — ``None`` or a
    non-finite value (NaN/inf) — are ignored. Raw speech RMS peaks far below
    1.0, so applying prosody's fixed 0.33/0.66 split to raw values would bucket
    everything as "low" — we normalize by the loudest utterance in the session
    so the split is meaningful and reproducible.

    Args:
        speech: Speech segments exposing ``start_ms``, ``end_ms`` and ``energy``.
        utterances: Utterance dicts; ``"id"``, ``"start_ms"`` and ``"end_ms"``
            are read. Entries without an id are skipped; a missing or ``None``
            timestamp is treated as 0.

    Returns:
        ``{utterance_id: energy / session_peak}``. Utterances with no
        overlapping energy-bearing segment are omitted, so the caller reports
        "normal" rather than guessing. Empty when no utterance has a positive
        energy.

    TODO(#13): normalization is per-session (global max) and prosody's
    VOLUME_LOW/HIGH are global constants, so a soft speaker's loudest moment
    still reads quieter than a loud speaker's baseline. Per-speaker
    normalization (group by ``utterance["speaker_id"]`` and normalize within
    each speaker) would make the buckets speaker-relative. Out of scope here.
    """
    import math

    # Filter once, outside the per-utterance loop. A single NaN/inf reading
    # would otherwise flow into max() and the final division and corrupt the
    # normalized value of every utterance in the session.
    readings = [
        (seg.start_ms, seg.end_ms, seg.energy)
        for seg in speech
        if seg.energy is not None and math.isfinite(seg.energy)
    ]

    raw: dict[str, float] = {}
    for utt in utterances:
        uid = utt.get("id")
        if uid is None:
            continue
        # `or 0` folds an explicit None timestamp into the same default as a
        # missing key instead of raising TypeError inside min()/max().
        u_start = utt.get("start_ms") or 0
        u_end = utt.get("end_ms") or 0
        weighted_sum = 0.0
        total_overlap = 0
        for seg_start, seg_end, energy in readings:
            overlap = min(u_end, seg_end) - max(u_start, seg_start)
            if overlap <= 0:
                continue
            weighted_sum += energy * overlap
            total_overlap += overlap
        if total_overlap > 0:
            raw[uid] = weighted_sum / total_overlap

    peak = max(raw.values(), default=0.0)
    if peak <= 0:
        # Nothing measurable (or pure silence): report no energies at all.
        return {}
    return {uid: value / peak for uid, value in raw.items()}
|
|
195
|
+
|
|
196
|
+
|
|
133
197
|
class VAD:
|
|
134
198
|
def __init__(self, backend: VADBackend | None = None):
|
|
135
199
|
self._backend = backend
|
|
@@ -140,6 +204,13 @@ class VAD:
|
|
|
140
204
|
def segment(self, audio_path: str, total_duration_ms: int) -> tuple[list[Segment], list[Segment]]:
    """Return (speech_segments, first_class_silences).

    Each backend timestamp becomes a ``Segment``; its energy is carried over
    as a float when the backend reported one and left as ``None`` otherwise.
    The silences are derived from the speech segments and the total duration.
    """
    speech: list[Segment] = []
    for stamp in self._get_backend().speech_timestamps(audio_path):
        start_ms = int(stamp["start_ms"])
        end_ms = int(stamp["end_ms"])
        reading = stamp.get("energy")
        energy = None if reading is None else float(reading)
        speech.append(Segment(start_ms, end_ms, energy))
    return speech, speech_to_silences(speech, total_duration_ms)
|