@they-juanreina/compost-cli 0.1.0-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +5 -0
- package/bin/compost.js +7 -0
- package/dist/commands/_stub.d.ts +9 -0
- package/dist/commands/_stub.d.ts.map +1 -0
- package/dist/commands/_stub.js +27 -0
- package/dist/commands/_stub.js.map +1 -0
- package/dist/commands/blame.d.ts +3 -0
- package/dist/commands/blame.d.ts.map +1 -0
- package/dist/commands/blame.js +31 -0
- package/dist/commands/blame.js.map +1 -0
- package/dist/commands/chat.d.ts +8 -0
- package/dist/commands/chat.d.ts.map +1 -0
- package/dist/commands/chat.js +99 -0
- package/dist/commands/chat.js.map +1 -0
- package/dist/commands/code.d.ts +3 -0
- package/dist/commands/code.d.ts.map +1 -0
- package/dist/commands/code.js +70 -0
- package/dist/commands/code.js.map +1 -0
- package/dist/commands/config.d.ts +3 -0
- package/dist/commands/config.d.ts.map +1 -0
- package/dist/commands/config.js +76 -0
- package/dist/commands/config.js.map +1 -0
- package/dist/commands/create.d.ts +20 -0
- package/dist/commands/create.d.ts.map +1 -0
- package/dist/commands/create.js +148 -0
- package/dist/commands/create.js.map +1 -0
- package/dist/commands/endorse.d.ts +3 -0
- package/dist/commands/endorse.d.ts.map +1 -0
- package/dist/commands/endorse.js +31 -0
- package/dist/commands/endorse.js.map +1 -0
- package/dist/commands/evals.d.ts +3 -0
- package/dist/commands/evals.d.ts.map +1 -0
- package/dist/commands/evals.js +54 -0
- package/dist/commands/evals.js.map +1 -0
- package/dist/commands/export.d.ts +3 -0
- package/dist/commands/export.d.ts.map +1 -0
- package/dist/commands/export.js +54 -0
- package/dist/commands/export.js.map +1 -0
- package/dist/commands/ingest.d.ts +3 -0
- package/dist/commands/ingest.d.ts.map +1 -0
- package/dist/commands/ingest.js +26 -0
- package/dist/commands/ingest.js.map +1 -0
- package/dist/commands/init.d.ts +3 -0
- package/dist/commands/init.d.ts.map +1 -0
- package/dist/commands/init.js +51 -0
- package/dist/commands/init.js.map +1 -0
- package/dist/commands/migrate.d.ts +3 -0
- package/dist/commands/migrate.d.ts.map +1 -0
- package/dist/commands/migrate.js +28 -0
- package/dist/commands/migrate.js.map +1 -0
- package/dist/commands/models.d.ts +3 -0
- package/dist/commands/models.d.ts.map +1 -0
- package/dist/commands/models.js +32 -0
- package/dist/commands/models.js.map +1 -0
- package/dist/commands/query.d.ts +3 -0
- package/dist/commands/query.d.ts.map +1 -0
- package/dist/commands/query.js +13 -0
- package/dist/commands/query.js.map +1 -0
- package/dist/commands/reindex.d.ts +3 -0
- package/dist/commands/reindex.d.ts.map +1 -0
- package/dist/commands/reindex.js +53 -0
- package/dist/commands/reindex.js.map +1 -0
- package/dist/commands/rescan.d.ts +3 -0
- package/dist/commands/rescan.d.ts.map +1 -0
- package/dist/commands/rescan.js +60 -0
- package/dist/commands/rescan.js.map +1 -0
- package/dist/commands/saturate.d.ts +3 -0
- package/dist/commands/saturate.d.ts.map +1 -0
- package/dist/commands/saturate.js +35 -0
- package/dist/commands/saturate.js.map +1 -0
- package/dist/commands/search.d.ts +3 -0
- package/dist/commands/search.d.ts.map +1 -0
- package/dist/commands/search.js +52 -0
- package/dist/commands/search.js.map +1 -0
- package/dist/commands/serve.d.ts +3 -0
- package/dist/commands/serve.d.ts.map +1 -0
- package/dist/commands/serve.js +9 -0
- package/dist/commands/serve.js.map +1 -0
- package/dist/commands/session.d.ts +3 -0
- package/dist/commands/session.d.ts.map +1 -0
- package/dist/commands/session.js +25 -0
- package/dist/commands/session.js.map +1 -0
- package/dist/commands/setup.d.ts +3 -0
- package/dist/commands/setup.d.ts.map +1 -0
- package/dist/commands/setup.js +38 -0
- package/dist/commands/setup.js.map +1 -0
- package/dist/commands/snap.d.ts +3 -0
- package/dist/commands/snap.d.ts.map +1 -0
- package/dist/commands/snap.js +27 -0
- package/dist/commands/snap.js.map +1 -0
- package/dist/commands/status.d.ts +3 -0
- package/dist/commands/status.d.ts.map +1 -0
- package/dist/commands/status.js +26 -0
- package/dist/commands/status.js.map +1 -0
- package/dist/commands/synthesize.d.ts +3 -0
- package/dist/commands/synthesize.d.ts.map +1 -0
- package/dist/commands/synthesize.js +13 -0
- package/dist/commands/synthesize.js.map +1 -0
- package/dist/commands/tag.d.ts +3 -0
- package/dist/commands/tag.d.ts.map +1 -0
- package/dist/commands/tag.js +46 -0
- package/dist/commands/tag.js.map +1 -0
- package/dist/commands/transcribe.d.ts +3 -0
- package/dist/commands/transcribe.d.ts.map +1 -0
- package/dist/commands/transcribe.js +110 -0
- package/dist/commands/transcribe.js.map +1 -0
- package/dist/commands/validate.d.ts +3 -0
- package/dist/commands/validate.d.ts.map +1 -0
- package/dist/commands/validate.js +81 -0
- package/dist/commands/validate.js.map +1 -0
- package/dist/commands/watch.d.ts +3 -0
- package/dist/commands/watch.d.ts.map +1 -0
- package/dist/commands/watch.js +50 -0
- package/dist/commands/watch.js.map +1 -0
- package/dist/errors.d.ts +9 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +12 -0
- package/dist/errors.js.map +1 -0
- package/dist/exporters/csv.d.ts +8 -0
- package/dist/exporters/csv.d.ts.map +1 -0
- package/dist/exporters/csv.js +44 -0
- package/dist/exporters/csv.js.map +1 -0
- package/dist/exporters/eaf.d.ts +3 -0
- package/dist/exporters/eaf.d.ts.map +1 -0
- package/dist/exporters/eaf.js +91 -0
- package/dist/exporters/eaf.js.map +1 -0
- package/dist/exporters/html.d.ts +8 -0
- package/dist/exporters/html.d.ts.map +1 -0
- package/dist/exporters/html.js +47 -0
- package/dist/exporters/html.js.map +1 -0
- package/dist/exporters/md.d.ts +3 -0
- package/dist/exporters/md.d.ts.map +1 -0
- package/dist/exporters/md.js +43 -0
- package/dist/exporters/md.js.map +1 -0
- package/dist/exporters/pdf.d.ts +9 -0
- package/dist/exporters/pdf.d.ts.map +1 -0
- package/dist/exporters/pdf.js +40 -0
- package/dist/exporters/pdf.js.map +1 -0
- package/dist/exporters/report.d.ts +43 -0
- package/dist/exporters/report.d.ts.map +1 -0
- package/dist/exporters/report.js +49 -0
- package/dist/exporters/report.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +6 -0
- package/dist/index.js.map +1 -0
- package/dist/legacy_client.d.ts +38 -0
- package/dist/legacy_client.d.ts.map +1 -0
- package/dist/legacy_client.js +44 -0
- package/dist/legacy_client.js.map +1 -0
- package/dist/lib/answerSchema.d.ts +2 -0
- package/dist/lib/answerSchema.d.ts.map +1 -0
- package/dist/lib/answerSchema.js +27 -0
- package/dist/lib/answerSchema.js.map +1 -0
- package/dist/lib/artifacts.d.ts +64 -0
- package/dist/lib/artifacts.d.ts.map +1 -0
- package/dist/lib/artifacts.js +254 -0
- package/dist/lib/artifacts.js.map +1 -0
- package/dist/lib/blame.d.ts +30 -0
- package/dist/lib/blame.d.ts.map +1 -0
- package/dist/lib/blame.js +164 -0
- package/dist/lib/blame.js.map +1 -0
- package/dist/lib/canonicalSessions.d.ts +24 -0
- package/dist/lib/canonicalSessions.d.ts.map +1 -0
- package/dist/lib/canonicalSessions.js +48 -0
- package/dist/lib/canonicalSessions.js.map +1 -0
- package/dist/lib/chat.d.ts +24 -0
- package/dist/lib/chat.d.ts.map +1 -0
- package/dist/lib/chat.js +50 -0
- package/dist/lib/chat.js.map +1 -0
- package/dist/lib/config.d.ts +45 -0
- package/dist/lib/config.d.ts.map +1 -0
- package/dist/lib/config.js +143 -0
- package/dist/lib/config.js.map +1 -0
- package/dist/lib/dispatch.d.ts +10 -0
- package/dist/lib/dispatch.d.ts.map +1 -0
- package/dist/lib/dispatch.js +31 -0
- package/dist/lib/dispatch.js.map +1 -0
- package/dist/lib/doctor.d.ts +19 -0
- package/dist/lib/doctor.d.ts.map +1 -0
- package/dist/lib/doctor.js +43 -0
- package/dist/lib/doctor.js.map +1 -0
- package/dist/lib/events.d.ts +42 -0
- package/dist/lib/events.d.ts.map +1 -0
- package/dist/lib/events.js +57 -0
- package/dist/lib/events.js.map +1 -0
- package/dist/lib/export.d.ts +14 -0
- package/dist/lib/export.d.ts.map +1 -0
- package/dist/lib/export.js +36 -0
- package/dist/lib/export.js.map +1 -0
- package/dist/lib/ingest.d.ts +21 -0
- package/dist/lib/ingest.d.ts.map +1 -0
- package/dist/lib/ingest.js +68 -0
- package/dist/lib/ingest.js.map +1 -0
- package/dist/lib/journal.d.ts +23 -0
- package/dist/lib/journal.d.ts.map +1 -0
- package/dist/lib/journal.js +95 -0
- package/dist/lib/journal.js.map +1 -0
- package/dist/lib/migrate.d.ts +29 -0
- package/dist/lib/migrate.d.ts.map +1 -0
- package/dist/lib/migrate.js +117 -0
- package/dist/lib/migrate.js.map +1 -0
- package/dist/lib/nativeRuntime.d.ts +46 -0
- package/dist/lib/nativeRuntime.d.ts.map +1 -0
- package/dist/lib/nativeRuntime.js +61 -0
- package/dist/lib/nativeRuntime.js.map +1 -0
- package/dist/lib/provisionNative.d.ts +46 -0
- package/dist/lib/provisionNative.d.ts.map +1 -0
- package/dist/lib/provisionNative.js +105 -0
- package/dist/lib/provisionNative.js.map +1 -0
- package/dist/lib/queue.d.ts +36 -0
- package/dist/lib/queue.d.ts.map +1 -0
- package/dist/lib/queue.js +102 -0
- package/dist/lib/queue.js.map +1 -0
- package/dist/lib/retrieve.d.ts +51 -0
- package/dist/lib/retrieve.d.ts.map +1 -0
- package/dist/lib/retrieve.js +92 -0
- package/dist/lib/retrieve.js.map +1 -0
- package/dist/lib/saturate.d.ts +21 -0
- package/dist/lib/saturate.d.ts.map +1 -0
- package/dist/lib/saturate.js +132 -0
- package/dist/lib/saturate.js.map +1 -0
- package/dist/lib/schemas.generated.d.ts +15 -0
- package/dist/lib/schemas.generated.d.ts.map +1 -0
- package/dist/lib/schemas.generated.js +975 -0
- package/dist/lib/schemas.generated.js.map +1 -0
- package/dist/lib/seed.d.ts +16 -0
- package/dist/lib/seed.d.ts.map +1 -0
- package/dist/lib/seed.js +64 -0
- package/dist/lib/seed.js.map +1 -0
- package/dist/lib/seedResolve.d.ts +6 -0
- package/dist/lib/seedResolve.d.ts.map +1 -0
- package/dist/lib/seedResolve.js +27 -0
- package/dist/lib/seedResolve.js.map +1 -0
- package/dist/lib/session.d.ts +19 -0
- package/dist/lib/session.d.ts.map +1 -0
- package/dist/lib/session.js +61 -0
- package/dist/lib/session.js.map +1 -0
- package/dist/lib/setup.d.ts +44 -0
- package/dist/lib/setup.d.ts.map +1 -0
- package/dist/lib/setup.js +249 -0
- package/dist/lib/setup.js.map +1 -0
- package/dist/lib/snap.d.ts +15 -0
- package/dist/lib/snap.d.ts.map +1 -0
- package/dist/lib/snap.js +115 -0
- package/dist/lib/snap.js.map +1 -0
- package/dist/lib/status.d.ts +39 -0
- package/dist/lib/status.d.ts.map +1 -0
- package/dist/lib/status.js +174 -0
- package/dist/lib/status.js.map +1 -0
- package/dist/lib/tagcode.d.ts +26 -0
- package/dist/lib/tagcode.d.ts.map +1 -0
- package/dist/lib/tagcode.js +231 -0
- package/dist/lib/tagcode.js.map +1 -0
- package/dist/lib/templates.d.ts +3 -0
- package/dist/lib/templates.d.ts.map +1 -0
- package/dist/lib/templates.js +21 -0
- package/dist/lib/templates.js.map +1 -0
- package/dist/lib/transcribeNative.d.ts +44 -0
- package/dist/lib/transcribeNative.d.ts.map +1 -0
- package/dist/lib/transcribeNative.js +53 -0
- package/dist/lib/transcribeNative.js.map +1 -0
- package/dist/lib/transcript.d.ts +68 -0
- package/dist/lib/transcript.d.ts.map +1 -0
- package/dist/lib/transcript.js +10 -0
- package/dist/lib/transcript.js.map +1 -0
- package/dist/lib/validate.d.ts +16 -0
- package/dist/lib/validate.d.ts.map +1 -0
- package/dist/lib/validate.js +155 -0
- package/dist/lib/validate.js.map +1 -0
- package/dist/llm/adapter.d.ts +33 -0
- package/dist/llm/adapter.d.ts.map +1 -0
- package/dist/llm/adapter.js +106 -0
- package/dist/llm/adapter.js.map +1 -0
- package/dist/llm/http.d.ts +9 -0
- package/dist/llm/http.d.ts.map +1 -0
- package/dist/llm/http.js +56 -0
- package/dist/llm/http.js.map +1 -0
- package/dist/llm/providers/anthropic.d.ts +14 -0
- package/dist/llm/providers/anthropic.d.ts.map +1 -0
- package/dist/llm/providers/anthropic.js +66 -0
- package/dist/llm/providers/anthropic.js.map +1 -0
- package/dist/llm/providers/lmstudio.d.ts +7 -0
- package/dist/llm/providers/lmstudio.d.ts.map +1 -0
- package/dist/llm/providers/lmstudio.js +8 -0
- package/dist/llm/providers/lmstudio.js.map +1 -0
- package/dist/llm/providers/ollama.d.ts +12 -0
- package/dist/llm/providers/ollama.d.ts.map +1 -0
- package/dist/llm/providers/ollama.js +87 -0
- package/dist/llm/providers/ollama.js.map +1 -0
- package/dist/llm/providers/openai.d.ts +7 -0
- package/dist/llm/providers/openai.d.ts.map +1 -0
- package/dist/llm/providers/openai.js +8 -0
- package/dist/llm/providers/openai.js.map +1 -0
- package/dist/llm/providers/openai_compatible.d.ts +18 -0
- package/dist/llm/providers/openai_compatible.d.ts.map +1 -0
- package/dist/llm/providers/openai_compatible.js +70 -0
- package/dist/llm/providers/openai_compatible.js.map +1 -0
- package/dist/llm/types.d.ts +60 -0
- package/dist/llm/types.d.ts.map +1 -0
- package/dist/llm/types.js +2 -0
- package/dist/llm/types.js.map +1 -0
- package/dist/logging.d.ts +22 -0
- package/dist/logging.d.ts.map +1 -0
- package/dist/logging.js +58 -0
- package/dist/logging.js.map +1 -0
- package/dist/loops/embed_worker.d.ts +44 -0
- package/dist/loops/embed_worker.d.ts.map +1 -0
- package/dist/loops/embed_worker.js +144 -0
- package/dist/loops/embed_worker.js.map +1 -0
- package/dist/loops/eval_grader.d.ts +19 -0
- package/dist/loops/eval_grader.d.ts.map +1 -0
- package/dist/loops/eval_grader.js +22 -0
- package/dist/loops/eval_grader.js.map +1 -0
- package/dist/loops/ingest_watcher.d.ts +17 -0
- package/dist/loops/ingest_watcher.d.ts.map +1 -0
- package/dist/loops/ingest_watcher.js +66 -0
- package/dist/loops/ingest_watcher.js.map +1 -0
- package/dist/loops/legacy_worker.d.ts +26 -0
- package/dist/loops/legacy_worker.d.ts.map +1 -0
- package/dist/loops/legacy_worker.js +102 -0
- package/dist/loops/legacy_worker.js.map +1 -0
- package/dist/loops/supervisor.d.ts +71 -0
- package/dist/loops/supervisor.d.ts.map +1 -0
- package/dist/loops/supervisor.js +122 -0
- package/dist/loops/supervisor.js.map +1 -0
- package/dist/loops/synthesis.d.ts +31 -0
- package/dist/loops/synthesis.d.ts.map +1 -0
- package/dist/loops/synthesis.js +71 -0
- package/dist/loops/synthesis.js.map +1 -0
- package/dist/loops/transcribe_worker.d.ts +22 -0
- package/dist/loops/transcribe_worker.d.ts.map +1 -0
- package/dist/loops/transcribe_worker.js +77 -0
- package/dist/loops/transcribe_worker.js.map +1 -0
- package/dist/output.d.ts +8 -0
- package/dist/output.d.ts.map +1 -0
- package/dist/output.js +26 -0
- package/dist/output.js.map +1 -0
- package/dist/render/transcript_md.d.ts +5 -0
- package/dist/render/transcript_md.d.ts.map +1 -0
- package/dist/render/transcript_md.js +14 -0
- package/dist/render/transcript_md.js.map +1 -0
- package/dist/router.d.ts +3 -0
- package/dist/router.d.ts.map +1 -0
- package/dist/router.js +68 -0
- package/dist/router.js.map +1 -0
- package/dist/transcriber_client.d.ts +42 -0
- package/dist/transcriber_client.d.ts.map +1 -0
- package/dist/transcriber_client.js +72 -0
- package/dist/transcriber_client.js.map +1 -0
- package/package.json +45 -0
- package/templates/AGENTS.md +25 -0
- package/templates/config.toml +56 -0
- package/templates/sample-seed/codebook/desire-for-manual-override.md +8 -0
- package/templates/sample-seed/codebook/distrust-of-automation.md +8 -0
- package/templates/sample-seed/highlights/H-001.md +9 -0
- package/templates/sample-seed/highlights/H-002.md +9 -0
- package/templates/sample-seed/seed.md +17 -0
- package/templates/sample-seed/sessions/S001/transcript.json +74 -0
- package/templates/sample-seed/synthesis/themes/control-earns-trust.md +13 -0
- package/templates/seed.md +21 -0
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
import { existsSync, readdirSync, readFileSync } from 'node:fs';
|
|
2
|
+
import { join } from 'node:path';
|
|
3
|
+
import { buildVectorRecords, chunkTranscript, openLanceDBForWrite } from 'compost-retrieval';
|
|
4
|
+
import { CompostError } from '../errors.js';
|
|
5
|
+
import { loadConfig, parseRoute } from '../lib/config.js';
|
|
6
|
+
import { LLMAdapter } from '../llm/adapter.js';
|
|
7
|
+
/**
 * Run one embedding pass over every transcript in a seed.
 *
 * Each `sessions/<id>/transcript.json` is chunked, the chunk texts are
 * embedded, and the resulting rows are upserted (keyed by text SHA) into the
 * seed's vector table.
 *
 * @param {string} seedPath - Root directory of the seed.
 * @param {object} [deps] - Optional overrides.
 * @param {(texts: string[]) => Promise<number[][]>} [deps.embed] - Embedding
 *   function. When omitted, one is built from the seed's config via LLMAdapter.
 * @param {number} [deps.vectorDim] - Vector dimension used when the table is
 *   opened. An explicit value always wins; otherwise it is looked up from the
 *   configured embeddings model, falling back to 1024.
 * @param {{ upsertByTextSha: Function }} [deps.writer] - Vector store writer.
 *   When omitted, `<seedPath>/.compost/vectors.lancedb` is opened.
 * @returns {Promise<{ embedded: number, inserted: number, transcripts_scanned: number }>}
 *   Counts for this pass; `inserted` is whatever `upsertByTextSha` returns.
 * @throws {CompostError} `PROVIDER_ERROR` when the provider returns a number
 *   of vectors different from the number of chunks.
 */
export async function runEmbedWorkerOnce(seedPath, deps = {}) {
    // Use the last non-empty path segment so a trailing slash ("seeds/foo/")
    // still yields "foo"; fall back to "seed" when no segment exists at all.
    const seedName = seedPath.split('/').filter((segment) => segment !== '').pop() ?? 'seed';
    const transcripts = findTranscripts(seedPath);
    if (transcripts.length === 0) {
        return { embedded: 0, inserted: 0, transcripts_scanned: 0 };
    }
    // Resolve embeddings provider + dimension.
    let embed = deps.embed;
    let vectorDim = deps.vectorDim ?? 1024;
    if (embed === undefined) {
        const config = loadConfig(seedPath);
        const adapter = new LLMAdapter(config);
        embed = async (texts) => {
            const resp = await adapter.embed('embeddings', texts);
            return resp.vectors;
        };
        // Try to discover dim from the provider config — most embeddings models
        // declare it in their tag. bge-m3 is 1024; mxbai-embed-large is 1024 too.
        // Fall back to 1024 by default; mismatches surface as a clear LanceDB error.
        // A caller-supplied deps.vectorDim is never overridden by this lookup.
        const route = config.defaults.embeddings;
        if (deps.vectorDim === undefined && route !== undefined) {
            const { model } = parseRoute(route);
            // "name:tag" (e.g. "nomic-embed-text:latest") shares the dimension of
            // "name", so retry the lookup without the tag before giving up.
            const family = typeof model === 'string' ? model.split(':')[0] : model;
            vectorDim = DEFAULT_DIM_FOR_MODEL[model] ?? DEFAULT_DIM_FOR_MODEL[family] ?? 1024;
        }
    }
    // Chunk every transcript.
    const allChunks = [];
    for (const t of transcripts) {
        const transcript = JSON.parse(readFileSync(t.path, 'utf8'));
        const chunks = chunkTranscript(transcript, { seed: seedName });
        for (const c of chunks) {
            allChunks.push({ ...c, sessionId: t.sessionId });
        }
    }
    if (allChunks.length === 0) {
        return { embedded: 0, inserted: 0, transcripts_scanned: transcripts.length };
    }
    // Open / create the LanceDB table.
    // NOTE: nothing here filters out already-stored text SHAs before embedding;
    // every chunk is embedded and deduplication is left to upsertByTextSha.
    // TODO: skip known SHAs up front to avoid wasted embed calls.
    const writer = deps.writer ??
        (await openLanceDBForWrite(join(seedPath, '.compost', 'vectors.lancedb'), vectorDim));
    // Embed with a worker-level batch cap. The LLMAdapter already batches
    // internally (provider-appropriate, ~50/req for Ollama), but a worker-level
    // cap is defense-in-depth: a very large corpus (10k+ chunks) shouldn't be
    // a single multi-megabyte JSON request even if the adapter would split it.
    // 500 chunks/pass is a safe ceiling for HTTP body size on default Ollama
    // configs while keeping the round-trip count low (~36 batches for a
    // typical 18k-chunk corpus).
    const vectors = await embedInBatches(embed, allChunks.map((c) => c.text), EMBED_BATCH_CAP);
    if (vectors.length !== allChunks.length) {
        throw new CompostError('PROVIDER_ERROR', `embeddings provider returned ${vectors.length} vectors for ${allChunks.length} chunks`);
    }
    // Build LanceDB rows; vectors[i] belongs to allChunks[i].
    const records = buildVectorRecords(allChunks.map((c, i) => ({
        id: c.id,
        kind: chunkTypeToArtifactKind(c.metadata.chunk_type),
        seed: seedName,
        session: c.sessionId,
        speaker_id: c.metadata.speaker_id,
        start_ms: c.metadata.start_ms,
        end_ms: c.metadata.end_ms,
        text: c.text,
        text_sha: c.text_sha,
        vector: vectors[i],
        metadata: {
            source_page: c.metadata.source_page,
            highlight_ids: c.metadata.highlight_ids,
            code_ids: c.metadata.code_ids,
            actor_type: c.metadata.actor_type,
            chunk_type: c.metadata.chunk_type,
        },
    })));
    const inserted = await writer.upsertByTextSha(records);
    return {
        embedded: allChunks.length,
        inserted,
        transcripts_scanned: transcripts.length,
    };
}
|
|
85
|
+
/**
 * Worker-level batch cap. Defense in depth against very large corpora that
 * would otherwise produce a single multi-megabyte JSON request. The
 * LLMAdapter splits within this cap to provider-appropriate sizes.
 *
 * Tuning notes: 500 chunks × ~1KB text each = ~500KB request body, well
 * under Ollama's default HTTP body limits. A typical interview corpus
 * (~600 utterances per session × 3 chunks/utt × 30 sessions = 54k chunks)
 * batches to ~108 round-trips.
 */
export const EMBED_BATCH_CAP = 500;

/**
 * Embed `texts` in sequential slices of at most `cap` items and return the
 * vectors in input order.
 *
 * When everything fits in one slice the provider is called exactly once and
 * its result is returned as-is (the caller checks the overall count).
 * Otherwise each slice's result is length-checked before it is appended, so a
 * short response is reported with the batch it came from.
 *
 * @param {(texts: string[]) => Promise<Array>} embed - Embedding function.
 * @param {string[]} texts - Texts to embed.
 * @param {number} cap - Maximum number of texts per `embed` call; must be > 0.
 * @returns {Promise<Array>} One vector per input text, in input order.
 * @throws {RangeError} When `cap` is not a positive number.
 * @throws {CompostError} `PROVIDER_ERROR` when a batch returns the wrong
 *   number of vectors.
 */
async function embedInBatches(embed, texts, cap) {
    // A zero or negative cap would never advance the loop below, and a NaN cap
    // would silently return no vectors; reject all three up front.
    if (!(cap > 0)) {
        throw new RangeError(`embed batch cap must be a positive number, got ${cap}`);
    }
    if (texts.length <= cap) {
        return embed(texts);
    }
    const totalBatches = Math.ceil(texts.length / cap);
    const out = [];
    for (let i = 0; i < texts.length; i += cap) {
        const slice = texts.slice(i, i + cap);
        const partial = await embed(slice);
        if (partial.length !== slice.length) {
            throw new CompostError('PROVIDER_ERROR', `embeddings provider returned ${partial.length} vectors for ${slice.length} chunks ` +
                `(batch ${i / cap + 1}/${totalBatches})`);
        }
        for (const vector of partial) {
            out.push(vector);
        }
    }
    return out;
}
|
|
111
|
+
/**
 * Known embedding dimensions, keyed by the model name taken from the
 * embeddings route. Models that are not listed fall back to 1024 at the
 * lookup site.
 *
 * The table has a null prototype so that a model name that collides with an
 * Object.prototype member (e.g. "constructor") looks up as `undefined`
 * instead of an inherited function, and it is frozen because it is shared
 * module-level state.
 */
const DEFAULT_DIM_FOR_MODEL = Object.freeze(Object.assign(Object.create(null), {
    'bge-m3': 1024,
    'bge-m3:q4_k_m': 1024,
    'mxbai-embed-large': 1024,
    'nomic-embed-text': 768,
    'all-minilm': 384,
}));
|
|
118
|
+
/**
 * Translate a chunk's `chunk_type` into the artifact kind stored on its
 * vector row.
 *
 * `highlight` and `term` map to themselves, `page` maps to `legacy_chunk`,
 * and every other value (including `undefined`) maps to `utterance`.
 *
 * @param {string | undefined} type - The chunk's `chunk_type`.
 * @returns {'highlight' | 'term' | 'legacy_chunk' | 'utterance'}
 */
function chunkTypeToArtifactKind(type) {
    if (type === 'highlight' || type === 'term') {
        return type;
    }
    return type === 'page' ? 'legacy_chunk' : 'utterance';
}
|
|
130
|
+
/**
 * List the sessions under `<seedPath>/sessions` that have a transcript.
 *
 * Every direct child of the sessions directory that contains a
 * `transcript.json` yields one entry. The `_inbox` entry and dot-prefixed
 * entries are ignored. A missing sessions directory yields an empty list.
 *
 * Entries are returned sorted by session id so that downstream chunk order
 * (and therefore which duplicate wins a text-SHA upsert) does not depend on
 * the filesystem's enumeration order.
 *
 * @param {string} seedPath - Root directory of the seed.
 * @returns {Array<{ sessionId: string, path: string }>} Session id and
 *   transcript path for each session that has a transcript.
 */
function findTranscripts(seedPath) {
    const sessionsDir = join(seedPath, 'sessions');
    if (!existsSync(sessionsDir)) {
        return [];
    }
    const out = [];
    // Directory enumeration order is not guaranteed; sort for reproducible runs.
    for (const entry of readdirSync(sessionsDir).sort()) {
        if (entry === '_inbox' || entry.startsWith('.')) {
            continue;
        }
        const tp = join(sessionsDir, entry, 'transcript.json');
        if (existsSync(tp)) {
            out.push({ sessionId: entry, path: tp });
        }
    }
    return out;
}
|
|
144
|
+
//# sourceMappingURL=embed_worker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embed_worker.js","sourceRoot":"","sources":["../../src/loops/embed_worker.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,SAAS,CAAA;AAC/D,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAGhC,OAAO,EAAE,kBAAkB,EAAE,eAAe,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAA;AAE5F,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAA;AAC3C,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAA;AACzD,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAA;AAmC9C,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,QAAgB,EAChB,OAAwB,EAAE;IAE1B,MAAM,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,MAAM,CAAA;IACpD,MAAM,WAAW,GAAG,eAAe,CAAC,QAAQ,CAAC,CAAA;IAC7C,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC7B,OAAO,EAAE,QAAQ,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,mBAAmB,EAAE,CAAC,EAAE,CAAA;IAC7D,CAAC;IAED,2CAA2C;IAC3C,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAA;IACtB,IAAI,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,IAAI,CAAA;IACtC,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;QACxB,MAAM,MAAM,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAA;QACnC,MAAM,OAAO,GAAG,IAAI,UAAU,CAAC,MAAM,CAAC,CAAA;QACtC,KAAK,GAAG,KAAK,EAAE,KAAe,EAAE,EAAE;YAChC,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,KAAK,CAAC,YAAY,EAAE,KAAK,CAAC,CAAA;YACrD,OAAO,IAAI,CAAC,OAAO,CAAA;QACrB,CAAC,CAAA;QACD,wEAAwE;QACxE,0EAA0E;QAC1E,6EAA6E;QAC7E,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC,UAAU,CAAA;QACxC,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YACxB,MAAM,EAAE,KAAK,EAAE,GAAG,UAAU,CAAC,KAAK,CAAC,CAAA;YACnC,SAAS,GAAG,qBAAqB,CAAC,KAAK,CAAC,IAAI,IAAI,CAAA;QAClD,CAAC;IACH,CAAC;IAED,0BAA0B;IAC1B,MAAM,SAAS,GAAyC,EAAE,CAAA;IAC1D,KAAK,MAAM,CAAC,IAAI,WAAW,EAAE,CAAC;QAC5B,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,EAAE,MAAM,CAAC,CAAsB,CAAA;QAChF,MAAM,MAAM,GAAG,eAAe,CAAC,UAAU,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAA;QAC9D,KAAK,MAAM,CAAC,IAAI,MAAM;YAAE,SAAS,CAAC,IAAI,CAAC,EAAE,GAAG,CAAC,EAAE,SAAS,EAAE,CAAC,CAAC,SAAS,EAAE,CAAC,CAAA;IAC1E,CAAC;IAED,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,OAAO,EAAE,QAAQ,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,mBAAmB,EAAE,WAAW,CAAC,MAAM,EAAE,CAAA;IAC9E,CAAC;IAED,6EAA6E;IAC7E,iEAAiE;IACjE,MAAM,MAAM,GACV,IAAI,CAAC,MAAM;Q
ACX,CAAC,MAAM,mBAAmB,CAAC,IAAI,CAAC,QAAQ,EAAE,UAAU,EAAE,iBAAiB,CAAC,EAAE,SAAS,CAAC,CAAC,CAAA;IAEvF,sEAAsE;IACtE,4EAA4E;IAC5E,0EAA0E;IAC1E,2EAA2E;IAC3E,yEAAyE;IACzE,oEAAoE;IACpE,6BAA6B;IAC7B,MAAM,OAAO,GAAG,MAAM,cAAc,CAClC,KAAK,EACL,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,EAC5B,eAAe,CAChB,CAAA;IACD,IAAI,OAAO,CAAC,MAAM,KAAK,SAAS,CAAC,MAAM,EAAE,CAAC;QACxC,MAAM,IAAI,YAAY,CACpB,gBAAgB,EAChB,gCAAgC,OAAO,CAAC,MAAM,gBAAgB,SAAS,CAAC,MAAM,SAAS,CACxF,CAAA;IACH,CAAC;IAED,sBAAsB;IACtB,MAAM,OAAO,GAAG,kBAAkB,CAChC,SAAS,CAAC,GAAG,CACX,CAAC,CAAC,EAAE,CAAC,EAAqB,EAAE,CAAC,CAAC;QAC5B,EAAE,EAAE,CAAC,CAAC,EAAE;QACR,IAAI,EAAE,uBAAuB,CAAC,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC;QACpD,IAAI,EAAE,QAAQ;QACd,OAAO,EAAE,CAAC,CAAC,SAAS;QACpB,UAAU,EAAE,CAAC,CAAC,QAAQ,CAAC,UAAU;QACjC,QAAQ,EAAE,CAAC,CAAC,QAAQ,CAAC,QAAQ;QAC7B,MAAM,EAAE,CAAC,CAAC,QAAQ,CAAC,MAAM;QACzB,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,QAAQ,EAAE,CAAC,CAAC,QAAQ;QACpB,MAAM,EAAE,OAAO,CAAC,CAAC,CAAa;QAC9B,QAAQ,EAAE;YACR,WAAW,EAAE,CAAC,CAAC,QAAQ,CAAC,WAAW;YACnC,aAAa,EAAE,CAAC,CAAC,QAAQ,CAAC,aAAa;YACvC,QAAQ,EAAE,CAAC,CAAC,QAAQ,CAAC,QAAQ;YAC7B,UAAU,EAAE,CAAC,CAAC,QAAQ,CAAC,UAAU;YACjC,UAAU,EAAE,CAAC,CAAC,QAAQ,CAAC,UAAU;SAClC;KACF,CAAC,CACH,CACF,CAAA;IAED,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,eAAe,CAAC,OAAO,CAAC,CAAA;IACtD,OAAO;QACL,QAAQ,EAAE,SAAS,CAAC,MAAM;QAC1B,QAAQ;QACR,mBAAmB,EAAE,WAAW,CAAC,MAAM;KACxC,CAAA;AACH,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,CAAC,MAAM,eAAe,GAAG,GAAG,CAAA;AAElC,KAAK,UAAU,cAAc,CAC3B,KAA+C,EAC/C,KAAe,EACf,GAAW;IAEX,IAAI,KAAK,CAAC,MAAM,IAAI,GAAG;QAAE,OAAO,KAAK,CAAC,KAAK,CAAC,CAAA;IAC5C,MAAM,GAAG,GAAe,EAAE,CAAA;IAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,GAAG,EAAE,CAAC;QAC3C,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAA;QACrC,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC,KAAK,CAAC,CAAA;QAClC,IAAI,OAAO,CAAC,MAAM,KAAK,KAAK,CAAC,MAAM,EAAE,CAAC;YACpC,MAAM,IAAI,YAAY,CACpB,gBAAgB,EAChB,gCAAgC,OAAO,CAAC,MAAM,gBAAgB,KAAK,CAAC,MAAM,UAAU;gBAClF,UAAU,CAAC,GAAG,GAAG,GAAG,CAAC,IAAI,IAAI,CAAC,IAAI,CA
AC,KAAK,CAAC,MAAM,GAAG,GAAG,CAAC,GAAG,CAC5D,CAAA;QACH,CAAC;QACD,GAAG,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,CAAA;IACtB,CAAC;IACD,OAAO,GAAG,CAAA;AACZ,CAAC;AAED,MAAM,qBAAqB,GAA2B;IACpD,QAAQ,EAAE,IAAI;IACd,eAAe,EAAE,IAAI;IACrB,mBAAmB,EAAE,IAAI;IACzB,kBAAkB,EAAE,GAAG;IACvB,YAAY,EAAE,GAAG;CAClB,CAAA;AAED,SAAS,uBAAuB,CAC9B,IAA4D;IAE5D,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,WAAW;YACd,OAAO,WAAW,CAAA;QACpB,KAAK,MAAM;YACT,OAAO,MAAM,CAAA;QACf,KAAK,MAAM;YACT,OAAO,cAAc,CAAA;QACvB;YACE,OAAO,WAAW,CAAA;IACtB,CAAC;AACH,CAAC;AAOD,SAAS,eAAe,CAAC,QAAgB;IACvC,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAA;IAC9C,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC;QAAE,OAAO,EAAE,CAAA;IACvC,MAAM,GAAG,GAAuB,EAAE,CAAA;IAClC,KAAK,MAAM,KAAK,IAAI,WAAW,CAAC,WAAW,CAAC,EAAE,CAAC;QAC7C,IAAI,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAQ;QACzD,MAAM,EAAE,GAAG,IAAI,CAAC,WAAW,EAAE,KAAK,EAAE,iBAAiB,CAAC,CAAA;QACtD,IAAI,UAAU,CAAC,EAAE,CAAC;YAAE,GAAG,CAAC,IAAI,CAAC,EAAE,SAAS,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,CAAA;IAC9D,CAAC;IACD,OAAO,GAAG,CAAA;AACZ,CAAC"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { type Judge, type Suggestion } from 'compost-evals';
|
|
2
|
+
/** Dependencies and options that runEvalGraderOnce passes on to gradeSuggestions. */
export interface EvalGraderDeps {
|
|
3
|
+
/** LLM judge used to grade the suggestions. */
judge: Judge;
|
|
4
|
+
/** Forwarded as the `judgeModel` option. NOTE(review): presumably the identifier of the judge's model — confirm against compost-evals. */
judgeModel: string;
|
|
5
|
+
/** Optional throttle on judge calls per run; left out of the options when undefined. */
maxPerRun?: number;
|
|
6
|
+
/** Optional clock, forwarded only when defined. NOTE(review): presumably an injectable "now" for tests — confirm. */
now?: () => Date;
|
|
7
|
+
}
|
|
8
|
+
/** Counts returned by runEvalGraderOnce. */
export interface EvalGraderResult {
|
|
9
|
+
/** Number of entries in the `graded` list returned by gradeSuggestions. */
graded: number;
|
|
10
|
+
/** The `skipped` value returned by gradeSuggestions, passed through unchanged. */
skipped: number;
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* Grade a batch of AI-authored suggestions for a seed and persist verdicts to
|
|
14
|
+
* .compost/evals.sqlite. Idempotent on suggestion_id (already-graded skipped)
|
|
15
|
+
* and throttled to maxPerRun calls. The caller supplies the suggestions
|
|
16
|
+
* (read from the provenance event log) and an LLM judge.
|
|
17
|
+
*
* @param seedPath - Seed root directory; used to locate the evals database.
* @param suggestions - Suggestions to grade.
* @param deps - Judge, judge model and optional `maxPerRun` / `now` overrides.
* @returns How many suggestions were graded and how many were skipped.
*/
|
|
18
|
+
export declare function runEvalGraderOnce(seedPath: string, suggestions: Suggestion[], deps: EvalGraderDeps): Promise<EvalGraderResult>;
|
|
19
|
+
//# sourceMappingURL=eval_grader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval_grader.d.ts","sourceRoot":"","sources":["../../src/loops/eval_grader.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,KAAK,KAAK,EACV,KAAK,UAAU,EAChB,MAAM,eAAe,CAAA;AAEtB,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE,KAAK,CAAA;IACZ,UAAU,EAAE,MAAM,CAAA;IAClB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,GAAG,CAAC,EAAE,MAAM,IAAI,CAAA;CACjB;AAED,MAAM,WAAW,gBAAgB;IAC/B,MAAM,EAAE,MAAM,CAAA;IACd,OAAO,EAAE,MAAM,CAAA;CAChB;AAED;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,MAAM,EAChB,WAAW,EAAE,UAAU,EAAE,EACzB,IAAI,EAAE,cAAc,GACnB,OAAO,CAAC,gBAAgB,CAAC,CAY3B"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { EvalStore, evalsDbPath, gradeSuggestions, } from 'compost-evals';
|
|
2
|
+
/**
 * Grade one batch of AI-authored suggestions for a seed, persisting the
 * verdicts to .compost/evals.sqlite.
 *
 * Suggestions that were already graded are skipped (idempotent on
 * suggestion_id) and the number of judge calls is throttled to maxPerRun.
 * The caller provides both the suggestions (read from the provenance event
 * log) and the LLM judge. The store is closed even when grading throws.
 *
 * @param seedPath Seed root directory; used to locate the evals database.
 * @param suggestions Suggestions to grade.
 * @param deps Judge, judge model and optional `maxPerRun` / `now` overrides.
 * @returns `{ graded, skipped }` counts for this run.
 */
export async function runEvalGraderOnce(seedPath, suggestions, deps) {
    const store = new EvalStore(evalsDbPath(seedPath));
    try {
        const judge = deps.judge;
        // Optional settings are only set when provided, so an explicit
        // `undefined` is never forwarded to gradeSuggestions.
        const options = { judgeModel: deps.judgeModel };
        if (deps.maxPerRun !== undefined) {
            options.maxPerRun = deps.maxPerRun;
        }
        if (deps.now !== undefined) {
            options.now = deps.now;
        }
        const outcome = await gradeSuggestions(store, suggestions, judge, options);
        return { graded: outcome.graded.length, skipped: outcome.skipped };
    }
    finally {
        store.close();
    }
}
|
|
22
|
+
//# sourceMappingURL=eval_grader.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval_grader.js","sourceRoot":"","sources":["../../src/loops/eval_grader.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,SAAS,EACT,WAAW,EACX,gBAAgB,GAGjB,MAAM,eAAe,CAAA;AActB;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,QAAgB,EAChB,WAAyB,EACzB,IAAoB;IAEpB,MAAM,KAAK,GAAG,IAAI,SAAS,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC,CAAA;IAClD,IAAI,CAAC;QACH,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,GAAG,MAAM,gBAAgB,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,CAAC,KAAK,EAAE;YACjF,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,GAAG,CAAC,IAAI,CAAC,SAAS,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACtE,GAAG,CAAC,IAAI,CAAC,GAAG,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SACrD,CAAC,CAAA;QACF,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,EAAE,CAAA;IAC3C,CAAC;YAAS,CAAC;QACT,KAAK,CAAC,KAAK,EAAE,CAAA;IACf,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/** Outcome of one processInbox pass over a seed's sessions/_inbox/. */
export interface WatcherProcessResult {
|
|
2
|
+
/** One entry per inbox file that was moved into a session directory. */
moved: Array<{
|
|
3
|
+
/** NOTE(review): presumably the file's original path inside the inbox — confirm. */
from: string;
|
|
4
|
+
/** NOTE(review): presumably the destination, sessions/<sid>/source.<ext> — confirm. */
to: string;
|
|
5
|
+
/** Session id assigned to the file. */
session_id: string;
|
|
6
|
+
/** NOTE(review): presumably the id of the job enqueued for this file — confirm. */
job_id: number;
|
|
7
|
+
}>;
|
|
8
|
+
/** Paths of inbox files for which classify() returned null; these are left in place. */
unsupported: string[];
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Process everything currently in a seed's sessions/_inbox/: assign a session
|
|
12
|
+
* id, move the file to sessions/<sid>/source.<ext> atomically, enqueue a job,
|
|
13
|
+
* and emit an agent `create` event. Pure of timers — call it on boot (replay)
|
|
14
|
+
* and from the debounced live watcher. Idempotent: an empty inbox is a no-op.
|
|
15
|
+
*
* @param seedPath - Seed root directory containing `sessions/_inbox`.
* @returns The files that were moved and the paths that were unsupported.
*/
|
|
16
|
+
export declare function processInbox(seedPath: string): WatcherProcessResult;
|
|
17
|
+
//# sourceMappingURL=ingest_watcher.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ingest_watcher.d.ts","sourceRoot":"","sources":["../../src/loops/ingest_watcher.ts"],"names":[],"mappings":"AAUA,MAAM,WAAW,oBAAoB;IACnC,KAAK,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,EAAE,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;IAC9E,WAAW,EAAE,MAAM,EAAE,CAAA;CACtB;AAUD;;;;;GAKG;AACH,wBAAgB,YAAY,CAAC,QAAQ,EAAE,MAAM,GAAG,oBAAoB,CAyCnE"}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import { existsSync, mkdirSync, readdirSync, renameSync, statSync } from 'node:fs';
|
|
2
|
+
import { basename, extname, join } from 'node:path';
|
|
3
|
+
import { classify } from '../lib/dispatch.js';
|
|
4
|
+
import { emitAgentCreate, openSeedEvents } from '../lib/events.js';
|
|
5
|
+
import { JobQueue, stateDbPath } from '../lib/queue.js';
|
|
6
|
+
const AGENT_NAME = 'ingest-watcher';
|
|
7
|
+
const AGENT_VERSION = '0.1.0';
|
|
8
|
+
/**
 * Compute the next session id for a sessions directory.
 *
 * Looks at entries named `S<digits>`, takes the highest number among them
 * (0 when the directory is missing or holds no such entry) and returns its
 * successor, zero-padded to at least three digits (e.g. `S008`).
 */
function nextSessionId(sessionsDir) {
    let highest = 0;
    if (existsSync(sessionsDir)) {
        for (const name of readdirSync(sessionsDir)) {
            if (!/^S\d+$/.test(name))
                continue;
            highest = Math.max(highest, Number(name.slice(1)));
        }
    }
    const nextNumber = String(highest + 1);
    return `S${nextNumber.padStart(3, '0')}`;
}
|
|
15
|
+
/**
 * Process everything currently in a seed's sessions/_inbox/: assign a session
 * id, move the file to sessions/<sid>/source.<ext> atomically, enqueue a job,
 * and emit an agent `create` event. Pure of timers — call it on boot (replay)
 * and from the debounced live watcher. Idempotent: an empty inbox is a no-op.
 *
 * Dot-files and non-regular entries are skipped; files `classify` rejects are
 * reported in `unsupported` and left in the inbox.
 *
 * @param {string} seedPath Root directory of the seed.
 * @returns {{ moved: Array<{ from: string, to: string, session_id: string, job_id: number }>, unsupported: string[] }}
 */
export function processInbox(seedPath) {
    const sessionsDir = join(seedPath, 'sessions');
    const inbox = join(sessionsDir, '_inbox');
    const result = { moved: [], unsupported: [] };
    // No inbox: return before any queue/event handle is opened.
    if (!existsSync(inbox))
        return result;
    const queue = new JobQueue(stateDbPath(seedPath));
    // Opened inside the try so the queue is still closed if this call throws.
    let events;
    try {
        events = openSeedEvents(seedPath);
        for (const entry of readdirSync(inbox)) {
            if (entry.startsWith('.'))
                continue;
            const from = join(inbox, entry);
            if (!statSync(from).isFile())
                continue;
            const d = classify(from);
            if (d === null) {
                result.unsupported.push(from);
                continue;
            }
            const sid = nextSessionId(sessionsDir);
            const sessionDir = join(sessionsDir, sid);
            mkdirSync(sessionDir, { recursive: true });
            const to = join(sessionDir, `source${extname(entry).toLowerCase()}`);
            renameSync(from, to); // atomic within the same filesystem
            // NOTE(review): if enqueue throws, the file has already left the inbox
            // and has no job, so a later replay will not pick it up — confirm recovery.
            const { id } = queue.enqueue(d.kind, to, {
                category: d.category,
                session_id: sid,
                original_name: basename(entry),
            });
            emitAgentCreate(events, {
                artifactKind: 'session',
                initialState: { session_id: sid, source: to, kind: d.kind },
                agentName: AGENT_NAME,
                agentVersion: AGENT_VERSION,
            });
            result.moved.push({ from, to, session_id: sid, job_id: id });
        }
        return result;
    }
    finally {
        // Same close order as before (queue, then events); nesting ensures a
        // throwing queue.close() cannot skip events.close().
        try {
            queue.close();
        }
        finally {
            events?.close();
        }
    }
}
|
|
66
|
+
//# sourceMappingURL=ingest_watcher.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ingest_watcher.js","sourceRoot":"","sources":["../../src/loops/ingest_watcher.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,WAAW,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAA;AAClF,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAEnD,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAA;AAC7C,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AAClE,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAA;AAEvD,MAAM,UAAU,GAAG,gBAAgB,CAAA;AACnC,MAAM,aAAa,GAAG,OAAO,CAAA;AAO7B,SAAS,aAAa,CAAC,WAAmB;IACxC,MAAM,QAAQ,GAAG,UAAU,CAAC,WAAW,CAAC;QACtC,CAAC,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC1D,CAAC,CAAC,EAAE,CAAA;IACN,MAAM,GAAG,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAA;IACzE,OAAO,IAAI,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAA;AAC/C,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,YAAY,CAAC,QAAgB;IAC3C,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAA;IAC9C,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAA;IACzC,MAAM,MAAM,GAAyB,EAAE,KAAK,EAAE,EAAE,EAAE,WAAW,EAAE,EAAE,EAAE,CAAA;IACnE,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC;QAAE,OAAO,MAAM,CAAA;IAErC,MAAM,KAAK,GAAG,IAAI,QAAQ,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC,CAAA;IACjD,MAAM,MAAM,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAA;IACvC,IAAI,CAAC;QACH,KAAK,MAAM,KAAK,IAAI,WAAW,CAAC,KAAK,CAAC,EAAE,CAAC;YACvC,IAAI,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC;gBAAE,SAAQ;YACnC,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,EAAE,KAAK,CAAC,CAAA;YAC/B,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE;gBAAE,SAAQ;YACtC,MAAM,CAAC,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAA;YACxB,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC;gBACf,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;gBAC7B,SAAQ;YACV,CAAC;YACD,MAAM,GAAG,GAAG,aAAa,CAAC,WAAW,CAAC,CAAA;YACtC,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,EAAE,GAAG,CAAC,CAAA;YACzC,SAAS,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;YAC1C,MAAM,EAAE,GAAG,IAAI,CAAC,UAAU,EAAE,SAAS,OAAO,CAAC,KAAK,CAAC,CAAC,WAAW,EAAE,E
AAE,CAAC,CAAA;YACpE,UAAU,CAAC,IAAI,EAAE,EAAE,CAAC,CAAA,CAAC,oCAAoC;YACzD,MAAM,EAAE,EAAE,EAAE,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,EAAE;gBACvC,QAAQ,EAAE,CAAC,CAAC,QAAQ;gBACpB,UAAU,EAAE,GAAG;gBACf,aAAa,EAAE,QAAQ,CAAC,KAAK,CAAC;aAC/B,CAAC,CAAA;YACF,eAAe,CAAC,MAAM,EAAE;gBACtB,YAAY,EAAE,SAAS;gBACvB,YAAY,EAAE,EAAE,UAAU,EAAE,GAAG,EAAE,MAAM,EAAE,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE;gBAC3D,SAAS,EAAE,UAAU;gBACrB,YAAY,EAAE,aAAa;aAC5B,CAAC,CAAA;YACF,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,EAAE,EAAE,UAAU,EAAE,GAAG,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC,CAAA;QAC9D,CAAC;QACD,OAAO,MAAM,CAAA;IACf,CAAC;YAAS,CAAC;QACT,KAAK,CAAC,KAAK,EAAE,CAAA;QACb,MAAM,CAAC,KAAK,EAAE,CAAA;IAChB,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { LegacyIngestClient } from '../legacy_client.js';
|
|
2
|
+
/** Summary of one `runLegacyWorkerOnce` drain. */
export interface LegacyWorkerResult {
    /** Number of jobs claimed during the drain (successful or not). */
    processed: number;
    /** One row per claimed job, in claim order. */
    results: {
        job_id: number;
        source_path: string;
        /** Status reported by the ingest service, or `'error'` when the attempt threw. */
        status: string;
        /** Present on successful rows only. */
        normalized_path?: string;
        /** Present on successful rows only. */
        utterance_count?: number;
        /** Present only when the service returned at least one warning. */
        warnings?: Array<string>;
    }[];
}
|
|
13
|
+
/** Injectable collaborators for `runLegacyWorkerOnce`. */
export interface LegacyWorkerDeps {
    /** Ingest client to use; a default `LegacyIngestClient` is constructed when omitted. */
    client?: LegacyIngestClient;
}
|
|
16
|
+
/**
 * Drain all queued `legacy-ingest` jobs (PDF/DOCX/PPTX/CSV/MD/TXT/XLSX).
 *
 * Each job is POSTed to the transcriber's /legacy-ingest route, which writes
 * a normalized transcript-shaped JSON under `<seed>/legacy/<basename>.json`.
 *
 * Transient failures (service down) requeue with backoff; permanent failures
 * (invalid input, missing dep) burn the attempt counter so the job moves to
 * failed status after MAX_ATTEMPTS.
 *
 * @param seedPath Root directory of the seed.
 * @param deps Optional injected collaborators (e.g. a stub client for tests).
 * @returns How many jobs were claimed, plus one result row per job.
 */
export declare function runLegacyWorkerOnce(
    seedPath: string,
    deps?: LegacyWorkerDeps,
): Promise<LegacyWorkerResult>;
|
|
26
|
+
//# sourceMappingURL=legacy_worker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"legacy_worker.d.ts","sourceRoot":"","sources":["../../src/loops/legacy_worker.ts"],"names":[],"mappings":"AAEA,OAAO,EACL,kBAAkB,EAGnB,MAAM,qBAAqB,CAAA;AA2C5B,MAAM,WAAW,kBAAkB;IACjC,SAAS,EAAE,MAAM,CAAA;IACjB,OAAO,EAAE,KAAK,CAAC;QACb,MAAM,EAAE,MAAM,CAAA;QACd,WAAW,EAAE,MAAM,CAAA;QACnB,MAAM,EAAE,MAAM,CAAA;QACd,eAAe,CAAC,EAAE,MAAM,CAAA;QACxB,eAAe,CAAC,EAAE,MAAM,CAAA;QACxB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAA;KACpB,CAAC,CAAA;CACH;AAED,MAAM,WAAW,gBAAgB;IAC/B,MAAM,CAAC,EAAE,kBAAkB,CAAA;CAC5B;AAED;;;;;;;;GAQG;AACH,wBAAsB,mBAAmB,CACvC,QAAQ,EAAE,MAAM,EAChB,IAAI,GAAE,gBAAqB,GAC1B,OAAO,CAAC,kBAAkB,CAAC,CAyD7B"}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { existsSync, readFileSync } from 'node:fs';
|
|
2
|
+
import { LegacyIngestClient, LegacyServiceError, } from '../legacy_client.js';
|
|
3
|
+
import { emitAgentCreate, openSeedEvents } from '../lib/events.js';
|
|
4
|
+
import { JobQueue, stateDbPath } from '../lib/queue.js';
|
|
5
|
+
const AGENT_NAME = 'legacy-ingest-worker';
|
|
6
|
+
const AGENT_VERSION = '0.1.0';
|
|
7
|
+
const MAX_ATTEMPTS = 3;
|
|
8
|
+
/**
 * Load the optional `<sourcePath>.compost.json` sidecar.
 *
 * Returns `null` when the sidecar is missing, unreadable, malformed, or not a
 * JSON object/array. Otherwise returns an object carrying only the
 * string-valued `text_col`, `speaker_col` and `sheet` hints that were present.
 */
function readSidecar(sourcePath) {
    const sidecarPath = `${sourcePath}.compost.json`;
    if (!existsSync(sidecarPath))
        return null;
    let raw;
    try {
        raw = JSON.parse(readFileSync(sidecarPath, 'utf8'));
    }
    catch {
        // Malformed sidecar — silently fall through to server-side auto-detect.
        // The researcher's intent is unclear; we don't want to block the ingest.
        return null;
    }
    if (raw === null || typeof raw !== 'object')
        return null;
    const hints = {};
    for (const key of ['text_col', 'speaker_col', 'sheet']) {
        if (typeof raw[key] === 'string')
            hints[key] = raw[key];
    }
    return hints;
}
|
|
32
|
+
/**
 * Drain all queued `legacy-ingest` jobs (PDF/DOCX/PPTX/CSV/MD/TXT/XLSX).
 * Each job is POSTed to the transcriber's /legacy-ingest route, which writes
 * a normalized transcript-shaped JSON under `<seed>/legacy/<basename>.json`.
 *
 * Every failed attempt is reported via `queue.fail(id, message, MAX_ATTEMPTS)`
 * and recorded as a `status: 'error'` row. A `LegacyServiceError` of kind
 * `'down'` additionally stops the drain, since nothing else will succeed now.
 *
 * @param {string} seedPath Root directory of the seed.
 * @param {{ client?: object }} [deps] Optional injected ingest client.
 * @returns {Promise<{ processed: number, results: object[] }>} Claimed-job count and one row per job.
 */
export async function runLegacyWorkerOnce(seedPath, deps = {}) {
    const client = deps.client ?? new LegacyIngestClient();
    const queue = new JobQueue(stateDbPath(seedPath));
    // Opened inside the try so the queue is still closed if this call throws.
    let events;
    const out = { processed: 0, results: [] };
    try {
        events = openSeedEvents(seedPath);
        while (true) {
            const job = queue.claim('legacy-ingest');
            if (job === null)
                break;
            out.processed += 1;
            const sourcePath = job.source_path;
            try {
                const sidecar = readSidecar(sourcePath);
                // Forward only the sidecar hints that are actually set.
                const ingestReq = {
                    seed_path: seedPath,
                    source_path: sourcePath,
                    ...(sidecar?.text_col !== undefined ? { text_col: sidecar.text_col } : {}),
                    ...(sidecar?.speaker_col !== undefined ? { speaker_col: sidecar.speaker_col } : {}),
                    ...(sidecar?.sheet !== undefined ? { sheet: sidecar.sheet } : {}),
                };
                const resp = await client.ingest(ingestReq);
                queue.complete(job.id);
                // NOTE(review): a throw from emitAgentCreate lands in the catch below
                // and calls queue.fail on a job already marked complete — confirm intent.
                emitAgentCreate(events, {
                    artifactKind: 'legacy_chunk',
                    initialState: {
                        source_path: resp.source_path,
                        normalized_path: resp.normalized_path,
                        utterance_count: resp.utterance_count,
                        status: resp.status,
                        text_col_resolved: resp.text_col_resolved ?? null,
                        sidecar_applied: sidecar !== null,
                    },
                    agentName: AGENT_NAME,
                    agentVersion: AGENT_VERSION,
                });
                out.results.push({
                    job_id: job.id,
                    source_path: sourcePath,
                    status: resp.status,
                    normalized_path: resp.normalized_path,
                    utterance_count: resp.utterance_count,
                    ...(resp.warnings && resp.warnings.length > 0 ? { warnings: resp.warnings } : {}),
                });
            }
            catch (err) {
                const msg = err instanceof Error ? err.message : String(err);
                queue.fail(job.id, msg, MAX_ATTEMPTS);
                out.results.push({ job_id: job.id, source_path: sourcePath, status: 'error' });
                // On service-down, stop the drain — nothing else will succeed now.
                if (err instanceof LegacyServiceError && err.kind === 'down')
                    break;
            }
        }
        return out;
    }
    finally {
        // Same close order as before (queue, then events); nesting ensures a
        // throwing queue.close() cannot skip events.close().
        try {
            queue.close();
        }
        finally {
            events?.close();
        }
    }
}
|
|
102
|
+
//# sourceMappingURL=legacy_worker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"legacy_worker.js","sourceRoot":"","sources":["../../src/loops/legacy_worker.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,SAAS,CAAA;AAElD,OAAO,EACL,kBAAkB,EAElB,kBAAkB,GACnB,MAAM,qBAAqB,CAAA;AAC5B,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AAClE,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAA;AAEvD,MAAM,UAAU,GAAG,sBAAsB,CAAA;AACzC,MAAM,aAAa,GAAG,OAAO,CAAA;AAC7B,MAAM,YAAY,GAAG,CAAC,CAAA;AAkBtB,SAAS,WAAW,CAAC,UAAkB;IACrC,MAAM,WAAW,GAAG,GAAG,UAAU,eAAe,CAAA;IAChD,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC;QAAE,OAAO,IAAI,CAAA;IACzC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,WAAW,EAAE,MAAM,CAAC,CAAY,CAAA;QACvE,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,KAAK,IAAI;YAAE,OAAO,IAAI,CAAA;QAC9D,MAAM,GAAG,GAAmB,EAAE,CAAA;QAC9B,MAAM,CAAC,GAAG,MAAiC,CAAA;QAC3C,IAAI,OAAO,CAAC,CAAC,QAAQ,KAAK,QAAQ;YAAE,GAAG,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAA;QAC7D,IAAI,OAAO,CAAC,CAAC,WAAW,KAAK,QAAQ;YAAE,GAAG,CAAC,WAAW,GAAG,CAAC,CAAC,WAAW,CAAA;QACtE,IAAI,OAAO,CAAC,CAAC,KAAK,KAAK,QAAQ;YAAE,GAAG,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAA;QACpD,OAAO,GAAG,CAAA;IACZ,CAAC;IAAC,MAAM,CAAC;QACP,wEAAwE;QACxE,yEAAyE;QACzE,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAkBD;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,QAAgB,EAChB,OAAyB,EAAE;IAE3B,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,kBAAkB,EAAE,CAAA;IACtD,MAAM,KAAK,GAAG,IAAI,QAAQ,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC,CAAA;IACjD,MAAM,MAAM,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAA;IACvC,MAAM,GAAG,GAAuB,EAAE,SAAS,EAAE,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,CAAA;IAE7D,IAAI,CAAC;QACH,OAAO,IAAI,EAAE,CAAC;YACZ,MAAM,GAAG,GAAG,KAAK,CAAC,KAAK,CAAC,eAAe,CAAC,CAAA;YACxC,IAAI,GAAG,KAAK,IAAI;gBAAE,MAAK;YACvB,GAAG,CAAC,SAAS,IAAI,CAAC,CAAA;YAClB,MAAM,UAAU,GAAG,GAAG,CAAC,WAAW,CAAA;YAClC,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,WAAW,CAAC,UAAU,CAAC,CAAA;gBACvC,MAAM,SAAS,GAAwB;oBACrC,SAAS,EAAE,QAAQ;oBACnB,WAAW,EAAE,UAAU;oBACvB,GAAG,CAAC,OAAO,EAAE,QAAQ,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC1E,GAAG,CAAC,OAAO,EAAE,WAAW,KAAK,SAAS,CAAC,CAAC,CAAC,
EAAE,WAAW,EAAE,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBACnF,GAAG,CAAC,OAAO,EAAE,KAAK,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;iBAClE,CAAA;gBACD,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAA;gBAC3C,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC,CAAA;gBACtB,eAAe,CAAC,MAAM,EAAE;oBACtB,YAAY,EAAE,cAAc;oBAC5B,YAAY,EAAE;wBACZ,WAAW,EAAE,IAAI,CAAC,WAAW;wBAC7B,eAAe,EAAE,IAAI,CAAC,eAAe;wBACrC,eAAe,EAAE,IAAI,CAAC,eAAe;wBACrC,MAAM,EAAE,IAAI,CAAC,MAAM;wBACnB,iBAAiB,EAAE,IAAI,CAAC,iBAAiB,IAAI,IAAI;wBACjD,eAAe,EAAE,OAAO,KAAK,IAAI;qBAClC;oBACD,SAAS,EAAE,UAAU;oBACrB,YAAY,EAAE,aAAa;iBAC5B,CAAC,CAAA;gBACF,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC;oBACf,MAAM,EAAE,GAAG,CAAC,EAAE;oBACd,WAAW,EAAE,UAAU;oBACvB,MAAM,EAAE,IAAI,CAAC,MAAM;oBACnB,eAAe,EAAE,IAAI,CAAC,eAAe;oBACrC,eAAe,EAAE,IAAI,CAAC,eAAe;oBACrC,GAAG,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;iBAClF,CAAC,CAAA;YACJ,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAA;gBAC5D,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,EAAE,YAAY,CAAC,CAAA;gBACrC,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,GAAG,CAAC,EAAE,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC,CAAA;gBAC9E,mEAAmE;gBACnE,IAAI,GAAG,YAAY,kBAAkB,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM;oBAAE,MAAK;YACrE,CAAC;QACH,CAAC;QACD,OAAO,GAAG,CAAA;IACZ,CAAC;YAAS,CAAC;QACT,KAAK,CAAC,KAAK,EAAE,CAAA;QACb,MAAM,CAAC,KAAK,EAAE,CAAA;IAChB,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import { type EmbedWorkerDeps } from './embed_worker.js';
|
|
2
|
+
import { type LegacyWorkerDeps } from './legacy_worker.js';
|
|
3
|
+
import { type WorkerDeps } from './transcribe_worker.js';
|
|
4
|
+
/** Per-stage summary of one `runSupervisorOnce` pass. */
export interface SupervisorResult {
    /** Inbox stage: counts of files moved into sessions and files left as unsupported. */
    inbox: { moved: number; unsupported: number };
    /** Transcribe stage: jobs claimed and distinct jobs that failed. */
    transcribe: { processed: number; failed: number };
    /** Legacy-ingest stage: jobs claimed and distinct jobs that failed (zeros when skipped or when the stage threw). */
    legacy: { processed: number; failed: number };
    /** Embed stage counters (zeros when skipped or when the stage threw). */
    embed: { embedded: number; inserted: number; transcripts_scanned: number };
    /**
     * Human-readable failure summaries; empty when the pass was clean. The watch
     * command turns a non-empty list into a non-ok status + non-zero exit (#164).
     */
    failures: Array<string>;
}
|
|
26
|
+
/**
 * Tell whether a drained job's status marks it as failed
 * (as opposed to ok / needs_speaker_labels).
 */
export declare function isFailedResult(r: { status: string }): boolean;
|
|
30
|
+
/**
 * Count DISTINCT failed jobs.
 *
 * A job retried up to MAX_ATTEMPTS within one pass produces several failed
 * result rows, but it is still one failed job — this reports jobs, not attempts.
 */
export declare function countFailedJobs(
    results: { job_id: number; status: string }[],
): number;
|
|
37
|
+
/** Dependencies and switches for `runSupervisorOnce`; the `WorkerDeps` part is handed to the transcribe stage. */
export interface SupervisorDeps extends WorkerDeps {
    /** Dependencies forwarded to the legacy-ingest stage. */
    legacy?: LegacyWorkerDeps;
    /** Dependencies forwarded to the embed stage. */
    embed?: EmbedWorkerDeps;
    /** Skip the legacy pass (handy for tests that don't want to touch the legacy service). */
    skipLegacy?: boolean;
    /** Disable the embed pass (handy for tests that don't want to touch LanceDB). */
    skipEmbed?: boolean;
}
|
|
45
|
+
/**
 * Run one cooperative supervisor pass, in this order:
 *   1. Drain the inbox into sessions/SXXX/ shells
 *   2. Drain transcribe jobs (audio/video → transcript.json)
 *   3. Drain legacy-ingest jobs (PDF/DOCX/PPTX/CSV/MD/TXT/XLSX → legacy/<basename>.json)
 *   4. Embed any newly-produced transcripts into LanceDB
 *
 * Order matters: embed runs LAST so it sees transcripts the transcribe and
 * legacy passes just wrote. Transcribe and legacy are independent (different
 * job kinds); they are serialized to keep the single Python service from
 * saturating.
 *
 * @param seedPath Root directory of the seed.
 * @param deps Optional stage dependencies and skip switches.
 */
export declare function runSupervisorOnce(
    seedPath: string,
    deps?: SupervisorDeps,
): Promise<SupervisorResult>;
|
|
58
|
+
/**
 * Exponential backoff schedule (ms) for crash recovery, capped at maxAttempts.
 *
 * @param maxAttempts Number of delays to produce.
 * @param baseMs First delay; each later delay doubles the previous one.
 */
export declare function backoffSchedule(
    maxAttempts?: number,
    baseMs?: number,
): Array<number>;
|
|
60
|
+
/** Options for the live supervisor loop. */
export interface RunLiveOptions extends WorkerDeps {
    /** Pause between passes, in milliseconds. */
    intervalMs?: number;
    /** Abort signal that stops the loop. */
    signal?: AbortSignal;
    /** Called with the loop name and the thrown value whenever a pass throws. */
    onError?: (loop: string, err: unknown) => void;
}
|
|
65
|
+
/**
 * Live supervisor loop.
 *
 * Runs a pass every intervalMs until aborted. A pass that throws is retried
 * with backoff up to 3 times before the loop re-arms on the next tick.
 * (Headless-friendly; `compost watch` wires SIGINT to the signal.)
 *
 * @param seedPath Root directory of the seed.
 * @param opts Interval, abort signal, error callback and worker dependencies.
 */
export declare function runLive(
    seedPath: string,
    opts?: RunLiveOptions,
): Promise<void>;
|
|
71
|
+
//# sourceMappingURL=supervisor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"supervisor.d.ts","sourceRoot":"","sources":["../../src/loops/supervisor.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,KAAK,eAAe,EAAsB,MAAM,mBAAmB,CAAA;AAE5E,OAAO,EAAE,KAAK,gBAAgB,EAAuB,MAAM,oBAAoB,CAAA;AAC/E,OAAO,EAA2B,KAAK,UAAU,EAAE,MAAM,wBAAwB,CAAA;AAEjF,MAAM,WAAW,gBAAgB;IAC/B,KAAK,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAE,CAAA;IAC7C,UAAU,EAAE;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAA;IACjD,MAAM,EAAE;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAA;IAC7C,KAAK,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAC;QAAC,mBAAmB,EAAE,MAAM,CAAA;KAAE,CAAA;IAC1E;qFACiF;IACjF,QAAQ,EAAE,MAAM,EAAE,CAAA;CACnB;AAED,oFAAoF;AACpF,wBAAgB,cAAc,CAAC,CAAC,EAAE;IAAE,MAAM,EAAE,MAAM,CAAA;CAAE,GAAG,OAAO,CAE7D;AAED;;mBAEmB;AACnB,wBAAgB,eAAe,CAAC,OAAO,EAAE,KAAK,CAAC;IAAE,MAAM,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,CAAC,GAAG,MAAM,CAE1F;AAED,MAAM,WAAW,cAAe,SAAQ,UAAU;IAChD,MAAM,CAAC,EAAE,gBAAgB,CAAA;IACzB,KAAK,CAAC,EAAE,eAAe,CAAA;IACvB,0FAA0F;IAC1F,UAAU,CAAC,EAAE,OAAO,CAAA;IACpB,iFAAiF;IACjF,SAAS,CAAC,EAAE,OAAO,CAAA;CACpB;AAED;;;;;;;;;;;GAWG;AACH,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,MAAM,EAChB,IAAI,GAAE,cAAmB,GACxB,OAAO,CAAC,gBAAgB,CAAC,CAgD3B;AAQD,mFAAmF;AACnF,wBAAgB,eAAe,CAAC,WAAW,SAAI,EAAE,MAAM,SAAM,GAAG,MAAM,EAAE,CAEvE;AAED,MAAM,WAAW,cAAe,SAAQ,UAAU;IAChD,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,MAAM,CAAC,EAAE,WAAW,CAAA;IACpB,OAAO,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,OAAO,KAAK,IAAI,CAAA;CAC/C;AAED;;;;GAIG;AACH,wBAAsB,OAAO,CAAC,QAAQ,EAAE,MAAM,EAAE,IAAI,GAAE,cAAmB,GAAG,OAAO,CAAC,IAAI,CAAC,CAkBxF"}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import { getLogPath, Logger } from '../logging.js';
|
|
2
|
+
import { runEmbedWorkerOnce } from './embed_worker.js';
|
|
3
|
+
import { processInbox } from './ingest_watcher.js';
|
|
4
|
+
import { runLegacyWorkerOnce } from './legacy_worker.js';
|
|
5
|
+
import { runTranscribeWorkerOnce } from './transcribe_worker.js';
|
|
6
|
+
/**
 * Tell whether a drained job's status marks it as failed
 * (as opposed to ok / needs_speaker_labels).
 */
export function isFailedResult(r) {
    switch (r.status) {
        case 'error':
        case 'failed_transcription':
            return true;
        default:
            return false;
    }
}
|
|
10
|
+
/**
 * Count DISTINCT failed jobs.
 *
 * A job retried up to MAX_ATTEMPTS within one pass produces several failed
 * result rows, but it is still one failed job — report jobs, not attempts.
 */
export function countFailedJobs(results) {
    const failedIds = new Set();
    for (const row of results) {
        if (isFailedResult(row))
            failedIds.add(row.job_id);
    }
    return failedIds.size;
}
|
|
16
|
+
/**
 * Run one cooperative supervisor pass, in this order:
 *   1. Drain the inbox into sessions/SXXX/ shells
 *   2. Drain transcribe jobs (audio/video → transcript.json)
 *   3. Drain legacy-ingest jobs (PDF/DOCX/PPTX/CSV/MD/TXT/XLSX → legacy/<basename>.json)
 *   4. Embed any newly-produced transcripts into LanceDB
 *
 * Order matters: embed runs LAST so it sees transcripts the transcribe and
 * legacy passes just wrote. Transcribe and legacy are independent (different
 * job kinds); they are serialized to keep the single Python service from
 * saturating.
 *
 * Inbox and transcribe errors propagate to the caller. Legacy and embed errors
 * are caught, logged, and recorded in `failures` so the pass still completes.
 */
export async function runSupervisorOnce(seedPath, deps = {}) {
    const log = loopLogger(seedPath, 'supervisor');
    const failures = [];

    // 1. Inbox
    const drained = processInbox(seedPath);
    await log.info('inbox drained', {
        moved: drained.moved.length,
        unsupported: drained.unsupported.length,
    });

    // 2. Transcribe
    const transcribeRun = await runTranscribeWorkerOnce(seedPath, deps);
    const transcribeFailed = countFailedJobs(transcribeRun.results);
    if (transcribeFailed > 0) {
        failures.push(`transcribe: ${transcribeFailed} job(s) failed`);
    }
    await log.info('transcribe drained', {
        processed: transcribeRun.processed,
        failed: transcribeFailed,
    });

    // 3. Legacy ingest (optional)
    let legacy = { processed: 0, failed: 0 };
    if (deps.skipLegacy !== true) {
        try {
            const legacyRun = await runLegacyWorkerOnce(seedPath, deps.legacy ?? {});
            const failed = countFailedJobs(legacyRun.results);
            legacy = { processed: legacyRun.processed, failed };
            if (failed > 0) {
                failures.push(`legacy: ${failed} job(s) failed`);
            }
            await log.info('legacy drained', legacy);
        }
        catch (err) {
            // The whole legacy pass threw (e.g. service down) — surface it, don't block.
            const reason = String(err);
            failures.push(`legacy: ${reason}`);
            await log.error('legacy failed', { error: reason });
        }
    }

    // 4. Embed (optional)
    let embed = { embedded: 0, inserted: 0, transcripts_scanned: 0 };
    if (deps.skipEmbed !== true) {
        try {
            embed = await runEmbedWorkerOnce(seedPath, deps.embed ?? {});
            await log.info('embed drained', embed);
        }
        catch (err) {
            // Embed failures must not block ingest/transcribe/legacy progress — surface + continue.
            // Common cause: Ollama not running. Surfaced clearly by `compost-setup` (v0.1-07).
            const reason = String(err);
            failures.push(`embed: ${reason}`);
            await log.error('embed failed', { error: reason });
        }
    }

    const inbox = { moved: drained.moved.length, unsupported: drained.unsupported.length };
    const transcribe = { processed: transcribeRun.processed, failed: transcribeFailed };
    return { inbox, transcribe, legacy, embed, failures };
}
|
|
78
|
+
/**
 * Build a Logger that writes to `<loop>.jsonl` next to the seed's regular log.
 *
 * Only the final path segment of `getLogPath(seedPath)` is swapped. Both `/`
 * and `\` count as separators so a backslash-separated path keeps its
 * directory instead of being replaced wholesale.
 * NOTE(review): whether getLogPath can return backslash paths is not visible
 * here — confirm against its implementation.
 */
function loopLogger(seedPath, loop) {
    const base = getLogPath(seedPath); // <seed>/.compost/logs/<date>.jsonl
    // Replacer function: `$` sequences in `loop` are taken literally.
    const perLoop = base.replace(/[^/\\]+$/, () => `${loop}.jsonl`);
    return new Logger(perLoop);
}
|
|
83
|
+
/**
 * Exponential backoff schedule (ms) for crash recovery, capped at maxAttempts.
 *
 * Produces `maxAttempts` delays starting at `baseMs`, each double the previous.
 */
export function backoffSchedule(maxAttempts = 3, baseMs = 500) {
    const delayFor = (_unused, attemptIndex) => baseMs * Math.pow(2, attemptIndex);
    return Array.from({ length: maxAttempts }, delayFor);
}
|
|
87
|
+
/**
 * Live supervisor loop. Runs a pass every intervalMs (default 2000) until
 * aborted. A pass that throws is reported through `opts.onError` and retried
 * once per delay in `backoffSchedule()` before the loop re-arms on the next
 * tick. (Headless-friendly; `compost watch` wires SIGINT to the signal.)
 *
 * Abort is honored at every step: no pass starts, no retry runs, and no sleep
 * is entered once `opts.signal` is aborted.
 *
 * @param {string} seedPath Root directory of the seed.
 * @param {object} [opts] `intervalMs`, `signal`, `onError`, plus dependencies forwarded to each pass.
 * @returns {Promise<void>} Resolves once the signal is aborted.
 */
export async function runLive(seedPath, opts = {}) {
    const interval = opts.intervalMs ?? 2000;
    const backoff = backoffSchedule();
    const aborted = () => opts.signal?.aborted === true;
    while (!aborted()) {
        let attempt = 0;
        // Re-check abort before each retry so an abort during backoff ends the loop.
        while (!aborted()) {
            try {
                await runSupervisorOnce(seedPath, opts);
                break;
            }
            catch (err) {
                opts.onError?.('supervisor', err);
                if (attempt >= backoff.length)
                    break;
                // Aborted while the pass was running: skip the backoff sleep.
                if (aborted())
                    break;
                await sleep(backoff[attempt], opts.signal);
                attempt += 1;
            }
        }
        // Don't wait out the interval when the loop is about to stop anyway.
        if (aborted())
            break;
        await sleep(interval, opts.signal);
    }
}
|
|
113
|
+
/**
 * Resolve after `ms` milliseconds, or as soon as `signal` aborts.
 *
 * Resolves immediately when the signal is already aborted. The abort listener
 * is detached once the promise settles, so repeated sleeps on a long-lived
 * signal do not accumulate listeners.
 *
 * @param {number} ms Delay in milliseconds.
 * @param {AbortSignal} [signal] Optional signal that cuts the sleep short.
 * @returns {Promise<void>}
 */
function sleep(ms, signal) {
    return new Promise((resolve) => {
        // An 'abort' event never fires again on an already-aborted signal.
        if (signal?.aborted === true) {
            resolve();
            return;
        }
        const onAbort = () => {
            clearTimeout(timer);
            resolve();
        };
        const timer = setTimeout(() => {
            signal?.removeEventListener('abort', onAbort);
            resolve();
        }, ms);
        signal?.addEventListener('abort', onAbort, { once: true });
    });
}
|
|
122
|
+
//# sourceMappingURL=supervisor.js.map
|