@openparachute/vault 0.4.7-rc.1 → 0.4.8-rc.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -10
- package/core/src/connection-pragmas.test.ts +232 -0
- package/core/src/core.test.ts +257 -0
- package/core/src/cursor.test.ts +160 -0
- package/core/src/cursor.ts +272 -0
- package/core/src/mcp.ts +51 -7
- package/core/src/notes.ts +164 -2
- package/core/src/portable-md.test.ts +247 -0
- package/core/src/portable-md.ts +118 -1
- package/core/src/schema.ts +98 -2
- package/core/src/store.ts +11 -1
- package/core/src/types.ts +32 -0
- package/package.json +1 -1
- package/src/auth-status.ts +4 -0
- package/src/auto-transcribe.test.ts +116 -0
- package/src/auto-transcribe.ts +48 -0
- package/src/cli.ts +151 -50
- package/src/config.test.ts +26 -0
- package/src/config.ts +53 -1
- package/src/db.ts +15 -2
- package/src/export-watch.test.ts +99 -0
- package/src/mcp-install-interactive.test.ts +23 -2
- package/src/mcp-install-interactive.ts +21 -2
- package/src/mcp-install.test.ts +40 -0
- package/src/mcp-tools.ts +17 -1
- package/src/module-config.ts +70 -14
- package/src/module-manifest.test.ts +93 -0
- package/src/module-manifest.ts +94 -0
- package/src/routes.ts +267 -50
- package/src/scribe-discovery.test.ts +77 -0
- package/src/scribe-discovery.ts +91 -0
- package/src/scribe-env.test.ts +66 -1
- package/src/scribe-env.ts +42 -1
- package/src/self-register.test.ts +380 -0
- package/src/self-register.ts +234 -0
- package/src/server.ts +46 -11
- package/src/transcript-note.test.ts +171 -0
- package/src/transcript-note.ts +189 -0
- package/src/transcription-registry.ts +22 -0
- package/src/transcription-worker.test.ts +250 -0
- package/src/transcription-worker.ts +186 -27
- package/src/vault.test.ts +347 -0
package/src/routes.ts
CHANGED
|
@@ -45,6 +45,7 @@ import {
|
|
|
45
45
|
import { join, extname, normalize, sep } from "path";
|
|
46
46
|
import { existsSync, mkdirSync, readFileSync, statSync, unlinkSync, writeFileSync } from "fs";
|
|
47
47
|
import { vaultDir } from "./config.ts";
|
|
48
|
+
import { shouldAutoTranscribe } from "./auto-transcribe.ts";
|
|
48
49
|
|
|
49
50
|
// ---------------------------------------------------------------------------
|
|
50
51
|
// Helpers
|
|
@@ -509,6 +510,21 @@ async function handleNotesInner(
|
|
|
509
510
|
return json(result);
|
|
510
511
|
}
|
|
511
512
|
|
|
513
|
+
// Cursor + full-text search is mutually exclusive (vault#313 reviewer).
|
|
514
|
+
// FTS owns its own ordering (relevance, not updated_at), so a cursor
|
|
515
|
+
// would skip rows. MCP rejects this combo at `core/src/mcp.ts`; REST
|
|
516
|
+
// would otherwise route into the `if (search)` branch below and
|
|
517
|
+
// silently drop the cursor. Reject here for surface parity.
|
|
518
|
+
if (search && parseQuery(url, "cursor")) {
|
|
519
|
+
return json(
|
|
520
|
+
{
|
|
521
|
+
error: "cursor is incompatible with full-text search — FTS has its own ordering. Use date_filter on updated_at for since-last-checked search.",
|
|
522
|
+
code: "INVALID_QUERY",
|
|
523
|
+
},
|
|
524
|
+
400,
|
|
525
|
+
);
|
|
526
|
+
}
|
|
527
|
+
|
|
512
528
|
// Full-text search
|
|
513
529
|
if (search) {
|
|
514
530
|
const searchTags = parseQueryList(url, "tag");
|
|
@@ -564,49 +580,74 @@ async function handleNotesInner(
|
|
|
564
580
|
const tags = parseQueryList(url, "tag");
|
|
565
581
|
const bracket = parseMetaBrackets(url);
|
|
566
582
|
if (bracket.error) return bracket.error;
|
|
583
|
+
// Opaque cursor for "since last checked" agent loops (vault#313).
|
|
584
|
+
// When present, switches the response shape to {notes, next_cursor}
|
|
585
|
+
// and routes through queryNotesPaged for keyset pagination. Mutually
|
|
586
|
+
// exclusive with the `near` graph-neighborhood scope (rebuilding the
|
|
587
|
+
// neighborhood per page isn't stable) — rejected below.
|
|
588
|
+
const cursorParam = parseQuery(url, "cursor");
|
|
589
|
+
const nearNoteIdEarly = parseQuery(url, "near[note_id]");
|
|
590
|
+
if (cursorParam && nearNoteIdEarly) {
|
|
591
|
+
return json(
|
|
592
|
+
{
|
|
593
|
+
error: "cursor is incompatible with near (graph neighborhood). Resolve the neighborhood first, then iterate with cursor over the resulting note set.",
|
|
594
|
+
code: "INVALID_QUERY",
|
|
595
|
+
},
|
|
596
|
+
400,
|
|
597
|
+
);
|
|
598
|
+
}
|
|
567
599
|
let results: Note[];
|
|
600
|
+
let nextCursor: string | null = null;
|
|
601
|
+
const queryOpts = {
|
|
602
|
+
tags,
|
|
603
|
+
tagMatch: (parseQuery(url, "tag_match") as "all" | "any") ?? (tags && tags.length > 1 ? "any" : undefined),
|
|
604
|
+
excludeTags: parseQueryList(url, "exclude_tag"),
|
|
605
|
+
hasTags: parseBoolOrUndef(parseQuery(url, "has_tags")),
|
|
606
|
+
hasLinks: parseBoolOrUndef(parseQuery(url, "has_links")),
|
|
607
|
+
path: parseQuery(url, "path") ?? undefined,
|
|
608
|
+
pathPrefix: parseQuery(url, "path_prefix") ?? undefined,
|
|
609
|
+
// Extension filter (vault#328). Accepts repeated `extension=`
|
|
610
|
+
// params for the array form: `?extension=csv&extension=yaml`.
|
|
611
|
+
// `parseQueryList` already returns undefined when no params
|
|
612
|
+
// are present, so the filter is silently skipped on a plain
|
|
613
|
+
// GET without the extension query.
|
|
614
|
+
extension: parseExtensionFilter(url),
|
|
615
|
+
metadata: bracket.metadata,
|
|
616
|
+
// Date-range precedence chain (highest to lowest):
|
|
617
|
+
// 1. Bracket-style `meta[created_at][gte]=…` (canonical).
|
|
618
|
+
// 2. Flat `date_field=…&date_from=…&date_to=…` (deprecated).
|
|
619
|
+
// 3. Legacy `date_from=…&date_to=…` (no date_field, deprecated)
|
|
620
|
+
// — filters on `n.created_at` by definition.
|
|
621
|
+
// The engine rejects combinations of `dateFilter` with the legacy
|
|
622
|
+
// `dateFrom`/`dateTo`, so we never set both shapes simultaneously.
|
|
623
|
+
...(bracket.dateFilter
|
|
624
|
+
? { dateFilter: bracket.dateFilter }
|
|
625
|
+
: parseQuery(url, "date_field")
|
|
626
|
+
? {
|
|
627
|
+
dateFilter: {
|
|
628
|
+
field: parseQuery(url, "date_field")!,
|
|
629
|
+
from: parseQuery(url, "date_from") ?? undefined,
|
|
630
|
+
to: parseQuery(url, "date_to") ?? undefined,
|
|
631
|
+
},
|
|
632
|
+
}
|
|
633
|
+
: {
|
|
634
|
+
dateFrom: parseQuery(url, "date_from") ?? undefined,
|
|
635
|
+
dateTo: parseQuery(url, "date_to") ?? undefined,
|
|
636
|
+
}),
|
|
637
|
+
sort: (parseQuery(url, "sort") as "asc" | "desc") ?? undefined,
|
|
638
|
+
orderBy: parseQuery(url, "order_by") ?? undefined,
|
|
639
|
+
limit: parseInt10(parseQuery(url, "limit")) ?? 50,
|
|
640
|
+
offset: parseInt10(parseQuery(url, "offset")),
|
|
641
|
+
cursor: cursorParam ?? undefined,
|
|
642
|
+
};
|
|
568
643
|
try {
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
pathPrefix: parseQuery(url, "path_prefix") ?? undefined,
|
|
577
|
-
// Extension filter (vault#328). Accepts repeated `extension=`
|
|
578
|
-
// params for the array form: `?extension=csv&extension=yaml`.
|
|
579
|
-
// `parseQueryList` already returns undefined when no params
|
|
580
|
-
// are present, so the filter is silently skipped on a plain
|
|
581
|
-
// GET without the extension query.
|
|
582
|
-
extension: parseExtensionFilter(url),
|
|
583
|
-
metadata: bracket.metadata,
|
|
584
|
-
// Date-range precedence chain (highest to lowest):
|
|
585
|
-
// 1. Bracket-style `meta[created_at][gte]=…` (canonical).
|
|
586
|
-
// 2. Flat `date_field=…&date_from=…&date_to=…` (deprecated).
|
|
587
|
-
// 3. Legacy `date_from=…&date_to=…` (no date_field, deprecated)
|
|
588
|
-
// — filters on `n.created_at` by definition.
|
|
589
|
-
// The engine rejects combinations of `dateFilter` with the legacy
|
|
590
|
-
// `dateFrom`/`dateTo`, so we never set both shapes simultaneously.
|
|
591
|
-
...(bracket.dateFilter
|
|
592
|
-
? { dateFilter: bracket.dateFilter }
|
|
593
|
-
: parseQuery(url, "date_field")
|
|
594
|
-
? {
|
|
595
|
-
dateFilter: {
|
|
596
|
-
field: parseQuery(url, "date_field")!,
|
|
597
|
-
from: parseQuery(url, "date_from") ?? undefined,
|
|
598
|
-
to: parseQuery(url, "date_to") ?? undefined,
|
|
599
|
-
},
|
|
600
|
-
}
|
|
601
|
-
: {
|
|
602
|
-
dateFrom: parseQuery(url, "date_from") ?? undefined,
|
|
603
|
-
dateTo: parseQuery(url, "date_to") ?? undefined,
|
|
604
|
-
}),
|
|
605
|
-
sort: (parseQuery(url, "sort") as "asc" | "desc") ?? undefined,
|
|
606
|
-
orderBy: parseQuery(url, "order_by") ?? undefined,
|
|
607
|
-
limit: parseInt10(parseQuery(url, "limit")) ?? 50,
|
|
608
|
-
offset: parseInt10(parseQuery(url, "offset")),
|
|
609
|
-
});
|
|
644
|
+
if (cursorParam) {
|
|
645
|
+
const page = await store.queryNotesPaged(queryOpts);
|
|
646
|
+
results = page.notes;
|
|
647
|
+
nextCursor = page.next_cursor;
|
|
648
|
+
} else {
|
|
649
|
+
results = await store.queryNotes(queryOpts);
|
|
650
|
+
}
|
|
610
651
|
} catch (e: any) {
|
|
611
652
|
// QueryError (non-indexed order_by, unknown operator, ...) surfaces
|
|
612
653
|
// here. Duck-type on `name` + `code` — core is a separate module, so
|
|
@@ -614,6 +655,14 @@ async function handleNotesInner(
|
|
|
614
655
|
if (e && e.name === "QueryError") {
|
|
615
656
|
return json({ error: e.message, code: e.code ?? "INVALID_QUERY" }, 400);
|
|
616
657
|
}
|
|
658
|
+
// CursorError carries a structured code (cursor_invalid /
|
|
659
|
+
// cursor_query_mismatch) so the agent loop can distinguish a
|
|
660
|
+
// malformed cursor from a hash-mismatch and react appropriately
|
|
661
|
+
// (the latter typically means the agent changed its filter and
|
|
662
|
+
// should drop the cursor + restart from scratch).
|
|
663
|
+
if (e && e.name === "CursorError") {
|
|
664
|
+
return json({ error: e.message, code: e.code ?? "cursor_invalid" }, 400);
|
|
665
|
+
}
|
|
617
666
|
throw e;
|
|
618
667
|
}
|
|
619
668
|
|
|
@@ -682,9 +731,14 @@ async function handleNotesInner(
|
|
|
682
731
|
if (includeAttachments) enriched.attachments = await store.getAttachments(n.id);
|
|
683
732
|
enrichedOut.push(enriched);
|
|
684
733
|
}
|
|
734
|
+
// Cursor mode wraps the list in {notes, next_cursor} so an agent
|
|
735
|
+
// loop can chain calls without tracking a watermark client-side.
|
|
736
|
+
// Legacy callers (no `cursor` param) still get the flat array.
|
|
737
|
+
if (cursorParam) return json({ notes: enrichedOut, next_cursor: nextCursor });
|
|
685
738
|
return json(enrichedOut);
|
|
686
739
|
}
|
|
687
740
|
|
|
741
|
+
if (cursorParam) return json({ notes: output, next_cursor: nextCursor });
|
|
688
742
|
return json(output);
|
|
689
743
|
}
|
|
690
744
|
|
|
@@ -813,19 +867,33 @@ async function handleNotesInner(
|
|
|
813
867
|
const body = await req.json() as { path: string; mimeType: string; transcribe?: boolean };
|
|
814
868
|
if (!body.path || !body.mimeType) return json({ error: "path and mimeType are required" }, 400);
|
|
815
869
|
|
|
816
|
-
//
|
|
817
|
-
//
|
|
818
|
-
//
|
|
819
|
-
//
|
|
820
|
-
//
|
|
821
|
-
//
|
|
822
|
-
|
|
823
|
-
|
|
870
|
+
// Decide whether to enqueue this attachment for transcription. Two paths:
|
|
871
|
+
//
|
|
872
|
+
// - **Explicit caller opt-in (legacy path, Lens flow):** `transcribe: true`
|
|
873
|
+
// on the POST. The note already has a `_Transcript pending._` stub the
|
|
874
|
+
// worker replaces on success — `transcribe_origin: "legacy"` preserves
|
|
875
|
+
// the stub-patching behavior.
|
|
876
|
+
// - **Auto-transcribe (vault#353):** mime-type is `audio/*` AND the
|
|
877
|
+
// operator has flipped `auto_transcribe.enabled = true` AND scribe is
|
|
878
|
+
// reachable. The caller didn't opt in explicitly; we infer from the
|
|
879
|
+
// audio mime-type. `transcribe_origin: "auto"` tells the worker to
|
|
880
|
+
// materialize a `<attachment-path>.transcript.md` note on completion.
|
|
881
|
+
//
|
|
882
|
+
// Explicit `transcribe: true` wins — if the caller asked, we honor that
|
|
883
|
+
// regardless of the auto-transcribe toggle (back-compat).
|
|
884
|
+
const explicitOptIn = body.transcribe === true;
|
|
885
|
+
const autoOptIn = !explicitOptIn && shouldAutoTranscribe(body.mimeType);
|
|
886
|
+
const attMeta = (explicitOptIn || autoOptIn)
|
|
887
|
+
? {
|
|
888
|
+
transcribe_status: "pending" as const,
|
|
889
|
+
transcribe_requested_at: new Date().toISOString(),
|
|
890
|
+
transcribe_origin: (explicitOptIn ? "legacy" : "auto") as "legacy" | "auto",
|
|
891
|
+
}
|
|
824
892
|
: undefined;
|
|
825
893
|
|
|
826
894
|
const attachment = await store.addAttachment(note.id, body.path, body.mimeType, attMeta);
|
|
827
895
|
|
|
828
|
-
if (
|
|
896
|
+
if (explicitOptIn) {
|
|
829
897
|
const noteMeta = (note.metadata as Record<string, unknown> | undefined) ?? {};
|
|
830
898
|
if (noteMeta.transcribe_stub !== true) {
|
|
831
899
|
await store.updateNote(note.id, {
|
|
@@ -874,6 +942,33 @@ async function handleNotesInner(
|
|
|
874
942
|
return json({ error: "Method not allowed" }, 405);
|
|
875
943
|
}
|
|
876
944
|
|
|
945
|
+
// POST /notes/:idOrPath/retry-transcription — vault#353 design Q5.
|
|
946
|
+
//
|
|
947
|
+
// Re-runs the auto-transcribe pipeline against the original audio
|
|
948
|
+
// attachment recorded in the transcript note's `transcript_attachment_id`
|
|
949
|
+
// frontmatter. Only valid on transcript notes (the target idOrPath must
|
|
950
|
+
// be a transcript note with `transcript_status: "failed"`); calling on
|
|
951
|
+
// anything else returns 400 with a clear reason.
|
|
952
|
+
//
|
|
953
|
+
// Wire shape:
|
|
954
|
+
// POST .../notes/<idOrPath>/retry-transcription
|
|
955
|
+
//   → 202 { status, attachment_id, attachment_path, transcript_note_id, worker } when re-enqueued
|
|
956
|
+
// 400 invalid_target (not a transcript note)
|
|
957
|
+
// 400 not_failed (transcript already succeeded; nothing to retry)
|
|
958
|
+
// 404 attachment_missing (transcript_attachment_id row deleted)
|
|
959
|
+
// 404 audio_missing (audio file unlinked from disk)
|
|
960
|
+
//   503 scribe_unavailable (reserved — not returned today; with no worker registered the row is still reset and the response reports worker: "sweep-only")
|
|
961
|
+
if (sub === "/retry-transcription") {
|
|
962
|
+
if (method !== "POST") return json({ error: "Method not allowed" }, 405);
|
|
963
|
+
if (!vault) return json({ error: "Vault context required" }, 400);
|
|
964
|
+
const note = await resolveNote(store, idOrPath);
|
|
965
|
+
if (!note) return json({ error: "Not found" }, 404);
|
|
966
|
+
if (!noteWithinTagScope(note, tagScope.allowed, tagScope.raw)) {
|
|
967
|
+
return json({ error: "Not found" }, 404);
|
|
968
|
+
}
|
|
969
|
+
return handleRetryTranscription(store, note, vault);
|
|
970
|
+
}
|
|
971
|
+
|
|
877
972
|
if (sub !== "") return json({ error: "Not found" }, 404);
|
|
878
973
|
|
|
879
974
|
// GET /notes/:idOrPath — single note
|
|
@@ -1823,6 +1918,128 @@ ${rendered}
|
|
|
1823
1918
|
});
|
|
1824
1919
|
}
|
|
1825
1920
|
|
|
1921
|
+
// ---------------------------------------------------------------------------
|
|
1922
|
+
// Retry transcription (vault#353 design Q5)
|
|
1923
|
+
// ---------------------------------------------------------------------------
|
|
1924
|
+
|
|
1925
|
+
/**
 * Re-enqueue the original audio attachment for a `transcript_status: failed`
 * transcript note. Steps:
 *
 *   1. Validate target is a transcript note (`transcript_status` set in
 *      metadata) AND that status is `failed`.
 *   2. Find the original audio attachment by id from
 *      `transcript_attachment_id` frontmatter. 404 if the row's gone.
 *   3. Validate the audio file still exists on disk and resolves to a path
 *      strictly inside the vault assets dir (retention=keep is assumed by the
 *      retry contract; retention=until_transcribed unlinks only on success,
 *      retention=never unlinks on failure — that last one explicitly breaks
 *      retry, by design).
 *   4. Reset `transcribe_status = "pending"`, clear backoff + error fields,
 *      and mark the row `transcribe_origin: "auto"` so the worker writes a
 *      transcript note (overwriting this one in place).
 *   5. Kick the worker if registered; otherwise the sweep picks it up.
 *
 * @param store - Vault store used to read the attachment row and write its metadata.
 * @param note - The already-resolved transcript note the retry was requested on.
 * @param vault - Vault name; used to locate the assets dir and passed to the worker.
 * @returns 202 `{ status, attachment_id, attachment_path, transcript_note_id, worker }`
 *   when re-enqueued; 400 `invalid_target` / `not_failed` / `missing_attachment_id`;
 *   404 `attachment_missing` / `audio_missing`.
 */
async function handleRetryTranscription(
  store: Store,
  note: Note,
  vault: string,
): Promise<Response> {
  const meta = (note.metadata as Record<string, unknown> | undefined) ?? {};
  if (typeof meta.transcript_status !== "string") {
    return json(
      {
        error: "invalid_target",
        message: "Target note is not a transcript note (no transcript_status frontmatter).",
      },
      400,
    );
  }
  if (meta.transcript_status !== "failed") {
    return json(
      {
        error: "not_failed",
        message: `Transcript note status is "${meta.transcript_status}" — only failed transcripts can be retried.`,
        transcript_status: meta.transcript_status,
      },
      400,
    );
  }
  const attachmentId = typeof meta.transcript_attachment_id === "string"
    ? meta.transcript_attachment_id
    : undefined;
  if (!attachmentId) {
    return json(
      {
        error: "missing_attachment_id",
        message: "Transcript note has no `transcript_attachment_id` — can't locate the original audio.",
      },
      400,
    );
  }
  const attachment = await store.getAttachment(attachmentId);
  if (!attachment) {
    return json(
      {
        error: "attachment_missing",
        message: `Original audio attachment ${attachmentId} no longer exists in the vault.`,
      },
      404,
    );
  }
  // Audio file existence + safety: defense-in-depth against a bad attachment
  // row pointing outside the vault assets dir. The containment test compares
  // against the root *plus a separator*: a bare `startsWith(root)` would also
  // accept sibling directories that merely share the prefix (e.g.
  // `<assets>-backup/x.wav`) and the assets root itself (a directory, not an
  // audio file).
  const assetsRoot = normalize(assetsDir(vault));
  const assetsRootPrefix = assetsRoot.endsWith(sep) ? assetsRoot : assetsRoot + sep;
  const audioFilePath = normalize(join(assetsRoot, attachment.path));
  if (!audioFilePath.startsWith(assetsRootPrefix) || !existsSync(audioFilePath)) {
    return json(
      {
        error: "audio_missing",
        message: `Original audio file at "${attachment.path}" no longer exists on disk.`,
      },
      404,
    );
  }

  // Reset transcribe_status. Worker reads this row, sees "pending", processes
  // it. Mark `transcribe_origin: "auto"` so the worker materializes the
  // transcript note (overwriting this failed note in place). The copy keeps
  // the fetched attachment's metadata object unmodified.
  const attMeta = { ...(attachment.metadata ?? {}) } as Record<string, unknown>;
  attMeta.transcribe_status = "pending";
  attMeta.transcribe_requested_at = new Date().toISOString();
  attMeta.transcribe_origin = "auto";
  delete attMeta.transcribe_backoff_until;
  delete attMeta.transcribe_error;
  delete attMeta.transcribe_error_code;
  delete attMeta.transcribe_attempts;
  await store.setAttachmentMetadata(attachment.id, attMeta);

  // Kick the worker for an event-driven re-run (no 30s sweep wait). The
  // worker re-reads the row + processes immediately. If the worker isn't
  // registered (scribe not configured this boot), we still reset the row;
  // the next boot's sweep will pick it up. The 503 path is for callers that
  // want certainty — but for v0.6 the sweep guarantee is enough.
  const { getTranscriptionWorker } = await import("./transcription-registry.ts");
  const worker = getTranscriptionWorker();
  if (worker) {
    // Refresh the attachment after the metadata write so the worker's
    // in-process dedupe check sees pending.
    const fresh = await store.getAttachment(attachment.id) ?? attachment;
    // Fire-and-forget — the response shouldn't wait on transcription.
    // NOTE(review): no rejection handler is attached here; confirm that
    // `worker.kick` never rejects (or handles its own errors).
    void worker.kick(vault, fresh);
  }

  return json(
    {
      status: "queued",
      attachment_id: attachment.id,
      attachment_path: attachment.path,
      transcript_note_id: note.id,
      worker: worker ? "kicked" : "sweep-only",
    },
    202,
  );
}
|
|
2042
|
+
|
|
1826
2043
|
// ---------------------------------------------------------------------------
|
|
1827
2044
|
// Storage (file upload/serve) — kept as-is, Daily needs it
|
|
1828
2045
|
// ---------------------------------------------------------------------------
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for vault's scribe service-discovery (vault#353).
|
|
3
|
+
*
|
|
4
|
+
* Single decision site for "where does scribe live": env override, then
|
|
5
|
+
* `~/.parachute/services.json`. The cache layer is exercised separately
|
|
6
|
+
* so the resolution rule stays unit-testable without filesystem state.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { describe, test, expect, beforeEach } from "bun:test";
|
|
10
|
+
import { resolveScribeUrl, clearScribeUrlCache } from "./scribe-discovery.ts";
|
|
11
|
+
|
|
12
|
+
/**
 * Build a stand-in for `readManifest` that returns a fixed services list.
 * Each input entry is expanded into a manifest-shaped record with a
 * `/<name>` path, a `/health` endpoint and a placeholder version; `origin`
 * is only included when the caller supplied a non-empty one.
 */
function mkManifest(services: Array<{ name: string; port: number; origin?: string }>): typeof import("./services-manifest.ts").readManifest {
  return () => {
    const entries = services.map(({ name, port, origin }) => {
      const entry: Record<string, unknown> = {
        name,
        port,
        paths: [`/${name}`],
        health: "/health",
        version: "0.0.0-test",
      };
      if (origin) entry.origin = origin;
      return entry;
    });
    return { services: entries as any };
  };
}
|
|
24
|
+
|
|
25
|
+
// Reset the process-lifetime cache so no test observes a URL resolved by an
// earlier one.
beforeEach(() => clearScribeUrlCache());

describe("resolveScribeUrl", () => {
  // Shared fixtures: an environment with no overrides, and a manifest that
  // advertises scribe on its loopback port.
  const emptyEnv = (): NodeJS.ProcessEnv => ({} as NodeJS.ProcessEnv);
  const scribeOnLoopback = () => mkManifest([{ name: "parachute-scribe", port: 1943 }]);

  test("returns SCRIBE_URL env var (overrides services.json)", () => {
    const env = { SCRIBE_URL: "http://example.test:9999" } as NodeJS.ProcessEnv;
    const resolved = resolveScribeUrl(env, scribeOnLoopback());
    expect(resolved).toBe("http://example.test:9999");
  });

  test("strips trailing slash from SCRIBE_URL env var", () => {
    const env = { SCRIBE_URL: "http://example.test:9999/" } as NodeJS.ProcessEnv;
    const resolved = resolveScribeUrl(env, mkManifest([]));
    expect(resolved).toBe("http://example.test:9999");
  });

  test("falls back to services.json parachute-scribe entry", () => {
    const resolved = resolveScribeUrl(emptyEnv(), scribeOnLoopback());
    expect(resolved).toBe("http://127.0.0.1:1943");
  });

  test("honors explicit `origin` on the service entry (v0.7 shape)", () => {
    const manifest = mkManifest([
      { name: "parachute-scribe", port: 1943, origin: "https://scribe.cloud.example.com" },
    ]);
    const resolved = resolveScribeUrl(emptyEnv(), manifest);
    expect(resolved).toBe("https://scribe.cloud.example.com");
  });

  test("returns undefined when no env override AND no scribe entry", () => {
    const manifest = mkManifest([{ name: "parachute-vault", port: 1940 }]);
    expect(resolveScribeUrl(emptyEnv(), manifest)).toBeUndefined();
  });

  test("returns undefined when manifest read throws", () => {
    const calls: unknown[][] = [];
    const logger = { warn: (...args: unknown[]) => calls.push(args) };
    const manifest = (() => { throw new Error("boom"); }) as unknown as Parameters<typeof resolveScribeUrl>[1];
    const resolved = resolveScribeUrl(emptyEnv(), manifest, logger);
    expect(resolved).toBeUndefined();
    // Exactly one warning is logged for the failed read.
    expect(calls.length).toBe(1);
  });

  test("trims whitespace-only SCRIBE_URL as unset", () => {
    const env = { SCRIBE_URL: " " } as NodeJS.ProcessEnv;
    // Whitespace-only env falls through to services.json.
    const resolved = resolveScribeUrl(env, scribeOnLoopback());
    expect(resolved).toBe("http://127.0.0.1:1943");
  });
});
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Service discovery for the scribe transcription module.
|
|
3
|
+
*
|
|
4
|
+
* Per the 2026-05-21 vault↔scribe design (Part 2, design question 2), vault
|
|
5
|
+
* locates scribe via `~/.parachute/services.json` — the canonical hub-
|
|
6
|
+
* maintained registry. This module is the single read site so the
|
|
7
|
+
* resolution rule lives in one place.
|
|
8
|
+
*
|
|
9
|
+
* Resolution order (first hit wins):
|
|
10
|
+
*
|
|
11
|
+
* 1. `SCRIBE_URL` env var (operator override; useful for tests, Docker
|
|
12
|
+
* compose, and any deploy where scribe runs at a non-loopback host).
|
|
13
|
+
* 2. Entry `name === "parachute-scribe"` in `~/.parachute/services.json`
|
|
14
|
+
* → construct `http://127.0.0.1:<port>`.
|
|
15
|
+
* 3. `undefined` (auto-transcribe stays a no-op).
|
|
16
|
+
*
|
|
17
|
+
* The bearer token resolution stays in `./scribe-env.ts:resolveScribeAuthToken`.
|
|
18
|
+
* Service discovery is just about WHERE scribe lives; AUTH is a separate
|
|
19
|
+
* concern with its own env-var precedence (SCRIBE_AUTH_TOKEN over the legacy
|
|
20
|
+
* SCRIBE_TOKEN). When the v0.7 hub-issued-JWT path lands, the bearer source
|
|
21
|
+
* changes but the URL source stays the same — one file, one concern.
|
|
22
|
+
*
|
|
23
|
+
* v0.6 deploy is single-container (hub-as-supervisor) so loopback is fine.
|
|
24
|
+
* v0.7 cloud-multi-container will grow an `origin` field on the services.json
|
|
25
|
+
* entry; this resolver will honor it without API changes — `port` becomes
|
|
26
|
+
* a fallback when `origin` isn't set, no breaking change for v0.6 callers.
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
import { readManifest, ServicesManifestError } from "./services-manifest.ts";
|
|
30
|
+
|
|
31
|
+
/**
 * Resolve the scribe base URL (no trailing slash) by consulting the env-var
 * override first, then services.json. Returns `undefined` when scribe isn't
 * configured — callers MUST treat that as "auto-transcribe disabled."
 *
 * Resolution order (first hit wins):
 *   1. `env.SCRIBE_URL`, trimmed; a whitespace-only value counts as unset.
 *   2. The `parachute-scribe` manifest entry's `origin`, when it is a
 *      non-blank string.
 *   3. `http://127.0.0.1:<port>` built from that entry's `port`.
 *
 * Every trailing slash is removed from an env or `origin` value, so
 * `http://host/` and `http://host//` both resolve to `http://host`.
 *
 * The `env` + `readManifestImpl` parameters are injection seams for tests;
 * production callers omit them and pick up `process.env` + the real
 * `~/.parachute/services.json`.
 *
 * @param env - Environment to read `SCRIBE_URL` from.
 * @param readManifestImpl - Manifest reader; any exception it throws is
 *   logged as a warning and treated as "scribe not configured".
 * @param logger - Sink for the warning emitted when the manifest read fails.
 * @returns The base URL, or `undefined` when there is no override and no
 *   usable manifest entry.
 */
export function resolveScribeUrl(
  env: NodeJS.ProcessEnv = process.env,
  readManifestImpl: typeof readManifest = readManifest,
  logger: { warn?: (...args: unknown[]) => void } = console,
): string | undefined {
  // `/\/+$/` (not `/\/$/`) so a doubled trailing slash can't leak through and
  // break the "no trailing slash" contract.
  const stripTrailingSlashes = (url: string): string => url.replace(/\/+$/, "");

  const override = env.SCRIBE_URL?.trim();
  if (override) return stripTrailingSlashes(override);

  let manifest: ReturnType<typeof readManifest>;
  try {
    manifest = readManifestImpl();
  } catch (err) {
    // Best-effort: an unreadable manifest disables discovery, it doesn't crash.
    if (err instanceof ServicesManifestError) {
      logger.warn?.(`[scribe-discovery] services.json unreadable: ${err.message}`);
    } else {
      logger.warn?.(`[scribe-discovery] services.json read failed: ${err}`);
    }
    return undefined;
  }

  const entry = manifest.services.find((s) => s.name === "parachute-scribe");
  if (!entry) return undefined;

  // An explicit `origin` on the service entry wins over the loopback shape.
  // The cast is needed because `origin` is read through a widened type here.
  const origin = (entry as { origin?: string }).origin;
  if (typeof origin === "string" && origin.trim()) {
    return stripTrailingSlashes(origin.trim());
  }
  return `http://127.0.0.1:${entry.port}`;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Module-level memo of the resolved scribe URL.
 *
 * `null` means "not resolved yet"; `undefined` is a real, cached answer
 * ("scribe is not configured"). The value is computed on the first
 * `getCachedScribeUrl()` call and reused until `clearScribeUrlCache()` runs,
 * so a changed `services.json` is not noticed without a restart (or an
 * explicit clear) — the file is deliberately not watched.
 *
 * Tests that want to bypass the memo should call `resolveScribeUrl` directly
 * with an explicit `env` + `readManifestImpl`.
 */
let cachedScribeUrl: string | undefined | null = null;

/** Return the memoized scribe URL, resolving it on first use. */
export function getCachedScribeUrl(): string | undefined {
  if (cachedScribeUrl !== null) return cachedScribeUrl;
  const resolved = resolveScribeUrl();
  cachedScribeUrl = resolved;
  return resolved;
}

/** Forget the memoized URL so the next lookup resolves it again. */
export function clearScribeUrlCache(): void {
  cachedScribeUrl = null;
}
|
package/src/scribe-env.test.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { describe, test, expect } from "bun:test";
|
|
2
|
-
import { resolveScribeAuthToken } from "./scribe-env.ts";
|
|
2
|
+
import { resolveScribeAuthToken, generateScribeBearer, ensureScribeBearer } from "./scribe-env.ts";
|
|
3
3
|
|
|
4
4
|
function captureWarn() {
|
|
5
5
|
const calls: unknown[][] = [];
|
|
@@ -47,3 +47,68 @@ describe("resolveScribeAuthToken", () => {
|
|
|
47
47
|
expect(calls.length).toBe(0);
|
|
48
48
|
});
|
|
49
49
|
});
|
|
50
|
+
|
|
51
|
+
describe("generateScribeBearer (vault#353)", () => {
  test("returns 32-byte base64url string (~43 chars, no padding)", () => {
    const token = generateScribeBearer();
    // base64url alphabet only — no `+`, `/` or `=` padding.
    expect(token).toMatch(/^[A-Za-z0-9_-]+$/);
    // 32 bytes base64url-encoded = 43 chars.
    expect(token.length).toBe(43);
  });

  test("each call yields a unique value", () => {
    const [first, second] = [generateScribeBearer(), generateScribeBearer()];
    expect(first).not.toBe(second);
  });
});
|
|
65
|
+
|
|
66
|
+
describe("ensureScribeBearer (vault#353)", () => {
  // Runs ensureScribeBearer against a snapshot of `env` and records every
  // (key, value) pair it asks to persist.
  const runWith = (env: Record<string, string>) => {
    const writes: Array<[string, string]> = [];
    const outcome = ensureScribeBearer(
      () => ({ ...env }),
      (k, v) => writes.push([k, v]),
    );
    return { ...outcome, writes };
  };

  test("generates + persists a bearer when neither env var is set", () => {
    const { created, token, writes } = runWith({});
    expect(created).toBe(true);
    expect(token.length).toBe(43);
    expect(writes).toEqual([["SCRIBE_AUTH_TOKEN", token]]);
  });

  test("preserves existing SCRIBE_AUTH_TOKEN (idempotent)", () => {
    const { created, token, writes } = runWith({ SCRIBE_AUTH_TOKEN: "already-set" });
    expect(created).toBe(false);
    expect(token).toBe("already-set");
    expect(writes.length).toBe(0);
  });

  test("preserves legacy SCRIBE_TOKEN without rewriting it", () => {
    const { created, token, writes } = runWith({ SCRIBE_TOKEN: "legacy" });
    expect(created).toBe(false);
    expect(token).toBe("legacy");
    expect(writes.length).toBe(0);
  });

  test("treats whitespace-only env value as unset (generates fresh)", () => {
    const { created, token, writes } = runWith({ SCRIBE_AUTH_TOKEN: " " });
    expect(created).toBe(true);
    expect(token.length).toBe(43);
    expect(writes[0]?.[0]).toBe("SCRIBE_AUTH_TOKEN");
  });
});
|