@openparachute/vault 0.4.7-rc.1 → 0.4.8-rc.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/README.md +44 -10
  2. package/core/src/connection-pragmas.test.ts +232 -0
  3. package/core/src/core.test.ts +257 -0
  4. package/core/src/cursor.test.ts +160 -0
  5. package/core/src/cursor.ts +272 -0
  6. package/core/src/mcp.ts +51 -7
  7. package/core/src/notes.ts +164 -2
  8. package/core/src/portable-md.test.ts +247 -0
  9. package/core/src/portable-md.ts +118 -1
  10. package/core/src/schema.ts +98 -2
  11. package/core/src/store.ts +11 -1
  12. package/core/src/types.ts +32 -0
  13. package/package.json +1 -1
  14. package/src/auth-status.ts +4 -0
  15. package/src/auto-transcribe.test.ts +116 -0
  16. package/src/auto-transcribe.ts +48 -0
  17. package/src/cli.ts +151 -50
  18. package/src/config.test.ts +26 -0
  19. package/src/config.ts +53 -1
  20. package/src/db.ts +15 -2
  21. package/src/export-watch.test.ts +99 -0
  22. package/src/mcp-install-interactive.test.ts +23 -2
  23. package/src/mcp-install-interactive.ts +21 -2
  24. package/src/mcp-install.test.ts +40 -0
  25. package/src/mcp-tools.ts +17 -1
  26. package/src/module-config.ts +70 -14
  27. package/src/module-manifest.test.ts +93 -0
  28. package/src/module-manifest.ts +94 -0
  29. package/src/routes.ts +267 -50
  30. package/src/scribe-discovery.test.ts +77 -0
  31. package/src/scribe-discovery.ts +91 -0
  32. package/src/scribe-env.test.ts +66 -1
  33. package/src/scribe-env.ts +42 -1
  34. package/src/self-register.test.ts +380 -0
  35. package/src/self-register.ts +234 -0
  36. package/src/server.ts +46 -11
  37. package/src/transcript-note.test.ts +171 -0
  38. package/src/transcript-note.ts +189 -0
  39. package/src/transcription-registry.ts +22 -0
  40. package/src/transcription-worker.test.ts +250 -0
  41. package/src/transcription-worker.ts +186 -27
  42. package/src/vault.test.ts +347 -0
package/src/routes.ts CHANGED
@@ -45,6 +45,7 @@ import {
45
45
  import { join, extname, normalize } from "path";
46
46
  import { existsSync, mkdirSync, readFileSync, statSync, unlinkSync, writeFileSync } from "fs";
47
47
  import { vaultDir } from "./config.ts";
48
+ import { shouldAutoTranscribe } from "./auto-transcribe.ts";
48
49
 
49
50
  // ---------------------------------------------------------------------------
50
51
  // Helpers
@@ -509,6 +510,21 @@ async function handleNotesInner(
509
510
  return json(result);
510
511
  }
511
512
 
513
+ // Cursor + full-text search is mutually exclusive (vault#313 reviewer).
514
+ // FTS owns its own ordering (relevance, not updated_at), so a cursor
515
+ // would skip rows. MCP rejects this combo at `core/src/mcp.ts`; REST
516
+ // would otherwise route into the `if (search)` branch below and
517
+ // silently drop the cursor. Reject here for surface parity.
518
+ if (search && parseQuery(url, "cursor")) {
519
+ return json(
520
+ {
521
+ error: "cursor is incompatible with full-text search — FTS has its own ordering. Use date_filter on updated_at for since-last-checked search.",
522
+ code: "INVALID_QUERY",
523
+ },
524
+ 400,
525
+ );
526
+ }
527
+
512
528
  // Full-text search
513
529
  if (search) {
514
530
  const searchTags = parseQueryList(url, "tag");
@@ -564,49 +580,74 @@ async function handleNotesInner(
564
580
  const tags = parseQueryList(url, "tag");
565
581
  const bracket = parseMetaBrackets(url);
566
582
  if (bracket.error) return bracket.error;
583
+ // Opaque cursor for "since last checked" agent loops (vault#313).
584
+ // When present, switches the response shape to {notes, next_cursor}
585
+ // and routes through queryNotesPaged for keyset pagination. Mutually
586
+ // exclusive with the `near` graph-neighborhood scope (rebuilding the
587
+ // neighborhood per page isn't stable) — rejected below.
588
+ const cursorParam = parseQuery(url, "cursor");
589
+ const nearNoteIdEarly = parseQuery(url, "near[note_id]");
590
+ if (cursorParam && nearNoteIdEarly) {
591
+ return json(
592
+ {
593
+ error: "cursor is incompatible with near (graph neighborhood). Resolve the neighborhood first, then iterate with cursor over the resulting note set.",
594
+ code: "INVALID_QUERY",
595
+ },
596
+ 400,
597
+ );
598
+ }
567
599
  let results: Note[];
600
+ let nextCursor: string | null = null;
601
+ const queryOpts = {
602
+ tags,
603
+ tagMatch: (parseQuery(url, "tag_match") as "all" | "any") ?? (tags && tags.length > 1 ? "any" : undefined),
604
+ excludeTags: parseQueryList(url, "exclude_tag"),
605
+ hasTags: parseBoolOrUndef(parseQuery(url, "has_tags")),
606
+ hasLinks: parseBoolOrUndef(parseQuery(url, "has_links")),
607
+ path: parseQuery(url, "path") ?? undefined,
608
+ pathPrefix: parseQuery(url, "path_prefix") ?? undefined,
609
+ // Extension filter (vault#328). Accepts repeated `extension=`
610
+ // params for the array form: `?extension=csv&extension=yaml`.
611
+ // `parseQueryList` already returns undefined when no params
612
+ // are present, so the filter is silently skipped on a plain
613
+ // GET without the extension query.
614
+ extension: parseExtensionFilter(url),
615
+ metadata: bracket.metadata,
616
+ // Date-range precedence chain (highest to lowest):
617
+ // 1. Bracket-style `meta[created_at][gte]=…` (canonical).
618
+ // 2. Flat `date_field=…&date_from=…&date_to=…` (deprecated).
619
+ // 3. Legacy `date_from=…&date_to=…` (no date_field, deprecated)
620
+ // — filters on `n.created_at` by definition.
621
+ // The engine rejects combinations of `dateFilter` with the legacy
622
+ // `dateFrom`/`dateTo`, so we never set both shapes simultaneously.
623
+ ...(bracket.dateFilter
624
+ ? { dateFilter: bracket.dateFilter }
625
+ : parseQuery(url, "date_field")
626
+ ? {
627
+ dateFilter: {
628
+ field: parseQuery(url, "date_field")!,
629
+ from: parseQuery(url, "date_from") ?? undefined,
630
+ to: parseQuery(url, "date_to") ?? undefined,
631
+ },
632
+ }
633
+ : {
634
+ dateFrom: parseQuery(url, "date_from") ?? undefined,
635
+ dateTo: parseQuery(url, "date_to") ?? undefined,
636
+ }),
637
+ sort: (parseQuery(url, "sort") as "asc" | "desc") ?? undefined,
638
+ orderBy: parseQuery(url, "order_by") ?? undefined,
639
+ limit: parseInt10(parseQuery(url, "limit")) ?? 50,
640
+ offset: parseInt10(parseQuery(url, "offset")),
641
+ cursor: cursorParam ?? undefined,
642
+ };
568
643
  try {
569
- results = await store.queryNotes({
570
- tags,
571
- tagMatch: (parseQuery(url, "tag_match") as "all" | "any") ?? (tags && tags.length > 1 ? "any" : undefined),
572
- excludeTags: parseQueryList(url, "exclude_tag"),
573
- hasTags: parseBoolOrUndef(parseQuery(url, "has_tags")),
574
- hasLinks: parseBoolOrUndef(parseQuery(url, "has_links")),
575
- path: parseQuery(url, "path") ?? undefined,
576
- pathPrefix: parseQuery(url, "path_prefix") ?? undefined,
577
- // Extension filter (vault#328). Accepts repeated `extension=`
578
- // params for the array form: `?extension=csv&extension=yaml`.
579
- // `parseQueryList` already returns undefined when no params
580
- // are present, so the filter is silently skipped on a plain
581
- // GET without the extension query.
582
- extension: parseExtensionFilter(url),
583
- metadata: bracket.metadata,
584
- // Date-range precedence chain (highest to lowest):
585
- // 1. Bracket-style `meta[created_at][gte]=…` (canonical).
586
- // 2. Flat `date_field=…&date_from=…&date_to=…` (deprecated).
587
- // 3. Legacy `date_from=…&date_to=…` (no date_field, deprecated)
588
- // — filters on `n.created_at` by definition.
589
- // The engine rejects combinations of `dateFilter` with the legacy
590
- // `dateFrom`/`dateTo`, so we never set both shapes simultaneously.
591
- ...(bracket.dateFilter
592
- ? { dateFilter: bracket.dateFilter }
593
- : parseQuery(url, "date_field")
594
- ? {
595
- dateFilter: {
596
- field: parseQuery(url, "date_field")!,
597
- from: parseQuery(url, "date_from") ?? undefined,
598
- to: parseQuery(url, "date_to") ?? undefined,
599
- },
600
- }
601
- : {
602
- dateFrom: parseQuery(url, "date_from") ?? undefined,
603
- dateTo: parseQuery(url, "date_to") ?? undefined,
604
- }),
605
- sort: (parseQuery(url, "sort") as "asc" | "desc") ?? undefined,
606
- orderBy: parseQuery(url, "order_by") ?? undefined,
607
- limit: parseInt10(parseQuery(url, "limit")) ?? 50,
608
- offset: parseInt10(parseQuery(url, "offset")),
609
- });
644
+ if (cursorParam) {
645
+ const page = await store.queryNotesPaged(queryOpts);
646
+ results = page.notes;
647
+ nextCursor = page.next_cursor;
648
+ } else {
649
+ results = await store.queryNotes(queryOpts);
650
+ }
610
651
  } catch (e: any) {
611
652
  // QueryError (non-indexed order_by, unknown operator, ...) surfaces
612
653
  // here. Duck-type on `name` + `code` — core is a separate module, so
@@ -614,6 +655,14 @@ async function handleNotesInner(
614
655
  if (e && e.name === "QueryError") {
615
656
  return json({ error: e.message, code: e.code ?? "INVALID_QUERY" }, 400);
616
657
  }
658
+ // CursorError carries a structured code (cursor_invalid /
659
+ // cursor_query_mismatch) so the agent loop can distinguish a
660
+ // malformed cursor from a hash-mismatch and react appropriately
661
+ // (the latter typically means the agent changed its filter and
662
+ // should drop the cursor + restart from scratch).
663
+ if (e && e.name === "CursorError") {
664
+ return json({ error: e.message, code: e.code ?? "cursor_invalid" }, 400);
665
+ }
617
666
  throw e;
618
667
  }
619
668
 
@@ -682,9 +731,14 @@ async function handleNotesInner(
682
731
  if (includeAttachments) enriched.attachments = await store.getAttachments(n.id);
683
732
  enrichedOut.push(enriched);
684
733
  }
734
+ // Cursor mode wraps the list in {notes, next_cursor} so an agent
735
+ // loop can chain calls without tracking a watermark client-side.
736
+ // Legacy callers (no `cursor` param) still get the flat array.
737
+ if (cursorParam) return json({ notes: enrichedOut, next_cursor: nextCursor });
685
738
  return json(enrichedOut);
686
739
  }
687
740
 
741
+ if (cursorParam) return json({ notes: output, next_cursor: nextCursor });
688
742
  return json(output);
689
743
  }
690
744
 
@@ -813,19 +867,33 @@ async function handleNotesInner(
813
867
  const body = await req.json() as { path: string; mimeType: string; transcribe?: boolean };
814
868
  if (!body.path || !body.mimeType) return json({ error: "path and mimeType are required" }, 400);
815
869
 
816
- // `transcribe: true` asks the transcription worker to read this audio
817
- // file and replace the note's content with the transcript. The caller
818
- // is declaring "overwrite my current content when the transcript lands"
819
- // we persist that as `transcribe_stub: true` on the note so a later
820
- // user edit (which clears the marker) can opt out before the worker
821
- // runs.
822
- const attMeta = body.transcribe
823
- ? { transcribe_status: "pending" as const, transcribe_requested_at: new Date().toISOString() }
870
+ // Decide whether to enqueue this attachment for transcription. Two paths:
871
+ //
872
+ // - **Explicit caller opt-in (legacy path, Lens flow):** `transcribe: true`
873
+ // on the POST. The note already has a `_Transcript pending._` stub the
874
+ // worker replaces on success — `transcribe_origin: "legacy"` preserves
875
+ // the stub-patching behavior.
876
+ // - **Auto-transcribe (vault#353):** mime-type is `audio/*` AND the
877
+ // operator has flipped `auto_transcribe.enabled = true` AND scribe is
878
+ // reachable. The caller didn't opt in explicitly; we infer from the
879
+ // audio mime-type. `transcribe_origin: "auto"` tells the worker to
880
+ // materialize a `<attachment-path>.transcript.md` note on completion.
881
+ //
882
+ // Explicit `transcribe: true` wins — if the caller asked, we honor that
883
+ // regardless of the auto-transcribe toggle (back-compat).
884
+ const explicitOptIn = body.transcribe === true;
885
+ const autoOptIn = !explicitOptIn && shouldAutoTranscribe(body.mimeType);
886
+ const attMeta = (explicitOptIn || autoOptIn)
887
+ ? {
888
+ transcribe_status: "pending" as const,
889
+ transcribe_requested_at: new Date().toISOString(),
890
+ transcribe_origin: (explicitOptIn ? "legacy" : "auto") as "legacy" | "auto",
891
+ }
824
892
  : undefined;
825
893
 
826
894
  const attachment = await store.addAttachment(note.id, body.path, body.mimeType, attMeta);
827
895
 
828
- if (body.transcribe) {
896
+ if (explicitOptIn) {
829
897
  const noteMeta = (note.metadata as Record<string, unknown> | undefined) ?? {};
830
898
  if (noteMeta.transcribe_stub !== true) {
831
899
  await store.updateNote(note.id, {
@@ -874,6 +942,33 @@ async function handleNotesInner(
874
942
  return json({ error: "Method not allowed" }, 405);
875
943
  }
876
944
 
945
+ // POST /notes/:idOrPath/retry-transcription — vault#353 design Q5.
946
+ //
947
+ // Re-runs the auto-transcribe pipeline against the original audio
948
+ // attachment recorded in the transcript note's `transcript_attachment_id`
949
+ // frontmatter. Only valid on transcript notes (the target idOrPath must
950
+ // be a transcript note with `transcript_status: "failed"`); calling on
951
+ // anything else returns 400 with a clear reason.
952
+ //
953
+ // Wire shape:
954
+ // POST .../notes/<idOrPath>/retry-transcription
955
+ // → 202 { status, attachment_id, attachment_path, transcript_note_id, worker } when re-enqueued
956
+ // 400 invalid_target (not a transcript note)
957
+ // 400 not_failed (transcript already succeeded; nothing to retry)
958
+ // 404 attachment_missing (transcript_attachment_id row deleted)
959
+ // 404 audio_missing (audio file unlinked from disk)
960
+ // 503 scribe_unavailable (reserved; not returned yet — with no worker the row is reset and left for the sweep)
961
+ if (sub === "/retry-transcription") {
962
+ if (method !== "POST") return json({ error: "Method not allowed" }, 405);
963
+ if (!vault) return json({ error: "Vault context required" }, 400);
964
+ const note = await resolveNote(store, idOrPath);
965
+ if (!note) return json({ error: "Not found" }, 404);
966
+ if (!noteWithinTagScope(note, tagScope.allowed, tagScope.raw)) {
967
+ return json({ error: "Not found" }, 404);
968
+ }
969
+ return handleRetryTranscription(store, note, vault);
970
+ }
971
+
877
972
  if (sub !== "") return json({ error: "Not found" }, 404);
878
973
 
879
974
  // GET /notes/:idOrPath — single note
@@ -1823,6 +1918,128 @@ ${rendered}
1823
1918
  });
1824
1919
  }
1825
1920
 
1921
+ // ---------------------------------------------------------------------------
1922
+ // Retry transcription (vault#353 design Q5)
1923
+ // ---------------------------------------------------------------------------
1924
+
1925
+ /**
1926
+ * Re-enqueue the original audio attachment for a `transcript_status: failed`
1927
+ * transcript note. Steps:
1928
+ *
1929
+ * 1. Validate target is a transcript note (`transcript_status` set in
1930
+ * metadata) AND that status is `failed`.
1931
+ * 2. Find the original audio attachment by id from
1932
+ * `transcript_attachment_id` frontmatter. 404 if the row's gone.
1933
+ * 3. Validate the audio file still exists on disk (retention=keep is
1934
+ * assumed by the retry contract; retention=until_transcribed unlinks
1935
+ * only on success, retention=never unlinks on failure — that last one
1936
+ * explicitly breaks retry, by design).
1937
+ * 4. Reset `transcribe_status = "pending"`, clear backoff + error fields.
1938
+ * The auto-origin marker is preserved so the worker writes a transcript
1939
+ * note (overwriting this one in place).
1940
+ * 5. Kick the worker if registered; otherwise the sweep picks it up.
1941
+ */
1942
+ async function handleRetryTranscription(
1943
+ store: Store,
1944
+ note: Note,
1945
+ vault: string,
1946
+ ): Promise<Response> {
1947
+ const meta = (note.metadata as Record<string, unknown> | undefined) ?? {};
1948
+ if (typeof meta.transcript_status !== "string") {
1949
+ return json(
1950
+ {
1951
+ error: "invalid_target",
1952
+ message: "Target note is not a transcript note (no transcript_status frontmatter).",
1953
+ },
1954
+ 400,
1955
+ );
1956
+ }
1957
+ if (meta.transcript_status !== "failed") {
1958
+ return json(
1959
+ {
1960
+ error: "not_failed",
1961
+ message: `Transcript note status is "${meta.transcript_status}" — only failed transcripts can be retried.`,
1962
+ transcript_status: meta.transcript_status,
1963
+ },
1964
+ 400,
1965
+ );
1966
+ }
1967
+ const attachmentId = typeof meta.transcript_attachment_id === "string"
1968
+ ? meta.transcript_attachment_id
1969
+ : undefined;
1970
+ if (!attachmentId) {
1971
+ return json(
1972
+ {
1973
+ error: "missing_attachment_id",
1974
+ message: "Transcript note has no `transcript_attachment_id` — can't locate the original audio.",
1975
+ },
1976
+ 400,
1977
+ );
1978
+ }
1979
+ const attachment = await store.getAttachment(attachmentId);
1980
+ if (!attachment) {
1981
+ return json(
1982
+ {
1983
+ error: "attachment_missing",
1984
+ message: `Original audio attachment ${attachmentId} no longer exists in the vault.`,
1985
+ },
1986
+ 404,
1987
+ );
1988
+ }
1989
+ // Audio file existence + safety: defense-in-depth against a bad attachment
1990
+ // row pointing outside the vault assets dir. Same guard as the worker.
1991
+ const assetsRoot = assetsDir(vault);
1992
+ const audioFilePath = normalize(join(assetsRoot, attachment.path));
1993
+ if (!audioFilePath.startsWith(normalize(assetsRoot)) || !existsSync(audioFilePath)) {
1994
+ return json(
1995
+ {
1996
+ error: "audio_missing",
1997
+ message: `Original audio file at "${attachment.path}" no longer exists on disk.`,
1998
+ },
1999
+ 404,
2000
+ );
2001
+ }
2002
+
2003
+ // Reset transcribe_status. Worker reads this row, sees "pending", processes
2004
+ // it. Preserve `transcribe_origin: "auto"` so the worker materializes the
2005
+ // transcript note (overwriting this failed note in place).
2006
+ const attMeta = { ...(attachment.metadata ?? {}) } as Record<string, unknown>;
2007
+ attMeta.transcribe_status = "pending";
2008
+ attMeta.transcribe_requested_at = new Date().toISOString();
2009
+ attMeta.transcribe_origin = "auto";
2010
+ delete attMeta.transcribe_backoff_until;
2011
+ delete attMeta.transcribe_error;
2012
+ delete attMeta.transcribe_error_code;
2013
+ delete attMeta.transcribe_attempts;
2014
+ await store.setAttachmentMetadata(attachment.id, attMeta);
2015
+
2016
+ // Kick the worker for an event-driven re-run (no 30s sweep wait). The
2017
+ // worker re-reads the row + processes immediately. If the worker isn't
2018
+ // registered (scribe not configured this boot), we still reset the row;
2019
+ // the next boot's sweep will pick it up. The 503 path is for callers that
2020
+ // want certainty — but for v0.6 the sweep guarantee is enough.
2021
+ const { getTranscriptionWorker } = await import("./transcription-registry.ts");
2022
+ const worker = getTranscriptionWorker();
2023
+ if (worker) {
2024
+ // Refresh the attachment after the metadata write so the worker's
2025
+ // in-process dedupe check sees pending.
2026
+ const fresh = await store.getAttachment(attachment.id) ?? attachment;
2027
+ // Fire-and-forget — the response shouldn't wait on transcription.
2028
+ void worker.kick(vault, fresh);
2029
+ }
2030
+
2031
+ return json(
2032
+ {
2033
+ status: "queued",
2034
+ attachment_id: attachment.id,
2035
+ attachment_path: attachment.path,
2036
+ transcript_note_id: note.id,
2037
+ worker: worker ? "kicked" : "sweep-only",
2038
+ },
2039
+ 202,
2040
+ );
2041
+ }
2042
+
1826
2043
  // ---------------------------------------------------------------------------
1827
2044
  // Storage (file upload/serve) — kept as-is, Daily needs it
1828
2045
  // ---------------------------------------------------------------------------
@@ -0,0 +1,77 @@
1
+ /**
2
+ * Tests for vault's scribe service-discovery (vault#353).
3
+ *
4
+ * Single decision site for "where does scribe live": env override, then
5
+ * `~/.parachute/services.json`. The cache layer is exercised separately
6
+ * so the resolution rule stays unit-testable without filesystem state.
7
+ */
8
+
9
+ import { describe, test, expect, beforeEach } from "bun:test";
10
+ import { resolveScribeUrl, clearScribeUrlCache } from "./scribe-discovery.ts";
11
+
12
+ function mkManifest(services: Array<{ name: string; port: number; origin?: string }>): typeof import("./services-manifest.ts").readManifest {
13
+ return () => ({
14
+ services: services.map((s) => ({
15
+ name: s.name,
16
+ port: s.port,
17
+ paths: [`/${s.name}`],
18
+ health: "/health",
19
+ version: "0.0.0-test",
20
+ ...(s.origin ? { origin: s.origin } : {}),
21
+ })) as any,
22
+ });
23
+ }
24
+
25
+ beforeEach(() => {
26
+ clearScribeUrlCache();
27
+ });
28
+
29
+ describe("resolveScribeUrl", () => {
30
+ test("returns SCRIBE_URL env var (overrides services.json)", () => {
31
+ const env = { SCRIBE_URL: "http://example.test:9999" } as NodeJS.ProcessEnv;
32
+ const manifest = mkManifest([{ name: "parachute-scribe", port: 1943 }]);
33
+ expect(resolveScribeUrl(env, manifest)).toBe("http://example.test:9999");
34
+ });
35
+
36
+ test("strips trailing slash from SCRIBE_URL env var", () => {
37
+ const env = { SCRIBE_URL: "http://example.test:9999/" } as NodeJS.ProcessEnv;
38
+ const manifest = mkManifest([]);
39
+ expect(resolveScribeUrl(env, manifest)).toBe("http://example.test:9999");
40
+ });
41
+
42
+ test("falls back to services.json parachute-scribe entry", () => {
43
+ const env = {} as NodeJS.ProcessEnv;
44
+ const manifest = mkManifest([{ name: "parachute-scribe", port: 1943 }]);
45
+ expect(resolveScribeUrl(env, manifest)).toBe("http://127.0.0.1:1943");
46
+ });
47
+
48
+ test("honors explicit `origin` on the service entry (v0.7 shape)", () => {
49
+ const env = {} as NodeJS.ProcessEnv;
50
+ const manifest = mkManifest([
51
+ { name: "parachute-scribe", port: 1943, origin: "https://scribe.cloud.example.com" },
52
+ ]);
53
+ expect(resolveScribeUrl(env, manifest)).toBe("https://scribe.cloud.example.com");
54
+ });
55
+
56
+ test("returns undefined when no env override AND no scribe entry", () => {
57
+ const env = {} as NodeJS.ProcessEnv;
58
+ const manifest = mkManifest([{ name: "parachute-vault", port: 1940 }]);
59
+ expect(resolveScribeUrl(env, manifest)).toBeUndefined();
60
+ });
61
+
62
+ test("returns undefined when manifest read throws", () => {
63
+ const env = {} as NodeJS.ProcessEnv;
64
+ const calls: unknown[][] = [];
65
+ const logger = { warn: (...args: unknown[]) => calls.push(args) };
66
+ const manifest = (() => { throw new Error("boom"); }) as unknown as Parameters<typeof resolveScribeUrl>[1];
67
+ expect(resolveScribeUrl(env, manifest, logger)).toBeUndefined();
68
+ expect(calls.length).toBe(1);
69
+ });
70
+
71
+ test("trims whitespace-only SCRIBE_URL as unset", () => {
72
+ const env = { SCRIBE_URL: " " } as NodeJS.ProcessEnv;
73
+ const manifest = mkManifest([{ name: "parachute-scribe", port: 1943 }]);
74
+ // Whitespace-only env falls through to services.json.
75
+ expect(resolveScribeUrl(env, manifest)).toBe("http://127.0.0.1:1943");
76
+ });
77
+ });
@@ -0,0 +1,91 @@
1
+ /**
2
+ * Service discovery for the scribe transcription module.
3
+ *
4
+ * Per the 2026-05-21 vault↔scribe design (Part 2, design question 2), vault
5
+ * locates scribe via `~/.parachute/services.json` — the canonical hub-
6
+ * maintained registry. This module is the single read site so the
7
+ * resolution rule lives in one place.
8
+ *
9
+ * Resolution order (first hit wins):
10
+ *
11
+ * 1. `SCRIBE_URL` env var (operator override; useful for tests, Docker
12
+ * compose, and any deploy where scribe runs at a non-loopback host).
13
+ * 2. Entry `name === "parachute-scribe"` in `~/.parachute/services.json`
14
+ * → construct `http://127.0.0.1:<port>`.
15
+ * 3. `undefined` (auto-transcribe stays a no-op).
16
+ *
17
+ * The bearer token resolution stays in `./scribe-env.ts:resolveScribeAuthToken`.
18
+ * Service discovery is just about WHERE scribe lives; AUTH is a separate
19
+ * concern with its own env-var precedence (SCRIBE_AUTH_TOKEN over the legacy
20
+ * SCRIBE_TOKEN). When the v0.7 hub-issued-JWT path lands, the bearer source
21
+ * changes but the URL source stays the same — one file, one concern.
22
+ *
23
+ * v0.6 deploy is single-container (hub-as-supervisor) so loopback is fine.
24
+ * v0.7 cloud-multi-container will grow an `origin` field on the services.json
25
+ * entry; this resolver will honor it without API changes — `port` becomes
26
+ * a fallback when `origin` isn't set, no breaking change for v0.6 callers.
27
+ */
28
+
29
+ import { readManifest, ServicesManifestError } from "./services-manifest.ts";
30
+
31
+ /**
32
+ * Resolve the scribe base URL (no trailing slash) by consulting the env-var
33
+ * override first, then services.json. Returns `undefined` when scribe isn't
34
+ * configured — callers MUST treat that as "auto-transcribe disabled."
35
+ *
36
+ * The `env` + `readManifestImpl` parameters are injection seams for tests;
37
+ * production callers omit them and pick up `process.env` + the real
38
+ * `~/.parachute/services.json`.
39
+ */
40
+ export function resolveScribeUrl(
41
+ env: NodeJS.ProcessEnv = process.env,
42
+ readManifestImpl: typeof readManifest = readManifest,
43
+ logger: { warn?: (...args: unknown[]) => void } = console,
44
+ ): string | undefined {
45
+ const override = env.SCRIBE_URL?.trim();
46
+ if (override) return override.replace(/\/$/, "");
47
+
48
+ let manifest;
49
+ try {
50
+ manifest = readManifestImpl();
51
+ } catch (err) {
52
+ if (err instanceof ServicesManifestError) {
53
+ logger.warn?.(`[scribe-discovery] services.json unreadable: ${err.message}`);
54
+ } else {
55
+ logger.warn?.(`[scribe-discovery] services.json read failed: ${err}`);
56
+ }
57
+ return undefined;
58
+ }
59
+ const entry = manifest.services.find((s) => s.name === "parachute-scribe");
60
+ if (!entry) return undefined;
61
+ // v0.6 loopback shape; v0.7 will add an explicit `origin` field on the
62
+ // service entry which wins over loopback when present.
63
+ const origin = (entry as { origin?: string }).origin;
64
+ if (typeof origin === "string" && origin.trim()) {
65
+ return origin.trim().replace(/\/$/, "");
66
+ }
67
+ return `http://127.0.0.1:${entry.port}`;
68
+ }
69
+
70
+ /**
71
+ * Process-lifetime cache. Computed at first call (typically during server
72
+ * boot), reused for every subsequent transcription request. Operators who
73
+ * change the scribe URL via `services.json` (re-install of scribe with a
74
+ * different port) need to restart vault; we deliberately don't watch the
75
+ * file because the v0.6 deploy model has a single restart-on-change story.
76
+ *
77
+ * Tests should pass an explicit `env` + `readManifestImpl` to `resolveScribeUrl`
78
+ * directly to bypass the cache.
79
+ */
80
+ let cachedScribeUrl: string | undefined | null = null;
81
+
82
+ export function getCachedScribeUrl(): string | undefined {
83
+ if (cachedScribeUrl === null) {
84
+ cachedScribeUrl = resolveScribeUrl();
85
+ }
86
+ return cachedScribeUrl;
87
+ }
88
+
89
+ export function clearScribeUrlCache(): void {
90
+ cachedScribeUrl = null;
91
+ }
@@ -1,5 +1,5 @@
1
1
  import { describe, test, expect } from "bun:test";
2
- import { resolveScribeAuthToken } from "./scribe-env.ts";
2
+ import { resolveScribeAuthToken, generateScribeBearer, ensureScribeBearer } from "./scribe-env.ts";
3
3
 
4
4
  function captureWarn() {
5
5
  const calls: unknown[][] = [];
@@ -47,3 +47,68 @@ describe("resolveScribeAuthToken", () => {
47
47
  expect(calls.length).toBe(0);
48
48
  });
49
49
  });
50
+
51
+ describe("generateScribeBearer (vault#353)", () => {
52
+ test("returns 32-byte base64url string (~43 chars, no padding)", () => {
53
+ const bearer = generateScribeBearer();
54
+ // 32 bytes base64url-encoded = 43 chars (no `=` padding in base64url).
55
+ expect(bearer.length).toBe(43);
56
+ expect(bearer).toMatch(/^[A-Za-z0-9_-]+$/);
57
+ });
58
+
59
+ test("each call yields a unique value", () => {
60
+ const a = generateScribeBearer();
61
+ const b = generateScribeBearer();
62
+ expect(a).not.toBe(b);
63
+ });
64
+ });
65
+
66
+ describe("ensureScribeBearer (vault#353)", () => {
67
+ test("generates + persists a bearer when neither env var is set", () => {
68
+ const env: Record<string, string> = {};
69
+ const writes: Array<[string, string]> = [];
70
+ const { created, token } = ensureScribeBearer(
71
+ () => ({ ...env }),
72
+ (k, v) => writes.push([k, v]),
73
+ );
74
+ expect(created).toBe(true);
75
+ expect(token.length).toBe(43);
76
+ expect(writes).toEqual([["SCRIBE_AUTH_TOKEN", token]]);
77
+ });
78
+
79
+ test("preserves existing SCRIBE_AUTH_TOKEN (idempotent)", () => {
80
+ const env: Record<string, string> = { SCRIBE_AUTH_TOKEN: "already-set" };
81
+ const writes: Array<[string, string]> = [];
82
+ const { created, token } = ensureScribeBearer(
83
+ () => ({ ...env }),
84
+ (k, v) => writes.push([k, v]),
85
+ );
86
+ expect(created).toBe(false);
87
+ expect(token).toBe("already-set");
88
+ expect(writes.length).toBe(0);
89
+ });
90
+
91
+ test("preserves legacy SCRIBE_TOKEN without rewriting it", () => {
92
+ const env: Record<string, string> = { SCRIBE_TOKEN: "legacy" };
93
+ const writes: Array<[string, string]> = [];
94
+ const { created, token } = ensureScribeBearer(
95
+ () => ({ ...env }),
96
+ (k, v) => writes.push([k, v]),
97
+ );
98
+ expect(created).toBe(false);
99
+ expect(token).toBe("legacy");
100
+ expect(writes.length).toBe(0);
101
+ });
102
+
103
+ test("treats whitespace-only env value as unset (generates fresh)", () => {
104
+ const env: Record<string, string> = { SCRIBE_AUTH_TOKEN: " " };
105
+ const writes: Array<[string, string]> = [];
106
+ const { created, token } = ensureScribeBearer(
107
+ () => ({ ...env }),
108
+ (k, v) => writes.push([k, v]),
109
+ );
110
+ expect(created).toBe(true);
111
+ expect(token.length).toBe(43);
112
+ expect(writes[0]?.[0]).toBe("SCRIBE_AUTH_TOKEN");
113
+ });
114
+ });