@openparachute/vault 0.4.7-rc.2 → 0.4.8-rc.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/.parachute/module.json +1 -1
  2. package/README.md +78 -41
  3. package/core/src/connection-pragmas.test.ts +232 -0
  4. package/core/src/core.test.ts +257 -0
  5. package/core/src/cursor.test.ts +160 -0
  6. package/core/src/cursor.ts +272 -0
  7. package/core/src/mcp.ts +51 -7
  8. package/core/src/notes.ts +164 -2
  9. package/core/src/schema.ts +106 -5
  10. package/core/src/store.ts +11 -1
  11. package/core/src/types.ts +32 -0
  12. package/package.json +7 -3
  13. package/src/auth-status.ts +4 -0
  14. package/src/auth.test.ts +5 -112
  15. package/src/auto-transcribe.test.ts +116 -0
  16. package/src/auto-transcribe.ts +48 -0
  17. package/src/backup.ts +17 -3
  18. package/src/cli.ts +95 -66
  19. package/src/config.test.ts +26 -0
  20. package/src/config.ts +53 -1
  21. package/src/db.ts +15 -2
  22. package/src/export-watch.test.ts +21 -0
  23. package/src/mcp-install-interactive.test.ts +23 -2
  24. package/src/mcp-install-interactive.ts +21 -2
  25. package/src/mcp-install.test.ts +40 -0
  26. package/src/mcp-tools.ts +17 -1
  27. package/src/module-config.ts +70 -14
  28. package/src/module-manifest.test.ts +114 -0
  29. package/src/module-manifest.ts +104 -0
  30. package/src/oauth-discovery.ts +95 -0
  31. package/src/owner-auth.ts +22 -149
  32. package/src/routes.ts +268 -51
  33. package/src/routing.test.ts +102 -99
  34. package/src/routing.ts +33 -47
  35. package/src/scribe-discovery.test.ts +77 -0
  36. package/src/scribe-discovery.ts +91 -0
  37. package/src/scribe-env.test.ts +66 -1
  38. package/src/scribe-env.ts +42 -1
  39. package/src/self-register.test.ts +412 -0
  40. package/src/self-register.ts +247 -0
  41. package/src/server.ts +47 -23
  42. package/src/transcript-note.test.ts +171 -0
  43. package/src/transcript-note.ts +189 -0
  44. package/src/transcription-registry.ts +22 -0
  45. package/src/transcription-worker.test.ts +250 -0
  46. package/src/transcription-worker.ts +186 -27
  47. package/src/vault-name.ts +3 -2
  48. package/src/vault.test.ts +347 -0
  49. package/web/ui/dist/assets/index-BOa-JJtV.css +1 -0
  50. package/web/ui/dist/assets/index-BzA5LgE3.js +60 -0
  51. package/web/ui/dist/index.html +14 -0
  52. package/web/ui/tsconfig.json +21 -0
  53. package/src/oauth.test.ts +0 -2156
  54. package/src/oauth.ts +0 -973
package/src/routes.ts CHANGED
@@ -45,6 +45,7 @@ import {
45
45
  import { join, extname, normalize } from "path";
46
46
  import { existsSync, mkdirSync, readFileSync, statSync, unlinkSync, writeFileSync } from "fs";
47
47
  import { vaultDir } from "./config.ts";
48
+ import { shouldAutoTranscribe } from "./auto-transcribe.ts";
48
49
 
49
50
  // ---------------------------------------------------------------------------
50
51
  // Helpers
@@ -509,6 +510,21 @@ async function handleNotesInner(
509
510
  return json(result);
510
511
  }
511
512
 
513
+ // Cursor + full-text search is mutually exclusive (vault#313 reviewer).
514
+ // FTS owns its own ordering (relevance, not updated_at), so a cursor
515
+ // would skip rows. MCP rejects this combo at `core/src/mcp.ts`; REST
516
+ // would otherwise route into the `if (search)` branch below and
517
+ // silently drop the cursor. Reject here for surface parity.
518
+ if (search && parseQuery(url, "cursor")) {
519
+ return json(
520
+ {
521
+ error: "cursor is incompatible with full-text search — FTS has its own ordering. Use date_filter on updated_at for since-last-checked search.",
522
+ code: "INVALID_QUERY",
523
+ },
524
+ 400,
525
+ );
526
+ }
527
+
512
528
  // Full-text search
513
529
  if (search) {
514
530
  const searchTags = parseQueryList(url, "tag");
@@ -564,49 +580,74 @@ async function handleNotesInner(
564
580
  const tags = parseQueryList(url, "tag");
565
581
  const bracket = parseMetaBrackets(url);
566
582
  if (bracket.error) return bracket.error;
583
+ // Opaque cursor for "since last checked" agent loops (vault#313).
584
+ // When present, switches the response shape to {notes, next_cursor}
585
+ // and routes through queryNotesPaged for keyset pagination. Mutually
586
+ // exclusive with the `near` graph-neighborhood scope (rebuilding the
587
+ // neighborhood per page isn't stable) — rejected below.
588
+ const cursorParam = parseQuery(url, "cursor");
589
+ const nearNoteIdEarly = parseQuery(url, "near[note_id]");
590
+ if (cursorParam && nearNoteIdEarly) {
591
+ return json(
592
+ {
593
+ error: "cursor is incompatible with near (graph neighborhood). Resolve the neighborhood first, then iterate with cursor over the resulting note set.",
594
+ code: "INVALID_QUERY",
595
+ },
596
+ 400,
597
+ );
598
+ }
567
599
  let results: Note[];
600
+ let nextCursor: string | null = null;
601
+ const queryOpts = {
602
+ tags,
603
+ tagMatch: (parseQuery(url, "tag_match") as "all" | "any") ?? (tags && tags.length > 1 ? "any" : undefined),
604
+ excludeTags: parseQueryList(url, "exclude_tag"),
605
+ hasTags: parseBoolOrUndef(parseQuery(url, "has_tags")),
606
+ hasLinks: parseBoolOrUndef(parseQuery(url, "has_links")),
607
+ path: parseQuery(url, "path") ?? undefined,
608
+ pathPrefix: parseQuery(url, "path_prefix") ?? undefined,
609
+ // Extension filter (vault#328). Accepts repeated `extension=`
610
+ // params for the array form: `?extension=csv&extension=yaml`.
611
+ // `parseQueryList` already returns undefined when no params
612
+ // are present, so the filter is silently skipped on a plain
613
+ // GET without the extension query.
614
+ extension: parseExtensionFilter(url),
615
+ metadata: bracket.metadata,
616
+ // Date-range precedence chain (highest to lowest):
617
+ // 1. Bracket-style `meta[created_at][gte]=…` (canonical).
618
+ // 2. Flat `date_field=…&date_from=…&date_to=…` (deprecated).
619
+ // 3. Legacy `date_from=…&date_to=…` (no date_field, deprecated)
620
+ // — filters on `n.created_at` by definition.
621
+ // The engine rejects combinations of `dateFilter` with the legacy
622
+ // `dateFrom`/`dateTo`, so we never set both shapes simultaneously.
623
+ ...(bracket.dateFilter
624
+ ? { dateFilter: bracket.dateFilter }
625
+ : parseQuery(url, "date_field")
626
+ ? {
627
+ dateFilter: {
628
+ field: parseQuery(url, "date_field")!,
629
+ from: parseQuery(url, "date_from") ?? undefined,
630
+ to: parseQuery(url, "date_to") ?? undefined,
631
+ },
632
+ }
633
+ : {
634
+ dateFrom: parseQuery(url, "date_from") ?? undefined,
635
+ dateTo: parseQuery(url, "date_to") ?? undefined,
636
+ }),
637
+ sort: (parseQuery(url, "sort") as "asc" | "desc") ?? undefined,
638
+ orderBy: parseQuery(url, "order_by") ?? undefined,
639
+ limit: parseInt10(parseQuery(url, "limit")) ?? 50,
640
+ offset: parseInt10(parseQuery(url, "offset")),
641
+ cursor: cursorParam ?? undefined,
642
+ };
568
643
  try {
569
- results = await store.queryNotes({
570
- tags,
571
- tagMatch: (parseQuery(url, "tag_match") as "all" | "any") ?? (tags && tags.length > 1 ? "any" : undefined),
572
- excludeTags: parseQueryList(url, "exclude_tag"),
573
- hasTags: parseBoolOrUndef(parseQuery(url, "has_tags")),
574
- hasLinks: parseBoolOrUndef(parseQuery(url, "has_links")),
575
- path: parseQuery(url, "path") ?? undefined,
576
- pathPrefix: parseQuery(url, "path_prefix") ?? undefined,
577
- // Extension filter (vault#328). Accepts repeated `extension=`
578
- // params for the array form: `?extension=csv&extension=yaml`.
579
- // `parseQueryList` already returns undefined when no params
580
- // are present, so the filter is silently skipped on a plain
581
- // GET without the extension query.
582
- extension: parseExtensionFilter(url),
583
- metadata: bracket.metadata,
584
- // Date-range precedence chain (highest to lowest):
585
- // 1. Bracket-style `meta[created_at][gte]=…` (canonical).
586
- // 2. Flat `date_field=…&date_from=…&date_to=…` (deprecated).
587
- // 3. Legacy `date_from=…&date_to=…` (no date_field, deprecated)
588
- // — filters on `n.created_at` by definition.
589
- // The engine rejects combinations of `dateFilter` with the legacy
590
- // `dateFrom`/`dateTo`, so we never set both shapes simultaneously.
591
- ...(bracket.dateFilter
592
- ? { dateFilter: bracket.dateFilter }
593
- : parseQuery(url, "date_field")
594
- ? {
595
- dateFilter: {
596
- field: parseQuery(url, "date_field")!,
597
- from: parseQuery(url, "date_from") ?? undefined,
598
- to: parseQuery(url, "date_to") ?? undefined,
599
- },
600
- }
601
- : {
602
- dateFrom: parseQuery(url, "date_from") ?? undefined,
603
- dateTo: parseQuery(url, "date_to") ?? undefined,
604
- }),
605
- sort: (parseQuery(url, "sort") as "asc" | "desc") ?? undefined,
606
- orderBy: parseQuery(url, "order_by") ?? undefined,
607
- limit: parseInt10(parseQuery(url, "limit")) ?? 50,
608
- offset: parseInt10(parseQuery(url, "offset")),
609
- });
644
+ if (cursorParam) {
645
+ const page = await store.queryNotesPaged(queryOpts);
646
+ results = page.notes;
647
+ nextCursor = page.next_cursor;
648
+ } else {
649
+ results = await store.queryNotes(queryOpts);
650
+ }
610
651
  } catch (e: any) {
611
652
  // QueryError (non-indexed order_by, unknown operator, ...) surfaces
612
653
  // here. Duck-type on `name` + `code` — core is a separate module, so
@@ -614,6 +655,14 @@ async function handleNotesInner(
614
655
  if (e && e.name === "QueryError") {
615
656
  return json({ error: e.message, code: e.code ?? "INVALID_QUERY" }, 400);
616
657
  }
658
+ // CursorError carries a structured code (cursor_invalid /
659
+ // cursor_query_mismatch) so the agent loop can distinguish a
660
+ // malformed cursor from a hash-mismatch and react appropriately
661
+ // (the latter typically means the agent changed its filter and
662
+ // should drop the cursor + restart from scratch).
663
+ if (e && e.name === "CursorError") {
664
+ return json({ error: e.message, code: e.code ?? "cursor_invalid" }, 400);
665
+ }
617
666
  throw e;
618
667
  }
619
668
 
@@ -682,9 +731,14 @@ async function handleNotesInner(
682
731
  if (includeAttachments) enriched.attachments = await store.getAttachments(n.id);
683
732
  enrichedOut.push(enriched);
684
733
  }
734
+ // Cursor mode wraps the list in {notes, next_cursor} so an agent
735
+ // loop can chain calls without tracking a watermark client-side.
736
+ // Legacy callers (no `cursor` param) still get the flat array.
737
+ if (cursorParam) return json({ notes: enrichedOut, next_cursor: nextCursor });
685
738
  return json(enrichedOut);
686
739
  }
687
740
 
741
+ if (cursorParam) return json({ notes: output, next_cursor: nextCursor });
688
742
  return json(output);
689
743
  }
690
744
 
@@ -813,19 +867,33 @@ async function handleNotesInner(
813
867
  const body = await req.json() as { path: string; mimeType: string; transcribe?: boolean };
814
868
  if (!body.path || !body.mimeType) return json({ error: "path and mimeType are required" }, 400);
815
869
 
816
- // `transcribe: true` asks the transcription worker to read this audio
817
- // file and replace the note's content with the transcript. The caller
818
- // is declaring "overwrite my current content when the transcript lands"
819
- // we persist that as `transcribe_stub: true` on the note so a later
820
- // user edit (which clears the marker) can opt out before the worker
821
- // runs.
822
- const attMeta = body.transcribe
823
- ? { transcribe_status: "pending" as const, transcribe_requested_at: new Date().toISOString() }
870
+ // Decide whether to enqueue this attachment for transcription. Two paths:
871
+ //
872
+ // - **Explicit caller opt-in (legacy path, Lens flow):** `transcribe: true`
873
+ // on the POST. The note already has a `_Transcript pending._` stub the
874
+ // worker replaces on success; `transcribe_origin: "legacy"` preserves
875
+ // the stub-patching behavior.
876
+ // - **Auto-transcribe (vault#353):** mime-type is `audio/*` AND the
877
+ // operator has flipped `auto_transcribe.enabled = true` AND scribe is
878
+ // reachable. The caller didn't opt in explicitly; we infer from the
879
+ // audio mime-type. `transcribe_origin: "auto"` tells the worker to
880
+ // materialize a `<attachment-path>.transcript.md` note on completion.
881
+ //
882
+ // Explicit `transcribe: true` wins — if the caller asked, we honor that
883
+ // regardless of the auto-transcribe toggle (back-compat).
884
+ const explicitOptIn = body.transcribe === true;
885
+ const autoOptIn = !explicitOptIn && shouldAutoTranscribe(body.mimeType);
886
+ const attMeta = (explicitOptIn || autoOptIn)
887
+ ? {
888
+ transcribe_status: "pending" as const,
889
+ transcribe_requested_at: new Date().toISOString(),
890
+ transcribe_origin: (explicitOptIn ? "legacy" : "auto") as "legacy" | "auto",
891
+ }
824
892
  : undefined;
825
893
 
826
894
  const attachment = await store.addAttachment(note.id, body.path, body.mimeType, attMeta);
827
895
 
828
- if (body.transcribe) {
896
+ if (explicitOptIn) {
829
897
  const noteMeta = (note.metadata as Record<string, unknown> | undefined) ?? {};
830
898
  if (noteMeta.transcribe_stub !== true) {
831
899
  await store.updateNote(note.id, {
@@ -874,6 +942,33 @@ async function handleNotesInner(
874
942
  return json({ error: "Method not allowed" }, 405);
875
943
  }
876
944
 
945
+ // POST /notes/:idOrPath/retry-transcription — vault#353 design Q5.
946
+ //
947
+ // Re-runs the auto-transcribe pipeline against the original audio
948
+ // attachment recorded in the transcript note's `transcript_attachment_id`
949
+ // frontmatter. Only valid on transcript notes (the target idOrPath must
950
+ // be a transcript note with `transcript_status: "failed"`); calling on
951
+ // anything else returns 400 with a clear reason.
952
+ //
953
+ // Wire shape:
954
+ // POST .../notes/<idOrPath>/retry-transcription
955
+ // → 202 { status, attachment_id, attachment_path, transcript_note_id, worker } when re-enqueued
956
+ // 400 invalid_target (not a transcript note) / missing_attachment_id (no transcript_attachment_id)
957
+ // 400 not_failed (transcript already succeeded; nothing to retry)
958
+ // 404 attachment_missing (transcript_attachment_id row deleted)
959
+ // 404 audio_missing (audio file unlinked from disk)
960
+ // 503 scribe_unavailable (reserved, not returned yet: with no worker the row is reset and left for the sweep)
961
+ if (sub === "/retry-transcription") {
962
+ if (method !== "POST") return json({ error: "Method not allowed" }, 405);
963
+ if (!vault) return json({ error: "Vault context required" }, 400);
964
+ const note = await resolveNote(store, idOrPath);
965
+ if (!note) return json({ error: "Not found" }, 404);
966
+ if (!noteWithinTagScope(note, tagScope.allowed, tagScope.raw)) {
967
+ return json({ error: "Not found" }, 404);
968
+ }
969
+ return handleRetryTranscription(store, note, vault);
970
+ }
971
+
877
972
  if (sub !== "") return json({ error: "Not found" }, 404);
878
973
 
879
974
  // GET /notes/:idOrPath — single note
@@ -1213,7 +1308,7 @@ async function handleNotesInner(
1213
1308
  }
1214
1309
  }
1215
1310
 
1216
- // DELETE /notes/:idOrPath — admin only (enforced at server level)
1311
+ // DELETE /notes/:idOrPath — vault:write (no admin gate; consistent with verbForMethod)
1217
1312
  if (method === "DELETE") {
1218
1313
  const note = await resolveNote(store, idOrPath);
1219
1314
  if (!note) return json({ error: "Not found" }, 404);
@@ -1823,6 +1918,128 @@ ${rendered}
1823
1918
  });
1824
1919
  }
1825
1920
 
1921
+ // ---------------------------------------------------------------------------
1922
+ // Retry transcription (vault#353 design Q5)
1923
+ // ---------------------------------------------------------------------------
1924
+
1925
+ /**
1926
+ * Re-enqueue the original audio attachment for a `transcript_status: failed`
1927
+ * transcript note. Steps:
1928
+ *
1929
+ * 1. Validate target is a transcript note (`transcript_status` set in
1930
+ * metadata) AND that status is `failed`.
1931
+ * 2. Find the original audio attachment by id from
1932
+ * `transcript_attachment_id` frontmatter. 404 if the row's gone.
1933
+ * 3. Validate the audio file still exists on disk (retention=keep is
1934
+ * assumed by the retry contract; retention=until_transcribed unlinks
1935
+ * only on success, retention=never unlinks on failure — that last one
1936
+ * explicitly breaks retry, by design).
1937
+ * 4. Reset `transcribe_status = "pending"`, clear backoff + error fields.
1938
+ * The auto-origin marker is preserved so the worker writes a transcript
1939
+ * note (overwriting this one in place).
1940
+ * 5. Kick the worker if registered; otherwise the sweep picks it up.
1941
+ */
1942
+ async function handleRetryTranscription(
1943
+ store: Store,
1944
+ note: Note,
1945
+ vault: string,
1946
+ ): Promise<Response> {
1947
+ const meta = (note.metadata as Record<string, unknown> | undefined) ?? {};
1948
+ if (typeof meta.transcript_status !== "string") {
1949
+ return json(
1950
+ {
1951
+ error: "invalid_target",
1952
+ message: "Target note is not a transcript note (no transcript_status frontmatter).",
1953
+ },
1954
+ 400,
1955
+ );
1956
+ }
1957
+ if (meta.transcript_status !== "failed") {
1958
+ return json(
1959
+ {
1960
+ error: "not_failed",
1961
+ message: `Transcript note status is "${meta.transcript_status}" — only failed transcripts can be retried.`,
1962
+ transcript_status: meta.transcript_status,
1963
+ },
1964
+ 400,
1965
+ );
1966
+ }
1967
+ const attachmentId = typeof meta.transcript_attachment_id === "string"
1968
+ ? meta.transcript_attachment_id
1969
+ : undefined;
1970
+ if (!attachmentId) {
1971
+ return json(
1972
+ {
1973
+ error: "missing_attachment_id",
1974
+ message: "Transcript note has no `transcript_attachment_id` — can't locate the original audio.",
1975
+ },
1976
+ 400,
1977
+ );
1978
+ }
1979
+ const attachment = await store.getAttachment(attachmentId);
1980
+ if (!attachment) {
1981
+ return json(
1982
+ {
1983
+ error: "attachment_missing",
1984
+ message: `Original audio attachment ${attachmentId} no longer exists in the vault.`,
1985
+ },
1986
+ 404,
1987
+ );
1988
+ }
1989
+ // Audio file existence + safety: defense-in-depth against a bad attachment
1990
+ // row pointing outside the vault assets dir. Same guard as the worker.
1991
+ const assetsRoot = assetsDir(vault);
1992
+ const audioFilePath = normalize(join(assetsRoot, attachment.path));
1993
+ if (!audioFilePath.startsWith(normalize(assetsRoot)) || !existsSync(audioFilePath)) {
1994
+ return json(
1995
+ {
1996
+ error: "audio_missing",
1997
+ message: `Original audio file at "${attachment.path}" no longer exists on disk.`,
1998
+ },
1999
+ 404,
2000
+ );
2001
+ }
2002
+
2003
+ // Reset transcribe_status. Worker reads this row, sees "pending", processes
2004
+ // it. Preserve `transcribe_origin: "auto"` so the worker materializes the
2005
+ // transcript note (overwriting this failed note in place).
2006
+ const attMeta = { ...(attachment.metadata ?? {}) } as Record<string, unknown>;
2007
+ attMeta.transcribe_status = "pending";
2008
+ attMeta.transcribe_requested_at = new Date().toISOString();
2009
+ attMeta.transcribe_origin = "auto";
2010
+ delete attMeta.transcribe_backoff_until;
2011
+ delete attMeta.transcribe_error;
2012
+ delete attMeta.transcribe_error_code;
2013
+ delete attMeta.transcribe_attempts;
2014
+ await store.setAttachmentMetadata(attachment.id, attMeta);
2015
+
2016
+ // Kick the worker for an event-driven re-run (no 30s sweep wait). The
2017
+ // worker re-reads the row + processes immediately. If the worker isn't
2018
+ // registered (scribe not configured this boot), we still reset the row;
2019
+ // the next boot's sweep will pick it up. The 503 path is for callers that
2020
+ // want certainty — but for v0.6 the sweep guarantee is enough.
2021
+ const { getTranscriptionWorker } = await import("./transcription-registry.ts");
2022
+ const worker = getTranscriptionWorker();
2023
+ if (worker) {
2024
+ // Refresh the attachment after the metadata write so the worker's
2025
+ // in-process dedupe check sees pending.
2026
+ const fresh = await store.getAttachment(attachment.id) ?? attachment;
2027
+ // Fire-and-forget — the response shouldn't wait on transcription.
2028
+ void worker.kick(vault, fresh);
2029
+ }
2030
+
2031
+ return json(
2032
+ {
2033
+ status: "queued",
2034
+ attachment_id: attachment.id,
2035
+ attachment_path: attachment.path,
2036
+ transcript_note_id: note.id,
2037
+ worker: worker ? "kicked" : "sweep-only",
2038
+ },
2039
+ 202,
2040
+ );
2041
+ }
2042
+
1826
2043
  // ---------------------------------------------------------------------------
1827
2044
  // Storage (file upload/serve) — kept as-is, Daily needs it
1828
2045
  // ---------------------------------------------------------------------------