@openparachute/vault 0.5.1 → 0.5.2-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/routes.ts CHANGED
@@ -13,11 +13,13 @@
13
13
 
14
14
  import type { Store, Note } from "../core/src/types.ts";
15
15
  import { listUnresolvedWikilinks } from "../core/src/wikilinks.ts";
16
- import { toNoteIndex, filterMetadata, MAX_BATCH_SIZE, validateExtension, ExtensionValidationError } from "../core/src/notes.ts";
16
+ import { getNote, toNoteIndex, filterMetadata, MAX_BATCH_SIZE, validateExtension, ExtensionValidationError } from "../core/src/notes.ts";
17
17
  import { attachValidationStatus } from "../core/src/mcp.ts";
18
18
  import * as linkOps from "../core/src/links.ts";
19
19
  import * as tagSchemaOps from "../core/src/tag-schemas.ts";
20
20
  import {
21
+ buildExpandVisibility,
22
+ filterHydratedLinksByTagScope,
21
23
  filterNotesByTagScope,
22
24
  noteWithinTagScope,
23
25
  tagScopeForbidden,
@@ -44,8 +46,16 @@ import {
44
46
  } from "../core/src/expand.ts";
45
47
  import { join, extname, normalize } from "path";
46
48
  import { existsSync, mkdirSync, readFileSync, statSync, unlinkSync, writeFileSync } from "fs";
47
- import { vaultDir } from "./config.ts";
49
+ import { assetsDir } from "./config.ts";
48
50
  import { shouldAutoTranscribe } from "./auto-transcribe.ts";
51
+ // usage.ts imports `assetsDir` from config.ts (neutral ground), so this import
52
+ // of invalidateUsageCache does NOT form a cycle — routes.ts → usage.ts only.
53
+ import { invalidateUsageCache } from "./usage.ts";
54
+
55
+ // Re-export `assetsDir` (now defined in config.ts) so the existing callers
56
+ // that import it from this module — mirror-deps, mirror-routes, server,
57
+ // triggers, cli — keep working unchanged.
58
+ export { assetsDir };
49
59
 
50
60
  // ---------------------------------------------------------------------------
51
61
  // Helpers
@@ -71,6 +81,22 @@ function parseQuery(url: URL, key: string): string | null {
71
81
  return url.searchParams.get(key);
72
82
  }
73
83
 
84
+ /**
85
+ * Parse `link_count_direction` (vault feedback #4). Defaults to "both";
86
+ * anything other than the three known values falls back to "both" so a
87
+ * typo silently degrades to the documented default rather than erroring.
88
+ *
89
+ * Tag-scope note (symmetric with the MCP param description): `linkCount`
90
+ * is a raw degree that MAY include edges to notes a tag-scoped token can't
91
+ * see — the tag-scope filter runs post-query, over the result notes, not
92
+ * their neighbors. Only the number leaks, not the neighbor.
93
+ */
94
+ function parseLinkCountDirection(url: URL): "both" | "outbound" | "inbound" {
95
+ const v = url.searchParams.get("link_count_direction");
96
+ if (v === "outbound" || v === "inbound") return v;
97
+ return "both";
98
+ }
99
+
74
100
  function parseQueryList(url: URL, key: string): string[] | undefined {
75
101
  const val = url.searchParams.get(key);
76
102
  return val ? val.split(",") : undefined;
@@ -353,6 +379,62 @@ function parseMetaBrackets(url: URL): {
353
379
  return result;
354
380
  }
355
381
 
382
+ /**
383
+ * Parse the `?metadata=<json>` alias on GET /api/notes — the JSON-object form
384
+ * of the metadata filter, symmetric with the nested `metadata` object MCP
385
+ * forwards verbatim (`core/src/mcp.ts`). The value is a JSON object of the form
386
+ * `{"field":{"op":value}}` (operator query) or `{"field":value}` (shorthand
387
+ * equality via the engine's json_extract fallback).
388
+ *
389
+ * This exists because the bracket grammar (`?meta[field][op]=value`) couldn't
390
+ * see a `metadata=` param at all — it was silently dropped, and the query
391
+ * returned ALL tag-matching notes (a silent wrong result, not an error).
392
+ *
393
+ * We do NOT validate operators here — the parsed object lowers straight into
394
+ * `queryNotes`, where `validateOperatorObject` raises a loud 400 on unknown
395
+ * operators (caught by the QueryError handler in handleNotes). We only enforce
396
+ * that the param parses and is a non-null, non-array plain object; anything
397
+ * else is a malformed filter the engine can't consume.
398
+ *
399
+ * An empty object (`metadata={}`) carries no filter intent, so it's treated as
400
+ * absent: it sets no metadata filter AND doesn't trip the both-forms guard, so
401
+ * it composes harmlessly with bracket params.
402
+ *
403
+ * Returns `{ metadata?, error? }`. When `error` is set the caller returns it
404
+ * directly (already shaped as a 400 with `error` + `code`).
405
+ */
406
+ function parseMetadataJsonAlias(url: URL): {
407
+ metadata?: Record<string, unknown>;
408
+ error?: Response;
409
+ } {
410
+ const raw = parseQuery(url, "metadata");
411
+ if (raw === null) return {};
412
+
413
+ const malformed = (detail: string): Response =>
414
+ json(
415
+ {
416
+ error: `metadata query param must be a JSON object of the form {"field":{"op":value}} — ${detail}`,
417
+ code: "INVALID_QUERY",
418
+ },
419
+ 400,
420
+ );
421
+
422
+ let parsed: unknown;
423
+ try {
424
+ parsed = JSON.parse(raw);
425
+ } catch (e) {
426
+ return { error: malformed(`failed to parse: ${e instanceof Error ? e.message : String(e)}`) };
427
+ }
428
+ if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
429
+ const got = Array.isArray(parsed) ? "array" : parsed === null ? "null" : typeof parsed;
430
+ return { error: malformed(`got ${got}`) };
431
+ }
432
+ // Empty object → no filter intent. Return absent so it neither sets a
433
+ // metadata filter nor trips the both-forms guard in the handler.
434
+ if (Object.keys(parsed).length === 0) return {};
435
+ return { metadata: parsed as Record<string, unknown> };
436
+ }
437
+
356
438
  /**
357
439
  * Parse include_metadata query param.
358
440
  * - absent/null → undefined (all metadata, default)
@@ -373,10 +455,20 @@ function parseIncludeMetadata(url: URL): boolean | string[] | undefined {
373
455
  /**
374
456
  * Parse expand_links/expand_depth/expand_mode from query params, returning
375
457
  * an (ExpandContext, depth) pair if expansion is requested, else null.
458
+ *
459
+ * `tagScope` (security review): when the caller is tag-scoped, an `isVisible`
460
+ * predicate is built from the SAME tag-scope allowlist the rest of the
461
+ * handler uses and injected into the expand context. The expander then
462
+ * leaves any `[[wikilink]]` whose target is out of scope UNRESOLVED — so
463
+ * `expand_links=true` can never inline out-of-scope note content (and the
464
+ * out-of-scope case is byte-indistinguishable from a missing target). For
465
+ * an unscoped token the predicate is `undefined` and expansion behaves
466
+ * exactly as before.
376
467
  */
377
468
  function parseExpandParams(
378
469
  url: URL,
379
470
  db: any,
471
+ tagScope: TagScopeCtx = NO_TAG_SCOPE,
380
472
  ): { ctx: ExpandContext; depth: number } | null {
381
473
  if (!parseBool(parseQuery(url, "expand_links"), false)) return null;
382
474
  const modeRaw = parseQuery(url, "expand_mode");
@@ -388,7 +480,8 @@ function parseExpandParams(
388
480
  MAX_EXPAND_DEPTH,
389
481
  ),
390
482
  );
391
- return { ctx: { db, mode, expanded: new Set() }, depth };
483
+ const isVisible = buildExpandVisibility(tagScope.allowed, tagScope.raw);
484
+ return { ctx: { db, mode, expanded: new Set(), ...(isVisible ? { isVisible } : {}) }, depth };
392
485
  }
393
486
 
394
487
 
@@ -495,18 +588,30 @@ async function handleNotesInner(
495
588
  }
496
589
  const includeContent = parseBool(parseQuery(url, "include_content"), true);
497
590
  let result: any = includeContent ? { ...note } : toNoteIndex(note);
498
- const expand = parseExpandParams(url, db);
591
+ const expand = parseExpandParams(url, db, tagScope);
499
592
  if (expand && includeContent && typeof result.content === "string") {
500
593
  expand.ctx.expanded.add(note.id);
501
594
  result.content = expandContent(result.content, expand.ctx, expand.depth);
502
595
  }
503
596
  result = filterMetadata(result, parseIncludeMetadata(url));
504
597
  if (parseBool(parseQuery(url, "include_links"), false)) {
505
- result.links = linkOps.getLinksHydrated(db, note.id);
598
+ // Tag-scope: drop links whose neighbor is out of scope so the
599
+ // hydrated sourceNote/targetNote summaries can't leak out-of-scope
600
+ // ids/paths/tags. No-op for unscoped tokens.
601
+ result.links = filterHydratedLinksByTagScope(
602
+ linkOps.getLinksHydrated(db, note.id),
603
+ tagScope.allowed,
604
+ tagScope.raw,
605
+ );
506
606
  }
507
607
  if (parseBool(parseQuery(url, "include_attachments"), false)) {
508
608
  result.attachments = await store.getAttachments(note.id);
509
609
  }
610
+ // linkCount injected after filterMetadata on purpose — same as
611
+ // links/attachments above; filterMetadata only touches `metadata`.
612
+ if (parseBool(parseQuery(url, "include_link_count"), false)) {
613
+ result.linkCount = linkOps.getLinkCounts(db, [note.id], parseLinkCountDirection(url)).get(note.id) ?? 0;
614
+ }
510
615
  return json(result);
511
616
  }
512
617
 
@@ -537,7 +642,7 @@ async function handleNotesInner(
537
642
  const includeContent = parseBool(parseQuery(url, "include_content"), false);
538
643
  const inclMeta = parseIncludeMetadata(url);
539
644
  let output: any[] = includeContent ? results.map((n) => ({ ...n })) : results.map(toNoteIndex);
540
- const expand = parseExpandParams(url, db);
645
+ const expand = parseExpandParams(url, db, tagScope);
541
646
  if (expand && includeContent) {
542
647
  for (const n of output) expand.ctx.expanded.add(n.id);
543
648
  for (const n of output) {
@@ -549,6 +654,18 @@ async function handleNotesInner(
549
654
  if (inclMeta !== undefined && inclMeta !== true) {
550
655
  output = output.map((n: any) => filterMetadata(n, inclMeta));
551
656
  }
657
+ // Opt-in link degree (vault feedback #4) — one batch count, same as
658
+ // the structured-query path. Runs AFTER filterMetadata on purpose
659
+ // (filterMetadata only touches `metadata`, so linkCount survives —
660
+ // don't casually swap the order).
661
+ if (parseBool(parseQuery(url, "include_link_count"), false)) {
662
+ const counts = linkOps.getLinkCounts(
663
+ db,
664
+ output.map((n: any) => n.id),
665
+ parseLinkCountDirection(url),
666
+ );
667
+ for (const n of output) n.linkCount = counts.get(n.id) ?? 0;
668
+ }
552
669
  return json(output);
553
670
  }
554
671
 
@@ -580,6 +697,27 @@ async function handleNotesInner(
580
697
  const tags = parseQueryList(url, "tag");
581
698
  const bracket = parseMetaBrackets(url);
582
699
  if (bracket.error) return bracket.error;
700
+ // `?metadata=<json>` alias — the JSON-object form of the metadata
701
+ // filter, symmetric with the nested object MCP forwards. Before this,
702
+ // a `metadata=` param was silently dropped (the bracket grammar never
703
+ // matched it), so the query returned ALL tag-matching notes.
704
+ const metadataAlias = parseMetadataJsonAlias(url);
705
+ if (metadataAlias.error) return metadataAlias.error;
706
+ // Reject "both forms" loudly. If a caller passes BOTH the JSON
707
+ // `metadata=` param AND any `meta[...]` bracket param, there's no
708
+ // well-defined merge and silently picking a winner is exactly the
709
+ // class of bug we're fixing. Symmetric with the mixed shorthand/
710
+ // operator rejection inside parseMetaBrackets. Guard stays narrow —
711
+ // only the named `metadata` param triggers it.
712
+ if (metadataAlias.metadata && bracket.metadata) {
713
+ return json(
714
+ {
715
+ error: "pass metadata filters as either the JSON `metadata=` param or bracket `meta[field][op]=` form, not both.",
716
+ code: "INVALID_QUERY",
717
+ },
718
+ 400,
719
+ );
720
+ }
583
721
  // Opaque cursor for "since last checked" agent loops (vault#313).
584
722
  // When present, switches the response shape to {notes, next_cursor}
585
723
  // and routes through queryNotesPaged for keyset pagination. Mutually
@@ -612,7 +750,9 @@ async function handleNotesInner(
612
750
  // are present, so the filter is silently skipped on a plain
613
751
  // GET without the extension query.
614
752
  extension: parseExtensionFilter(url),
615
- metadata: bracket.metadata,
753
+ // Bracket form and JSON-alias form are mutually exclusive (guarded
754
+ // above), so at most one of these is set.
755
+ metadata: bracket.metadata ?? metadataAlias.metadata,
616
756
  // Date-range precedence chain (highest to lowest):
617
757
  // 1. Bracket-style `meta[created_at][gte]=…` (canonical).
618
758
  // 2. Flat `date_field=…&date_from=…&date_to=…` (deprecated).
@@ -677,6 +817,15 @@ async function handleNotesInner(
677
817
  }
678
818
  const depth = Math.min(parseInt10(parseQuery(url, "near[depth]")) ?? 2, 5);
679
819
  const relationship = parseQuery(url, "near[relationship]") ?? undefined;
820
+ // Tag-scope policy for `near[]` (output-filter, not hop-guard): the
821
+ // BFS walks the FULL graph from the anchor, including out-of-scope
822
+ // intermediate hops, then the RESULT set is tag-scope-filtered below
823
+ // (`filterNotesByTagScope`). No out-of-scope content or ids leak —
824
+ // out-of-scope notes never survive into the response. This is
825
+ // ASYMMETRIC with `find-path`, which guards every hop (it returns the
826
+ // path itself, so an out-of-scope intermediary would be a leak there).
827
+ // The asymmetry is deliberate; tracked at vault#439 should we ever want
828
+ // `near[]` to also constrain traversal hops.
680
829
  const traversed = linkOps.traverseLinks(db, anchor.id, { max_depth: depth, relationship });
681
830
  const nearScope = new Set([anchor.id, ...traversed.map((t) => t.noteId)]);
682
831
  results = results.filter((n) => nearScope.has(n.id));
@@ -690,9 +839,10 @@ async function handleNotesInner(
690
839
  const includeContent = parseBool(parseQuery(url, "include_content"), false);
691
840
  const includeLinks = parseBool(parseQuery(url, "include_links"), false);
692
841
  const includeAttachments = parseBool(parseQuery(url, "include_attachments"), false);
842
+ const includeLinkCount = parseBool(parseQuery(url, "include_link_count"), false);
693
843
  const inclMeta = parseIncludeMetadata(url);
694
844
  let output: any[] = includeContent ? results.map((n) => ({ ...n })) : results.map(toNoteIndex);
695
- const expand = parseExpandParams(url, db);
845
+ const expand = parseExpandParams(url, db, tagScope);
696
846
  if (expand && includeContent) {
697
847
  for (const n of output) expand.ctx.expanded.add(n.id);
698
848
  for (const n of output) {
@@ -704,6 +854,24 @@ async function handleNotesInner(
704
854
  if (inclMeta !== undefined && inclMeta !== true) {
705
855
  output = output.map((n: any) => filterMetadata(n, inclMeta));
706
856
  }
857
+ // Opt-in link degree (vault feedback #4). ONE batch count over all
858
+ // result ids — NOT a per-note query — so the field stays O(2 index
859
+ // scans) per request regardless of page size. Mutates `output` in
860
+ // place; injected on the same objects the enrichment loop below
861
+ // touches, the same way `links`/`attachments` are surfaced.
862
+ // Ordering: this runs AFTER the filterMetadata pass above on purpose —
863
+ // filterMetadata only touches the `metadata` key, so a linkCount
864
+ // injected here survives. Don't casually swap the order; injecting
865
+ // before filterMetadata would still survive today but couples the two
866
+ // to filterMetadata's current narrow behavior.
867
+ if (includeLinkCount) {
868
+ const counts = linkOps.getLinkCounts(
869
+ db,
870
+ output.map((n: any) => n.id),
871
+ parseLinkCountDirection(url),
872
+ );
873
+ for (const n of output) n.linkCount = counts.get(n.id) ?? 0;
874
+ }
707
875
 
708
876
  // Graph format — reshape into { nodes, edges }
709
877
  if (parseQuery(url, "format") === "graph") {
@@ -727,7 +895,14 @@ async function handleNotesInner(
727
895
  const enrichedOut: any[] = [];
728
896
  for (const n of output) {
729
897
  const enriched: any = { ...n };
730
- if (includeLinks) enriched.links = linkOps.getLinksHydrated(db, n.id);
898
+ if (includeLinks) {
899
+ // Tag-scope: strip out-of-scope-neighbor links (no-op unscoped).
900
+ enriched.links = filterHydratedLinksByTagScope(
901
+ linkOps.getLinksHydrated(db, n.id),
902
+ tagScope.allowed,
903
+ tagScope.raw,
904
+ );
905
+ }
731
906
  if (includeAttachments) enriched.attachments = await store.getAttachments(n.id);
732
907
  enrichedOut.push(enriched);
733
908
  }
@@ -942,22 +1117,26 @@ async function handleNotesInner(
942
1117
  return json({ error: "Method not allowed" }, 405);
943
1118
  }
944
1119
 
945
- // POST /notes/:idOrPath/retry-transcription — vault#353 design Q5.
1120
+ // POST /notes/:idOrPath/retry-transcription — vault#353 design Q5 + finding F.
946
1121
  //
947
- // Re-runs the auto-transcribe pipeline against the original audio
948
- // attachment recorded in the transcript note's `transcript_attachment_id`
949
- // frontmatter. Only valid on transcript notes (the target idOrPath must
950
- // be a transcript note with `transcript_status: "failed"`); calling on
951
- // anything else returns 400 with a clear reason.
1122
+ // Re-runs transcription against a failed audio attachment. Two target
1123
+ // shapes, dispatched in handleRetryTranscription by whether the note carries
1124
+ // `transcript_status` frontmatter:
1125
+ // - Auto-flow (vault#353): target is a `<audio>.transcript.md` note with
1126
+ // `transcript_status: "failed"`; audio located via
1127
+ // `transcript_attachment_id`, origin preserved as "auto".
1128
+ // - Legacy in-body memo (finding F): target is the memo note itself (no
1129
+ // `transcript_status`); finds its own failed attachment, resets it
1130
+ // preserving `transcribe_origin: "legacy"`, and re-arms `transcribe_stub`.
952
1131
  //
953
1132
  // Wire shape:
954
1133
  // POST .../notes/<idOrPath>/retry-transcription
955
- // → 202 { attachment_id, transcript_path } when re-enqueued
956
- // 400 invalid_target (not a transcript note)
957
- // 400 not_failed (transcript already succeeded; nothing to retry)
958
- // 404 attachment_missing (transcript_attachment_id row deleted)
1134
+ // → 202 { attachment_id, attachment_path, transcript_note_id, worker }
1135
+ // 400 not_failed (auto-flow: transcript already succeeded)
1136
+ // 400 missing_attachment_id (auto-flow: transcript_attachment_id absent)
1137
+ // 400 no_failed_attachment (legacy: no failed audio attachment to retry)
1138
+ // 404 attachment_missing (auto-flow: transcript_attachment_id row deleted)
959
1139
  // 404 audio_missing (audio file unlinked from disk)
960
- // 503 scribe_unavailable (no worker configured this boot)
961
1140
  if (sub === "/retry-transcription") {
962
1141
  if (method !== "POST") return json({ error: "Method not allowed" }, 405);
963
1142
  if (!vault) return json({ error: "Vault context required" }, 400);
@@ -980,18 +1159,26 @@ async function handleNotesInner(
980
1159
  }
981
1160
  const includeContent = parseBool(parseQuery(url, "include_content"), true);
982
1161
  let result: any = includeContent ? { ...note } : toNoteIndex(note);
983
- const expand = parseExpandParams(url, db);
1162
+ const expand = parseExpandParams(url, db, tagScope);
984
1163
  if (expand && includeContent && typeof result.content === "string") {
985
1164
  expand.ctx.expanded.add(note.id);
986
1165
  result.content = expandContent(result.content, expand.ctx, expand.depth);
987
1166
  }
988
1167
  result = filterMetadata(result, parseIncludeMetadata(url));
989
1168
  if (parseBool(parseQuery(url, "include_links"), false)) {
990
- result.links = linkOps.getLinksHydrated(db, note.id);
1169
+ // Tag-scope: drop out-of-scope-neighbor links (no-op unscoped).
1170
+ result.links = filterHydratedLinksByTagScope(
1171
+ linkOps.getLinksHydrated(db, note.id),
1172
+ tagScope.allowed,
1173
+ tagScope.raw,
1174
+ );
991
1175
  }
992
1176
  if (parseBool(parseQuery(url, "include_attachments"), false)) {
993
1177
  result.attachments = await store.getAttachments(note.id);
994
1178
  }
1179
+ if (parseBool(parseQuery(url, "include_link_count"), false)) {
1180
+ result.linkCount = linkOps.getLinkCounts(db, [note.id], parseLinkCountDirection(url)).get(note.id) ?? 0;
1181
+ }
995
1182
  return json(result);
996
1183
  }
997
1184
 
@@ -1256,7 +1443,27 @@ async function handleNotesInner(
1256
1443
  // `toNoteIndex` drops unknown fields).
1257
1444
  const updatedNote = await store.getNote(note.id);
1258
1445
  if (updatedNote === null) return json({ error: "Note disappeared" }, 404);
1259
- const validated = attachValidationStatus(store, db, updatedNote);
1446
+ const validated: any = attachValidationStatus(store, db, updatedNote);
1447
+ // Echo hydrated links when a link mutation was part of this request,
1448
+ // OR the caller explicitly asked for them via `?include_links=true`
1449
+ // (vault feedback #8). Previously the update response omitted links
1450
+ // entirely (`getNote` populates tags but not links), forcing callers
1451
+ // to re-GET with `?include_links=true` just to confirm a link they
1452
+ // had just added/removed. Additive field, scoped to UPDATE: present
1453
+ // only when mutated or requested. Mirrors the GET / query-notes
1454
+ // hydration call form exactly (`linkOps.getLinksHydrated`).
1455
+ const linkMutated = body.links?.add !== undefined || body.links?.remove !== undefined;
1456
+ const includeLinksResp = linkMutated || parseBool(parseQuery(url, "include_links"), false);
1457
+ if (includeLinksResp) {
1458
+ // Tag-scope: strip out-of-scope-neighbor links from the echoed set
1459
+ // (no-op unscoped). A write token tag-scoped to #work mustn't learn
1460
+ // about a #personal note it happened to link to.
1461
+ validated.links = filterHydratedLinksByTagScope(
1462
+ linkOps.getLinksHydrated(db, updatedNote.id),
1463
+ tagScope.allowed,
1464
+ tagScope.raw,
1465
+ );
1466
+ }
1260
1467
  const includeContentResp = body.include_content !== false;
1261
1468
  // `created: false` is appended to every update-path response so
1262
1469
  // sync-loop callers using `if_missing: "create"` can distinguish
@@ -1266,6 +1473,9 @@ async function handleNotesInner(
1266
1473
  const lean: any = toNoteIndex(validated);
1267
1474
  const vs = (validated as any).validation_status;
1268
1475
  if (vs !== undefined) lean.validation_status = vs;
1476
+ // Carry the link echo across the lean conversion — `toNoteIndex`
1477
+ // drops unknown fields, same as the `validation_status` recipe above.
1478
+ if (validated.links !== undefined) lean.links = validated.links;
1269
1479
  lean.created = false;
1270
1480
  return json(lean);
1271
1481
  } catch (e: any) {
@@ -1513,10 +1723,12 @@ export async function handleTags(
1513
1723
  parent_names?: unknown;
1514
1724
  };
1515
1725
 
1516
- // Validate relationships shape + cardinality vocabulary up front so
1517
- // a bad payload returns 400, not a thrown 500.
1726
+ // Validate the relationships payload up front so a bad payload returns
1727
+ // 400, not a thrown 500. `relationships` is an opaque vocabulary map
1728
+ // (relationship-name → arbitrary JSON the app interprets); we only check
1729
+ // that it's a JSON object (a map), then persist verbatim.
1518
1730
  let relationshipsPatch:
1519
- | Record<string, tagSchemaOps.TagRelationship>
1731
+ | tagSchemaOps.TagRelationshipMap
1520
1732
  | null
1521
1733
  | undefined;
1522
1734
  if (body.relationships === null) {
@@ -1729,12 +1941,32 @@ export async function handleVault(
1729
1941
  // Unresolved wikilinks — REST-only (admin/maintenance)
1730
1942
  // ---------------------------------------------------------------------------
1731
1943
 
1732
- export function handleUnresolvedWikilinks(req: Request, store: Store): Response {
1944
+ export function handleUnresolvedWikilinks(
1945
+ req: Request,
1946
+ store: Store,
1947
+ tagScope: TagScopeCtx = NO_TAG_SCOPE,
1948
+ ): Response {
1733
1949
  const url = new URL(req.url);
1734
1950
  const limitStr = url.searchParams.get("limit");
1735
1951
  const limit = limitStr ? parseInt(limitStr, 10) : 50;
1736
1952
  const db = (store as any).db;
1737
- return Response.json(listUnresolvedWikilinks(db, limit));
1953
+ const result = listUnresolvedWikilinks(db, limit);
1954
+
1955
+ // Unscoped token → return as-is (unchanged behavior).
1956
+ if (tagScope.raw === null) return Response.json(result);
1957
+
1958
+ // Tag-scope confidentiality (security review): each unresolved row carries
1959
+ // a `source_id` (+ `source_path`) plus the raw `target_path` wikilink
1960
+ // string. For a tag-scoped token, surface ONLY rows whose SOURCE note is
1961
+ // within the token's tag scope — otherwise we'd leak out-of-scope note IDs
1962
+ // and the wikilink target strings those notes contain. Filter the page and
1963
+ // recompute `count` from the filtered set so the aggregate total of
1964
+ // out-of-scope rows doesn't leak either.
1965
+ const filtered = result.unresolved.filter((row) => {
1966
+ const note = getNote(db, row.source_id);
1967
+ return note !== null && noteWithinTagScope(note, tagScope.allowed, tagScope.raw);
1968
+ });
1969
+ return Response.json({ unresolved: filtered, count: filtered.length });
1738
1970
  }
1739
1971
 
1740
1972
  // ---------------------------------------------------------------------------
@@ -1923,21 +2155,35 @@ ${rendered}
1923
2155
  // ---------------------------------------------------------------------------
1924
2156
 
1925
2157
  /**
1926
- * Re-enqueue the original audio attachment for a `transcript_status: failed`
1927
- * transcript note. Steps:
2158
+ * Re-enqueue the original audio attachment for a failed transcription.
1928
2159
  *
1929
- * 1. Validate target is a transcript note (`transcript_status` set in
1930
- * metadata) AND that status is `failed`.
1931
- * 2. Find the original audio attachment by id from
1932
- * `transcript_attachment_id` frontmatter. 404 if the row's gone.
1933
- * 3. Validate the audio file still exists on disk (retention=keep is
1934
- * assumed by the retry contract; retention=until_transcribed unlinks
1935
- * only on success, retention=never unlinks on failure — that last one
1936
- * explicitly breaks retry, by design).
1937
- * 4. Reset `transcribe_status = "pending"`, clear backoff + error fields.
1938
- * The auto-origin marker is preserved so the worker writes a transcript
1939
- * note (overwriting this one in place).
1940
- * 5. Kick the worker if registered; otherwise the sweep picks it up.
2160
+ * Two target shapes are accepted:
2161
+ *
2162
+ * - **Auto-flow (vault#353):** the target is a `<audio>.transcript.md` note
2163
+ * carrying `transcript_status` frontmatter. Requires that status be
2164
+ * `failed`; locates the audio via `transcript_attachment_id`; preserves
2165
+ * `transcribe_origin: "auto"` so a retried success overwrites this
2166
+ * transcript note in place. Behavior is byte-identical to the original
2167
+ * vault#353 contract.
2168
+ *
2169
+ * - **Legacy in-body memo (finding F):** the target is the memo note
2170
+ * itself — no `transcript_status` frontmatter. The original capture body
2171
+ * holds `![[<audio>]]` + a `_Transcription unavailable._` marker (written
2172
+ * by the worker on terminal failure). We find the note's own failed audio
2173
+ * attachment, reset it to pending **preserving `transcribe_origin:
2174
+ * "legacy"`** (forcing "auto" would switch to the sibling-transcript-note
2175
+ * shape and orphan the in-body embed), and **re-stamp `transcribe_stub:
2176
+ * true`** on the note. The stub re-arm is load-bearing: the legacy success
2177
+ * path early-returns unless `transcribe_stub === true`, so without it the
2178
+ * retried success would never write the transcript back into the body.
2179
+ * On success the worker replaces the `_Transcription unavailable._` marker
2180
+ * with the transcript in place, yielding the same final shape a first-try
2181
+ * success would.
2182
+ *
2183
+ * Common steps for both: validate the audio attachment row exists (404 if
2184
+ * gone) and its file is still on disk (404 if unlinked — e.g. retention=never
2185
+ * already dropped it), reset the transcribe_status fields, then kick the
2186
+ * worker if registered (otherwise the sweep picks it up).
1941
2187
  */
1942
2188
  async function handleRetryTranscription(
1943
2189
  store: Store,
@@ -1945,15 +2191,13 @@ async function handleRetryTranscription(
1945
2191
  vault: string,
1946
2192
  ): Promise<Response> {
1947
2193
  const meta = (note.metadata as Record<string, unknown> | undefined) ?? {};
2194
+
2195
+ // Legacy in-body memo: no `transcript_status` frontmatter. The note owns
2196
+ // the failed audio attachment directly; there's no sibling transcript note.
1948
2197
  if (typeof meta.transcript_status !== "string") {
1949
- return json(
1950
- {
1951
- error: "invalid_target",
1952
- message: "Target note is not a transcript note (no transcript_status frontmatter).",
1953
- },
1954
- 400,
1955
- );
2198
+ return handleRetryLegacyInBody(store, note, vault);
1956
2199
  }
2200
+
1957
2201
  if (meta.transcript_status !== "failed") {
1958
2202
  return json(
1959
2203
  {
@@ -2040,13 +2284,171 @@ async function handleRetryTranscription(
2040
2284
  );
2041
2285
  }
2042
2286
 
2287
/**
 * Reports whether `candidate` resolves to a location strictly inside `root`.
 *
 * A bare `startsWith(root)` test is not boundary-aware: with a root of
 * `/vault/assets`, the sibling `/vault/assets-evil/x.wav` shares the string
 * prefix and would be accepted. This helper keeps that prefix test and
 * additionally requires a path separator right after the root, so only true
 * descendants pass. The root directory itself is not "inside" the root.
 *
 * @param root - Directory that must contain the candidate.
 * @param candidate - Path to test; normalized before comparison.
 * @returns `true` only when `candidate` is a descendant of `root`.
 */
function isStrictlyInsideDir(root: string, candidate: string): boolean {
  const base = normalize(root);
  const target = normalize(candidate);
  // Original prefix test, kept so this check is never looser than before.
  if (!target.startsWith(base)) return false;
  // `sep` is not imported by this module. Joining a one-character placeholder
  // and trimming it off yields `base` followed by exactly one platform
  // separator (also when `base` already ends with one).
  const basePrefix = join(base, "_").slice(0, -1);
  return target.startsWith(basePrefix) && target.length > basePrefix.length;
}

/**
 * Retry path for a legacy in-body voice memo. The target note is the memo
 * itself (no `transcript_status` frontmatter); it directly owns the
 * attachment whose transcription failed.
 *
 * Steps:
 *   1. Find the note's own attachment with `transcribe_status === "failed"`.
 *      400 `no_failed_attachment` if none — there is nothing to retry.
 *   2. Validate the file resolves strictly inside the vault assets dir and
 *      still exists on disk (404 `audio_missing` otherwise).
 *   3. Reset the attachment to pending, **preserving `transcribe_origin:
 *      "legacy"`** (never force "auto" — that switches to the sibling-
 *      transcript-note shape and orphans the in-body embed).
 *   4. **Re-stamp `transcribe_stub: true`** on the note, guarded by
 *      `if_updated_at` with a single retry on conflict. The legacy worker
 *      success path is gated on this flag, so re-arming it is what lets a
 *      retried success replace the `_Transcription unavailable._` marker.
 *   5. Kick the worker if one is registered; otherwise the sweep picks it up.
 *
 * @param store - Note/attachment store used for all reads and writes.
 * @param note - The memo note as resolved by the caller.
 * @param vault - Vault name, used to locate the assets directory.
 * @returns 202 `queued` on success; 400 / 404 / 409 JSON errors as above.
 * @throws Re-throws any store error that is not a `CONFLICT`.
 */
async function handleRetryLegacyInBody(
  store: Store,
  note: Note,
  vault: string,
): Promise<Response> {
  const attachments = await store.getAttachments(note.id);
  const failed = attachments.find((a) => {
    const m = (a.metadata as Record<string, unknown> | undefined) ?? {};
    return m.transcribe_status === "failed";
  });
  if (!failed) {
    return json(
      {
        error: "no_failed_attachment",
        message:
          "Target note is not a transcript note and has no audio attachment with a failed transcription to retry.",
      },
      400,
    );
  }

  // Audio file existence + safety: defense-in-depth against a bad attachment
  // row pointing outside the vault assets dir. The containment check is
  // separator-aware, so a sibling directory that merely shares the assets
  // dir's name as a prefix (e.g. `assets-old/`) is rejected too.
  const assetsRoot = assetsDir(vault);
  const audioFilePath = normalize(join(assetsRoot, failed.path));
  if (!isStrictlyInsideDir(assetsRoot, audioFilePath) || !existsSync(audioFilePath)) {
    return json(
      {
        error: "audio_missing",
        message: `Original audio file at "${failed.path}" no longer exists on disk.`,
      },
      404,
    );
  }

  // Reset the attachment back to pending. Preserve `transcribe_origin:
  // "legacy"` (a default of `undefined` is also legacy, but make it explicit
  // so a retried row reads unambiguously) — forcing "auto" here would make
  // the worker materialize a sibling transcript note instead of patching the
  // in-body embed, orphaning the memo.
  const attMeta = { ...(failed.metadata ?? {}) } as Record<string, unknown>;
  attMeta.transcribe_status = "pending";
  attMeta.transcribe_requested_at = new Date().toISOString();
  attMeta.transcribe_origin = "legacy";
  // Drop the bookkeeping of the failed run so the retry starts clean.
  delete attMeta.transcribe_backoff_until;
  delete attMeta.transcribe_error;
  delete attMeta.transcribe_error_code;
  delete attMeta.transcribe_attempts;
  await store.setAttachmentMetadata(failed.id, attMeta);

  // Re-arm the stub on the note. The worker's legacy success path gates on
  // `transcribe_stub === true` and CLEARED it when it wrote the failure
  // marker; without re-stamping it the retried success early-returns and
  // never writes the transcript back into the body. Use skipUpdatedAt so the
  // note's modification time still reflects user intent, matching the
  // worker's own note writes.
  //
  // OC-guarded (vault#435): this read-transform-write would otherwise clobber
  // a user edit landing between the caller's note lookup and this write.
  // Thread `if_updated_at` and retry once on conflict — re-read, re-apply the
  // metadata-only re-stamp against the fresh note, write with the fresh
  // precondition. A second conflict surfaces as 409 so the user can retry.
  // Only the `transcribe_stub` flag is stamped (never content), so re-applying
  // against the fresh note is always the correct surgical transform.
  const restampStub = (current: Note): Record<string, unknown> => ({
    ...((current.metadata as Record<string, unknown> | undefined) ?? {}),
    transcribe_stub: true,
  });
  try {
    try {
      await store.updateNote(note.id, {
        metadata: restampStub(note),
        skipUpdatedAt: true,
        if_updated_at: note.updatedAt,
      });
    } catch (err: any) {
      if (!err || err.code !== "CONFLICT") throw err;
      // Conflict — re-read, re-apply the stub re-stamp, retry once.
      const fresh = await store.getNote(note.id);
      if (!fresh) {
        return json(
          { error: "note_missing", message: "Target note disappeared during retry." },
          404,
        );
      }
      await store.updateNote(fresh.id, {
        metadata: restampStub(fresh),
        skipUpdatedAt: true,
        if_updated_at: fresh.updatedAt,
      });
    }
  } catch (err: any) {
    if (err && err.code === "CONFLICT") {
      // Double conflict — the note kept changing under us. It's a user-facing
      // request; return 409 so the caller can retry against fresh state. The
      // attachment was already reset to pending above; a successful re-stamp
      // on the user's next retry (or the next sweep, if they re-arm the stub)
      // will let the worker patch the transcript in.
      return json(
        {
          error_type: "conflict",
          error: "conflict",
          note_id: note.id,
          current_updated_at: err.current_updated_at ?? null,
          your_updated_at: err.expected_updated_at,
          // Mirror the standard PATCH 409 shape — both `your_updated_at` and
          // `expected_updated_at` carry the same value, kept for
          // shape-congruence with existing callers.
          expected_updated_at: err.expected_updated_at,
          message:
            "Note was modified concurrently while arming the retry; re-fetch and try again.",
        },
        409,
      );
    }
    throw err;
  }

  // Imported lazily, at call time, rather than at module load.
  const { getTranscriptionWorker } = await import("./transcription-registry.ts");
  const worker = getTranscriptionWorker();
  if (worker) {
    // Hand the worker the freshly-reset row; fall back to the stale copy if
    // the re-read comes back empty. Fire-and-forget: the response does not
    // wait for transcription.
    const fresh = await store.getAttachment(failed.id) ?? failed;
    void worker.kick(vault, fresh);
  }

  return json(
    {
      status: "queued",
      attachment_id: failed.id,
      attachment_path: failed.path,
      transcript_note_id: note.id,
      worker: worker ? "kicked" : "sweep-only",
    },
    202,
  );
}
2443
+
2043
2444
  // ---------------------------------------------------------------------------
2044
2445
  // Storage (file upload/serve) — kept as-is, Daily needs it
2446
+ //
2447
+ // `assetsDir` moved to config.ts (next to the other path helpers) to break the
2448
+ // usage.ts↔routes.ts import cycle; it's re-exported from this module's top so
2449
+ // existing importers are unaffected.
2045
2450
  // ---------------------------------------------------------------------------
2046
2451
 
2047
- export function assetsDir(vault: string): string {
2048
- return process.env.ASSETS_DIR ?? join(vaultDir(vault), "assets");
2049
- }
2050
2452
  const MAX_UPLOAD_BYTES = 100 * 1024 * 1024; // 100MB
2051
2453
 
2052
2454
  // Storage allowlist policy:
@@ -2112,10 +2514,41 @@ export async function handleStorage(
2112
2514
  const relativePath = `${date}/${filename}`;
2113
2515
  const mimeType = MIME_TYPES[ext] ?? "application/octet-stream";
2114
2516
 
2517
+ // Invalidate the usage dir-walk cache for this vault — the new attachment
2518
+ // changed the assets-directory footprint, so the next /.parachute/usage
2519
+ // read must re-walk rather than report a stale (smaller) number. Without
2520
+ // this hook the cache's 60s TTL would briefly under-report after an
2521
+ // upload. (usage.ts:invalidateUsageCache is a no-op-cheap map delete.)
2522
+ invalidateUsageCache(vault);
2523
+
2115
2524
  return json({ path: relativePath, size: buffer.length, mimeType }, 201);
2116
2525
  }
2117
2526
 
2118
- const fileMatch = path.match(/^\/([^/]+)\/(.+)$/);
2527
+ // Decode percent-encoding BEFORE matching. `path` arrives from
2528
+ // `url.pathname`, which (per WHATWG) keeps an encoded `%2F` slash literal —
2529
+ // so a caller requesting `/api/storage/<date>%2F<file>` would never satisfy
2530
+ // the literal-slash match below and would fall through to the 404. Decoding
2531
+ // first accepts both the literal-slash and `%2F`-encoded forms, and yields
2532
+ // the literal-slash path that the DB stores (`${date}/${filename}`) so the
2533
+ // tag-scope reverse-lookup matches. This intentionally diverges from the
2534
+ // single-note routes, which decode their *first* segment only and therefore
2535
+ // REQUIRE `%2F` for slashes-in-an-id — a trap-grade asymmetry we accept here
2536
+ // because storage paths are always multi-segment date/file pairs.
2537
+ //
2538
+ // Guard-safety (verified): the traversal guard below operates on the
2539
+ // post-`normalize(join())` filesystem path, so a decoded `..` is still
2540
+ // caught → 403. Decode is idempotent for today's unencoded callers
2541
+ // (filenames are `<Date.now()>-<uuid>.<ext>` — no stray `%`). A malformed
2542
+ // `%` (e.g. `2026%2`) throws → 404, consistent with the no-existence-oracle
2543
+ // stance (and an improvement over the prior catch-all 500).
2544
+ let decodedPath: string;
2545
+ try {
2546
+ decodedPath = decodeURIComponent(path);
2547
+ } catch {
2548
+ return json({ error: "Not found" }, 404);
2549
+ }
2550
+
2551
+ const fileMatch = decodedPath.match(/^\/([^/]+)\/(.+)$/);
2119
2552
  if (req.method === "GET" && fileMatch) {
2120
2553
  const reqPath = `${fileMatch[1]}/${fileMatch[2]}`;
2121
2554
  const filePath = normalize(join(assets, reqPath));