@openparachute/vault 0.5.1-rc.2 → 0.5.2-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/core/src/mcp.ts CHANGED
@@ -99,13 +99,31 @@ function removeWikilinkBrackets(content: string, targetPath: string): string {
99
99
  // Tool generation
100
100
  // ---------------------------------------------------------------------------
101
101
 
102
+ /**
103
+ * Options for {@link generateMcpTools}.
104
+ *
105
+ * `expandVisibility` (vault security review) is an OPTIONAL per-note
106
+ * visibility predicate threaded into the wikilink-expansion context for
107
+ * `query-notes`. When provided, `expand_links` inlining leaves any wikilink
108
+ * whose target fails the predicate UNRESOLVED — so a tag-scoped MCP session
109
+ * can't inline out-of-scope note content during expansion (the filtering
110
+ * happens DURING expansion, not after). Core stays scope-unaware: it
111
+ * receives a plain `(note) => boolean` closure and never imports the
112
+ * server's tag-scope module. Omitted (every internal / unscoped caller) →
113
+ * expansion behaves exactly as before.
114
+ */
115
+ export interface GenerateMcpToolsOpts {
116
+ expandVisibility?: (note: Note) => boolean; // optional; a `false` result for a target note leaves the wikilink to it unresolved during expansion
117
+ }
118
+
102
119
  /**
103
120
  * Generate the consolidated MCP tools for a vault. Surface (10):
104
121
  * query-notes, create-note, update-note, delete-note, list-tags, update-tag,
105
122
  * delete-tag, find-path, vault-info, prune-schema (admin).
106
123
  */
107
- export function generateMcpTools(store: Store): McpToolDef[] {
124
+ export function generateMcpTools(store: Store, opts?: GenerateMcpToolsOpts): McpToolDef[] {
108
125
  const db: Database = (store as any).db;
126
+ const expandVisibility = opts?.expandVisibility;
109
127
 
110
128
  return [
111
129
 
@@ -178,7 +196,7 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
178
196
  type: "object",
179
197
  description: "Filter by metadata values. Each value is either a primitive (exact match, scans JSON) or an operator object: `{eq|ne|gt|gte|lt|lte|in|not_in|exists: value}`. Operator objects require the field to be declared `indexed: true` in a tag schema — they route through the backing B-tree index. Multiple operators on one field AND together (e.g. `{gt: 5, lt: 10}`). `in`/`not_in` take arrays; `exists` takes a boolean.",
180
198
  },
181
- order_by: { type: "string", description: "Sort by an indexed metadata field instead of `created_at`. Field must be declared `indexed: true`; errors otherwise. Direction is taken from `sort` (default 'asc'); `created_at` is appended as a stable tiebreaker." },
199
+ order_by: { type: "string", description: "Sort by an indexed metadata field instead of `created_at`. Field must be declared `indexed: true`; errors otherwise. The special value `link_count` sorts by link DEGREE (both-directions raw row count) — no declaration needed — matching the `include_link_count` field for every note. Direction is taken from `sort` (default 'asc'); `created_at` is appended as a stable tiebreaker." },
182
200
  date_from: { type: "string", description: "Start date (ISO, inclusive). Filters on `created_at` (vault ingestion time). Shorthand for `date_filter: { field: 'created_at', from }`." },
183
201
  date_to: { type: "string", description: "End date (ISO, exclusive). Filters on `created_at` (vault ingestion time). Shorthand for `date_filter: { field: 'created_at', to }`." },
184
202
  date_filter: {
@@ -217,6 +235,17 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
217
235
  description: "Control metadata in response: true (all, default), false (none), or array of field names to include",
218
236
  },
219
237
  include_links: { type: "boolean", description: "Include inbound + outbound links per note (default: false)" },
238
+ include_link_count: {
239
+ type: "boolean",
240
+ description:
241
+ "Include the note's link DEGREE as a `linkCount` field, without hauling the link objects (default: false). Degree is a raw row count: outbound (source) + inbound (target). A self-loop counts as 2. Cheap COUNT over indexes; batched once per request. For a tag-scoped token, `linkCount` is the raw degree and MAY include edges to notes the token can't see — only the number leaks, not the neighbor.",
242
+ },
243
+ link_count_direction: {
244
+ type: "string",
245
+ enum: ["both", "outbound", "inbound"],
246
+ description:
247
+ "Which edges `include_link_count` counts: both (default), outbound only (source_id), or inbound only (target_id). order_by=link_count always uses the both-directions degree.",
248
+ },
220
249
  include_attachments: { type: "boolean", description: "Include attachment records (default: false)" },
221
250
  expand_links: { type: "boolean", description: "Inline [[wikilinks]] in returned content (default: false). Has no effect if content is not included (e.g., default list mode with include_content=false); wikilinks inside fenced or inline code are not expanded." },
222
251
  expand_depth: { type: "number", description: "Recursion depth for link expansion (default 1, max 3). Only meaningful in 'full' mode — 'summary' mode does not recurse." },
@@ -235,7 +264,16 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
235
264
  ),
236
265
  );
237
266
  const expandCtx: ExpandContext | null = expandLinks
238
- ? { db, mode: expandMode, expanded: new Set() }
267
+ ? {
268
+ db,
269
+ mode: expandMode,
270
+ expanded: new Set(),
271
+ // Tag-scope confidentiality (security review): when a visibility
272
+ // predicate was injected, wikilinks to out-of-scope notes are
273
+ // left unresolved DURING inlining — never embedded. Unscoped
274
+ // callers pass no predicate and inlining is unchanged.
275
+ ...(expandVisibility ? { isVisible: expandVisibility } : {}),
276
+ }
239
277
  : null;
240
278
 
241
279
  // --- Single note by ID/path ---
@@ -256,10 +294,26 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
256
294
  if (params.include_attachments) {
257
295
  result.attachments = await store.getAttachments(note.id);
258
296
  }
297
+ // linkCount injected after filterMetadata on purpose — same as
298
+ // links/attachments above; filterMetadata only touches `metadata`.
299
+ if (params.include_link_count) {
300
+ const dir = normalizeLinkCountDirection(params.link_count_direction);
301
+ result.linkCount = linkOps.getLinkCounts(db, [note.id], dir).get(note.id) ?? 0;
302
+ }
259
303
  return result;
260
304
  }
261
305
 
262
306
  // --- Build near-scope (graph-filtered set of allowed IDs) ---
307
+ //
308
+ // Tag-scope policy for `near[]` (output-filter, not hop-guard): core
309
+ // is scope-unaware, so this BFS walks the FULL graph from the anchor —
310
+ // including out-of-scope intermediate hops. For a tag-scoped session
311
+ // the server's `applyTagScopeWrappers` (mcp-tools.ts) tag-filters the
312
+ // RESULT list AFTER execute, so out-of-scope notes never survive into
313
+ // the response — no content/ids leak. This is ASYMMETRIC with
314
+ // `find-path`, which guards every hop (it returns the path itself, so
315
+ // an out-of-scope intermediary would be a leak there). The asymmetry is
316
+ // deliberate; tracked at vault#439.
263
317
  let nearScope: Set<string> | null = null;
264
318
  if (params.near) {
265
319
  const near = params.near as { note_id: string; depth?: number; relationship?: string };
@@ -390,6 +444,19 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
390
444
  output = output.map((n: any) => filterMetadata(n, includeMetadata));
391
445
  }
392
446
 
447
+ // --- Opt-in link degree (vault feedback #4) ---
448
+ // ONE batch count over all result ids (NOT per-note), so the field
449
+ // stays O(2 index scans) per request regardless of page size.
450
+ // Injected on the same objects the enrichment loop copies below.
451
+ // Ordering: runs AFTER the filterMetadata pass above on purpose —
452
+ // filterMetadata only touches the `metadata` key, so linkCount
453
+ // survives. Don't casually swap the order.
454
+ if (params.include_link_count) {
455
+ const dir = normalizeLinkCountDirection(params.link_count_direction);
456
+ const counts = linkOps.getLinkCounts(db, output.map((n: any) => n.id), dir);
457
+ for (const n of output) n.linkCount = counts.get(n.id) ?? 0;
458
+ }
459
+
393
460
  // --- Hydrate links/attachments per note if requested ---
394
461
  if (params.include_links || params.include_attachments) {
395
462
  const enrichedOut: any[] = [];
@@ -543,7 +610,7 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
543
610
  - \`links: { add: [{ target, relationship }], remove: [{ target, relationship }] }\` — add/remove links
544
611
  - When removing a wikilink-type link, \`[[brackets]]\` are also removed from content.
545
612
  - For batch: pass a \`notes\` array, each with an \`id\` field.
546
- - **Optimistic concurrency is required by default.** Pass \`if_updated_at\` with the \`updated_at\` value you last read — the update is rejected with a conflict error if the note has changed since. Re-read, reconcile, and retry. To skip the safety check (e.g. bulk migration), pass \`force: true\` instead; the update then runs unconditionally. \`append\` / \`prepend\` only updates are exempt from the precondition (no-conflict-by-design).
613
+ - **Optimistic concurrency is required by default.** Pass \`if_updated_at\` with the \`updated_at\` value you last read — the update is rejected with a conflict error if the note has changed since. Re-read, reconcile, and retry. To skip the safety check (e.g. bulk migration), pass \`force: true\` instead; the update then runs unconditionally. \`force\` only waives the *requirement to supply* \`if_updated_at\` — if you pass both, the precondition you supplied still applies and a mismatch returns a conflict error. \`append\` / \`prepend\` only updates are exempt from the precondition (no-conflict-by-design).
547
614
  - **Idempotent upsert via \`if_missing: "create"\`** — when the note doesn't exist, create it from this same payload (content/path/tags/metadata become the create fields; OC precondition skipped — nothing to conflict with). Response carries \`created: true\`. Useful for nightly sync loops that don't know ahead of time whether the note exists. Default \`"fail"\` (current behavior — missing note errors). See vault#309.
548
615
  - \`include_content\` (default \`true\`) — set \`false\` to receive a lean index shape (\`id\`, \`path\`, \`createdAt\`, \`updatedAt\`, \`tags\`, \`metadata\`, \`byteSize\`, \`preview\`) instead of full content. Useful for agents making frequent small edits to large notes (e.g. via \`append\` or \`content_edit\`) where re-receiving the body is the dominant cost. \`validation_status\` is preserved on the lean shape when present.`,
549
616
  inputSchema: {
@@ -567,7 +634,7 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
567
634
  metadata: { type: "object", description: "Metadata to merge (keys are merged, not replaced wholesale)" },
568
635
  created_at: { type: "string", description: "New created_at timestamp" },
569
636
  if_updated_at: { type: "string", description: "Optimistic concurrency check: the updated_at value you last read. Rejects with a conflict error if the note has been modified since. Required unless `force: true` is set or the call is `append`/`prepend`-only." },
570
- force: { type: "boolean", description: "Override the required `if_updated_at` check and run the update unconditionally. Use only for bulk migrations or scripted writes where concurrency is known-safe." },
637
+ force: { type: "boolean", description: "Waive the *requirement to supply* `if_updated_at` and run the update unconditionally. Use only for bulk migrations or scripted writes where concurrency is known-safe. Note: this does not override an `if_updated_at` you actually pass — if you supply both, the precondition still applies and a mismatch returns a conflict error." },
571
638
  if_missing: { type: "string", enum: ["fail", "create"], description: "What to do when the note (by `id`/path) doesn't exist. `\"fail\"` (default) — error, current behavior. `\"create\"` — create the note from this same payload (content/path/tags/metadata become the create fields; the response carries `created: true`). Skips the `if_updated_at` precondition on the create branch (nothing to conflict with). Idempotent for sync loops that don't know ahead of time whether the note exists. See vault#309." },
572
639
  tags: {
573
640
  type: "object",
@@ -609,6 +676,10 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
609
676
  type: "boolean",
610
677
  description: "Response shape opt-out. Default `true` (returns the full Note with content). Set `false` to receive the lean index shape (drops `content`, adds `byteSize` and a whitespace-collapsed `preview`). `validation_status` is preserved on the lean shape when present. Applies uniformly to single and batch responses.",
611
678
  },
679
+ include_links: {
680
+ type: "boolean",
681
+ description: "Echo the note's hydrated inbound + outbound links on the response (vault feedback #8). Links are *also* echoed automatically whenever the update itself mutated links (`links.add`/`links.remove`), so you rarely need to set this — its purpose is to fetch the current link set on an update that didn't touch links. Default: `false` (and absent from the response unless mutated or requested). Mirrors `query-notes`'s `include_links`. This top-level flag applies to the single-note form only; for a batch, set `include_links` on each note object in `notes` (a top-level `include_links` is ignored when `notes` is present).",
682
+ },
612
683
  // Batch
613
684
  notes: {
614
685
  type: "array",
@@ -632,10 +703,11 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
632
703
  metadata: { type: "object" },
633
704
  created_at: { type: "string" },
634
705
  if_updated_at: { type: "string", description: "Optimistic concurrency check for this item; rejects with a conflict error if the note has been modified since. Required unless `force: true` is set on this item or the item is `append`/`prepend`-only." },
635
- force: { type: "boolean", description: "Override the required `if_updated_at` check for this item." },
706
+ force: { type: "boolean", description: "Waive the *requirement to supply* `if_updated_at` for this item. Does not override an `if_updated_at` you actually pass — a supplied precondition still applies and a mismatch conflicts." },
636
707
  if_missing: { type: "string", enum: ["fail", "create"], description: "Per-item: see top-level `if_missing` docs. Each batch item carries its own setting." },
637
708
  tags: { type: "object" },
638
709
  links: { type: "object" },
710
+ include_links: { type: "boolean", description: "Per-item: echo hydrated links on this item's response (vault feedback #8). Also implied when this item mutates links." },
639
711
  },
640
712
  required: ["id"],
641
713
  },
@@ -657,6 +729,15 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
657
729
  // sync-loop caller (Gitcoin Brain et al) reads this to know which
658
730
  // path fired without doing a separate query. vault#309.
659
731
  const createdIds = new Set<string>();
732
+ // Track which note IDs should echo hydrated links on the response.
733
+ // A note qualifies when this request mutated its links
734
+ // (`links.add`/`links.remove`) OR the caller set `include_links`.
735
+ // vault feedback #8 — previously the update response omitted links
736
+ // entirely, forcing a re-query just to confirm a link the caller had
737
+ // just added/removed. Per-item on batch. Note IDs (not item indices)
738
+ // key this so the create-on-missing branch, which assigns the id
739
+ // late, can register correctly.
740
+ const echoLinkIds = new Set<string>();
660
741
  // Wrap multi-item batches in a SQLite transaction so any mid-batch
661
742
  // failure (precondition error, content_edit miss, ConflictError, …)
662
743
  // rolls back every prior mutation in the batch — see #236.
@@ -745,6 +826,11 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
745
826
  const fresh = noteOps.getNote(db, created.id) ?? created;
746
827
  updated.push(fresh);
747
828
  createdIds.add(fresh.id);
829
+ // Echo links if this create-on-missing declared `links.add`
830
+ // (the only link op honored on create) or asked explicitly.
831
+ if (linksAdd !== undefined || item.include_links === true) {
832
+ echoLinkIds.add(fresh.id);
833
+ }
748
834
  continue;
749
835
  }
750
836
  // Fallthrough: not-found + no if_missing → existing error
@@ -907,6 +993,13 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
907
993
  }
908
994
  }
909
995
 
996
+ // Echo links if this update mutated them (`links.add`/`links.remove`)
997
+ // or the caller asked explicitly. vault feedback #8.
998
+ const linkMutated = (item.links as any)?.add !== undefined || (item.links as any)?.remove !== undefined;
999
+ if (linkMutated || item.include_links === true) {
1000
+ echoLinkIds.add(note.id);
1001
+ }
1002
+
910
1003
  // Re-read for final state
911
1004
  updated.push(noteOps.getNote(db, note.id) ?? result);
912
1005
  }
@@ -929,11 +1022,23 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
929
1022
  const final = updated.map((n) => {
930
1023
  const validated = attachValidationStatus(store, db, n);
931
1024
  const created = createdIds.has(n.id);
932
- if (includeContent) return { ...validated, created } as Note & { created: boolean };
1025
+ // Echo hydrated links when this note was flagged for it (mutated
1026
+ // its links or `include_links` was set). Additive key, present only
1027
+ // when triggered — mirrors the GET / query-notes shape exactly via
1028
+ // the shared `linkOps.getLinksHydrated` call. vault feedback #8.
1029
+ const echoLinks = echoLinkIds.has(n.id);
1030
+ if (includeContent) {
1031
+ const full: any = { ...validated, created };
1032
+ if (echoLinks) full.links = linkOps.getLinksHydrated(db, n.id);
1033
+ return full as Note & { created: boolean };
1034
+ }
933
1035
  const lean: any = noteOps.toNoteIndex(validated);
934
1036
  const vs = (validated as any).validation_status;
935
1037
  if (vs !== undefined) lean.validation_status = vs;
936
1038
  lean.created = created;
1039
+ // Carry the link echo across the lean conversion — `toNoteIndex`
1040
+ // drops unknown fields.
1041
+ if (echoLinks) lean.links = linkOps.getLinksHydrated(db, n.id);
937
1042
  return lean;
938
1043
  });
939
1044
  return batch ? final : final[0];
@@ -1029,7 +1134,7 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
1029
1134
  {
1030
1135
  name: "update-tag",
1031
1136
  requiredVerb: "write",
1032
- description: "Create or update a tag's identity row: description, indexed-field schemas, typed-link relationships, and hierarchy parents. If the tag doesn't exist, it's created. Fields are merged (new keys added, existing keys replaced); relationships and parent_names are replaced wholesale when provided. Pass null for fields/relationships/parent_names to clear that column. See parachute-patterns/patterns/tag-data-model.md.",
1137
+ description: "Create or update a tag's identity row: description, indexed-field schemas, relationship-vocabulary map, and hierarchy parents. If the tag doesn't exist, it's created. Fields are merged (new keys added, existing keys replaced); relationships and parent_names are replaced wholesale when provided. Pass null for fields/relationships/parent_names to clear that column. See parachute-patterns/patterns/tag-data-model.md.",
1033
1138
  inputSchema: {
1034
1139
  type: "object",
1035
1140
  properties: {
@@ -1051,16 +1156,8 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
1051
1156
  },
1052
1157
  relationships: {
1053
1158
  type: "object",
1054
- description: 'Typed-link declarations. Each value declares { target_tag, cardinality, description? }. Cardinality is one of: one | optional | many | many-required. Phase 1: informational, not enforced at write time. E.g., { "lives_in": { "target_tag": "place", "cardinality": "one" } }',
1055
- additionalProperties: {
1056
- type: "object",
1057
- properties: {
1058
- target_tag: { type: "string", description: "Tag the relationship points at" },
1059
- cardinality: { type: "string", enum: ["one", "optional", "many", "many-required"], description: "How many targets this relationship may have" },
1060
- description: { type: "string", description: "Why this relationship exists; surfaced to AI clients" },
1061
- },
1062
- required: ["target_tag", "cardinality"],
1063
- },
1159
+ description: 'Opaque relationship-vocabulary map: keys are relationship names, values are arbitrary JSON the declaring app interprets. Vault stores and returns the values verbatim and does NOT enforce any inner shape — only that this is a JSON object (a map), not an array or primitive. Replaces any prior map wholesale when provided; pass null to clear. The historical typed shape { "lives_in": { "target_tag": "place", "cardinality": "one" } } is still a valid value, as is any app-defined shape e.g. { "works-on": { "from": "person", "to": "project" } }.',
1160
+ additionalProperties: true,
1064
1161
  },
1065
1162
  parent_names: {
1066
1163
  type: "array",
@@ -1124,10 +1221,11 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
1124
1221
  }
1125
1222
  }
1126
1223
 
1127
- // ---- relationships: replace wholesale when provided. Validate
1128
- // shape + cardinality vocabulary before persisting so a malformed
1129
- // payload can't leave the row in an inconsistent state.
1130
- let relationshipsPatch: Record<string, tagSchemaOps.TagRelationship> | null | undefined;
1224
+ // ---- relationships: replace wholesale when provided. `relationships`
1225
+ // is an opaque vocabulary map (relationship-name → arbitrary JSON the
1226
+ // app interprets). Validate only that it's a JSON object (a map), then
1227
+ // persist verbatim — no inner-shape enforcement.
1228
+ let relationshipsPatch: tagSchemaOps.TagRelationshipMap | null | undefined;
1131
1229
  if (params.relationships === null) {
1132
1230
  relationshipsPatch = null;
1133
1231
  } else if (params.relationships !== undefined) {
@@ -1382,6 +1480,16 @@ function normalizeTags(tag: unknown): string[] | undefined {
1382
1480
  return [tag as string];
1383
1481
  }
1384
1482
 
1483
+ /**
1484
+ * Coerce the `link_count_direction` MCP param to a known value, defaulting
1485
+ * to "both" (matches the REST `parseLinkCountDirection` fallback). A typo
1486
+ * silently degrades to the documented default rather than erroring: only the exact strings "outbound" and "inbound" are passed through unchanged; every other input (any other string, a non-string, or `undefined`) yields "both". The comparison is strict (`===`), so no trimming or case-folding is applied.
1487
+ */
1488
+ function normalizeLinkCountDirection(v: unknown): "both" | "outbound" | "inbound" {
1489
+ if (v === "outbound" || v === "inbound") return v; // strict match narrows `v` to the literal type, so it can be returned as-is
1490
+ return "both"; // fallback for everything else, including an explicit "both"
1491
+ }
1492
+
1385
1493
  // Re-exported for backward compat; defined in notes.ts alongside the
1386
1494
  // conditional-UPDATE implementation that raises it. AmbiguousPathError
1387
1495
  // joins the set (vault#331 N2) so external callers can `instanceof`
package/core/src/notes.ts CHANGED
@@ -736,6 +736,32 @@ export function queryNotes(db: Database, opts: QueryOpts): Note[] {
736
736
  // be at the mercy of SQLite's row order and the next page could
737
737
  // miss or duplicate one.
738
738
  orderBy = "n.updated_at ASC, n.id ASC";
739
+ } else if (opts.orderBy === "link_count") {
740
+ // `link_count` is a pseudo-field — like `created_at`/`updated_at` in the
741
+ // dateFilter block above, it bypasses `requireIndexedField` (it's not a
742
+ // metadata column). Sort by link DEGREE using the SAME directional-sum
743
+ // definition as the `linkCount` response field (see `getLinkCounts` in
744
+ // links.ts): two correlated COUNT subqueries summed. This MUST stay a
745
+ // sum of two directional counts — a single
746
+ // `COUNT(*) ... WHERE source_id=n.id OR target_id=n.id` would count a
747
+ // self-loop ONCE (degree 1) and DIVERGE from the field's degree-2. Both
748
+ // subqueries ride the existing `idx_links_source` / `idx_links_target`
749
+ // B-trees. `created_at` stays the stable tiebreaker.
750
+ //
751
+ // Always the both-directions degree — inbound-only ordering is a future
752
+ // extension and is not built here.
753
+ //
754
+ // Perf caveat: these are correlated subqueries, evaluated once per
755
+ // candidate row. At small-to-moderate vault sizes (tens of thousands of
756
+ // notes) that's fine — each subquery is an O(log n) index probe. At very
757
+ // large vault sizes the per-row scan cost grows; the upgrade path is a
758
+ // maintained `link_count` counter column on `notes`, incremented in
759
+ // `createLink` and decremented in `deleteLink`, then ordered directly.
760
+ // NOT built now — flagged so a future contributor sees the lever.
761
+ orderBy =
762
+ `((SELECT COUNT(*) FROM links WHERE source_id = n.id) ` +
763
+ `+ (SELECT COUNT(*) FROM links WHERE target_id = n.id)) ${direction}, ` +
764
+ `n.created_at ${direction}`;
739
765
  } else if (opts.orderBy) {
740
766
  requireIndexedField(db, opts.orderBy);
741
767
  // `orderBy` came from indexed_fields (validated on declaration), so
@@ -1553,6 +1579,15 @@ export function getVaultStats(
1553
1579
  const linkCountRow = db.prepare("SELECT COUNT(*) as c FROM links").get() as { c: number };
1554
1580
  const linkCount = linkCountRow.c;
1555
1581
 
1582
+ // Total content bytes. CAST(content AS BLOB) forces SQLite's LENGTH() to
1583
+ // count UTF-8 BYTES rather than characters (bare LENGTH on TEXT returns a
1584
+ // char count, which undercounts multibyte content). COALESCE because SUM
1585
+ // over zero rows is NULL. See VaultStats.contentBytes for the rationale.
1586
+ const contentBytesRow = db
1587
+ .prepare("SELECT COALESCE(SUM(LENGTH(CAST(content AS BLOB))), 0) as b FROM notes")
1588
+ .get() as { b: number };
1589
+ const contentBytes = contentBytesRow.b;
1590
+
1556
1591
  return {
1557
1592
  totalNotes,
1558
1593
  earliestNote: earliestRow
@@ -1566,6 +1601,7 @@ export function getVaultStats(
1566
1601
  tagCount,
1567
1602
  attachmentCount,
1568
1603
  linkCount,
1604
+ contentBytes,
1569
1605
  };
1570
1606
  }
1571
1607