@openparachute/vault 0.4.7-rc.2 → 0.4.8-rc.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/.parachute/module.json +1 -1
  2. package/README.md +78 -41
  3. package/core/src/connection-pragmas.test.ts +232 -0
  4. package/core/src/core.test.ts +257 -0
  5. package/core/src/cursor.test.ts +160 -0
  6. package/core/src/cursor.ts +272 -0
  7. package/core/src/mcp.ts +51 -7
  8. package/core/src/notes.ts +164 -2
  9. package/core/src/schema.ts +106 -5
  10. package/core/src/store.ts +11 -1
  11. package/core/src/types.ts +32 -0
  12. package/package.json +7 -3
  13. package/src/auth-status.ts +4 -0
  14. package/src/auth.test.ts +5 -112
  15. package/src/auto-transcribe.test.ts +116 -0
  16. package/src/auto-transcribe.ts +48 -0
  17. package/src/backup.ts +17 -3
  18. package/src/cli.ts +95 -66
  19. package/src/config.test.ts +26 -0
  20. package/src/config.ts +53 -1
  21. package/src/db.ts +15 -2
  22. package/src/export-watch.test.ts +21 -0
  23. package/src/mcp-install-interactive.test.ts +23 -2
  24. package/src/mcp-install-interactive.ts +21 -2
  25. package/src/mcp-install.test.ts +40 -0
  26. package/src/mcp-tools.ts +17 -1
  27. package/src/module-config.ts +70 -14
  28. package/src/module-manifest.test.ts +114 -0
  29. package/src/module-manifest.ts +104 -0
  30. package/src/oauth-discovery.ts +95 -0
  31. package/src/owner-auth.ts +22 -149
  32. package/src/routes.ts +268 -51
  33. package/src/routing.test.ts +102 -99
  34. package/src/routing.ts +33 -47
  35. package/src/scribe-discovery.test.ts +77 -0
  36. package/src/scribe-discovery.ts +91 -0
  37. package/src/scribe-env.test.ts +66 -1
  38. package/src/scribe-env.ts +42 -1
  39. package/src/self-register.test.ts +412 -0
  40. package/src/self-register.ts +247 -0
  41. package/src/server.ts +47 -23
  42. package/src/transcript-note.test.ts +171 -0
  43. package/src/transcript-note.ts +189 -0
  44. package/src/transcription-registry.ts +22 -0
  45. package/src/transcription-worker.test.ts +250 -0
  46. package/src/transcription-worker.ts +186 -27
  47. package/src/vault-name.ts +3 -2
  48. package/src/vault.test.ts +347 -0
  49. package/web/ui/dist/assets/index-BOa-JJtV.css +1 -0
  50. package/web/ui/dist/assets/index-BzA5LgE3.js +60 -0
  51. package/web/ui/dist/index.html +14 -0
  52. package/web/ui/tsconfig.json +21 -0
  53. package/src/oauth.test.ts +0 -2156
  54. package/src/oauth.ts +0 -973
@@ -0,0 +1,272 @@
1
+ /**
2
+ * Opaque cursors for `query-notes` (vault#313).
3
+ *
4
+ * Agent loops want "give me notes I haven't seen since last call." Today's
5
+ * pattern — pass `dateFilter: { field: "updated_at", from: <iso> }` and
6
+ * track the timestamp client-side — is brittle: the client has to remember
7
+ * the watermark, two notes at the same millisecond may collide, and a
8
+ * second call landed mid-millisecond can miss or double-count rows.
9
+ *
10
+ * The opaque-cursor pattern (Stripe, GitHub, et al.) fixes this. The server
11
+ * returns a `next_cursor: string` on each query response; the client passes
12
+ * it back on the next call and the server resumes from exactly where it
13
+ * left off. The cursor is base64url-encoded JSON the client must not
14
+ * inspect — internal layout can evolve without breaking callers.
15
+ *
16
+ * # Cursor payload
17
+ *
18
+ * ```ts
19
+ * {
20
+ * v: 1, // schema version
21
+ * last_updated_at: number, // millisecond epoch of the last seen note
22
+ * last_id: string, // ID of the last seen note — tiebreaker
23
+ * query_hash: string, // sha256 of normalized query params (hex)
24
+ * }
25
+ * ```
26
+ *
27
+ * - `last_updated_at` is millisecond epoch (not ISO) so cursor bytes stay
28
+ * compact and the tiebreaker math is integer.
29
+ * - `last_id` is the tiebreaker — when two notes share `updated_at`, the
30
+ * keyset query advances `id > last_id` at that timestamp so neither is
31
+ * skipped nor returned twice.
32
+ * - `query_hash` binds the cursor to the exact query it was minted for.
33
+ * Passing a cursor minted on `tag: "foo"` into a call for `tag: "bar"`
34
+ * would silently return the wrong page; mismatch raises a structured
35
+ * 400 (`cursor_query_mismatch`) instead.
36
+ *
37
+ * # Why JSON inside base64url
38
+ *
39
+ * A flat-string format (`<ts>:<id>:<hash>`) is two characters shorter but
40
+ * forecloses on optional fields. JSON gives us a schema-versioned envelope
41
+ * — if v2 needs additional state (e.g. a search-relevance secondary key),
42
+ * old clients keep working and new clients can read both.
43
+ *
44
+ * # Race safety
45
+ *
46
+ * The cursor stores the maximum-`updated_at`+`id` of the LAST returned
47
+ * page. The next call's keyset predicate is:
48
+ *
49
+ * (updated_at > last_updated_at)
50
+ * OR (updated_at = last_updated_at AND id > last_id)
51
+ *
52
+ * A note written between calls A and B at a brand-new `updated_at` is
53
+ * picked up by the first half of the predicate. A note written at the
54
+ * exact same `updated_at` as the cursor's watermark (uncommon — wall-clock
55
+ * collisions are rare at millisecond resolution but not impossible) is
56
+ * picked up by the tiebreaker only if its `id` sorts after `last_id`; the
57
+ * SQL `ORDER BY updated_at ASC, id ASC` keeps that order stable. Without the
58
+ * tiebreaker, two notes sharing an `updated_at` would be at the mercy of
59
+ * SQLite's row order, which is "stable in practice" but not contract.
60
+ */
61
+
62
+ import { createHash } from "node:crypto";
63
+
64
+ export const CURSOR_VERSION = 1;
65
+
66
+ export interface CursorPayload {
67
+ /** Schema version. Bumped if the cursor layout changes incompatibly. */
68
+ v: number;
69
+ /** Millisecond epoch of the last note returned. */
70
+ last_updated_at: number;
71
+ /** ID of the last note returned — tiebreaker for same-ms collisions. */
72
+ last_id: string;
73
+ /** sha256(hex) of normalized query params. Mismatch → cursor_query_mismatch. */
74
+ query_hash: string;
75
+ }
76
+
77
+ /**
78
+ * Thrown when a caller passes a malformed or stale cursor. The wrapping
79
+ * layer (MCP / REST) catches and surfaces a 400 with the structured code
80
+ * — callers should drop the cursor and restart the iteration.
81
+ */
82
+ export class CursorError extends Error {
83
+ override name = "CursorError";
84
+ code: "cursor_invalid" | "cursor_query_mismatch";
85
+ constructor(message: string, code: "cursor_invalid" | "cursor_query_mismatch") {
86
+ super(message);
87
+ this.code = code;
88
+ }
89
+ }
90
+
91
+ /** Encode a cursor payload to a base64url-safe opaque string. */
92
+ export function encodeCursor(payload: CursorPayload): string {
93
+ const json = JSON.stringify(payload);
94
+ return Buffer.from(json, "utf8").toString("base64url");
95
+ }
96
+
97
+ /** Decode a cursor string. Throws `CursorError` on any structural problem. */
98
+ export function decodeCursor(cursor: string): CursorPayload {
99
+ if (typeof cursor !== "string" || cursor.length === 0) {
100
+ throw new CursorError("cursor must be a non-empty string", "cursor_invalid");
101
+ }
102
+ let json: string;
103
+ try {
104
+ json = Buffer.from(cursor, "base64url").toString("utf8");
105
+ } catch {
106
+ throw new CursorError("cursor is not valid base64url", "cursor_invalid");
107
+ }
108
+ let parsed: unknown;
109
+ try {
110
+ parsed = JSON.parse(json);
111
+ } catch {
112
+ throw new CursorError("cursor payload is not valid JSON", "cursor_invalid");
113
+ }
114
+ if (!parsed || typeof parsed !== "object") {
115
+ throw new CursorError("cursor payload must be an object", "cursor_invalid");
116
+ }
117
+ const p = parsed as Record<string, unknown>;
118
+ if (typeof p.v !== "number" || p.v !== CURSOR_VERSION) {
119
+ throw new CursorError(
120
+ `cursor schema version mismatch (expected ${CURSOR_VERSION}, got ${String(p.v)})`,
121
+ "cursor_invalid",
122
+ );
123
+ }
124
+ if (typeof p.last_updated_at !== "number" || !Number.isFinite(p.last_updated_at)) {
125
+ throw new CursorError("cursor.last_updated_at must be a finite number", "cursor_invalid");
126
+ }
127
+ if (typeof p.last_id !== "string") {
128
+ throw new CursorError("cursor.last_id must be a string", "cursor_invalid");
129
+ }
130
+ if (typeof p.query_hash !== "string" || p.query_hash.length === 0) {
131
+ throw new CursorError("cursor.query_hash must be a non-empty string", "cursor_invalid");
132
+ }
133
+ return {
134
+ v: p.v,
135
+ last_updated_at: p.last_updated_at,
136
+ last_id: p.last_id,
137
+ query_hash: p.query_hash,
138
+ };
139
+ }
140
+
141
+ /**
142
+ * Shape of query parameters that participate in the query-hash.
143
+ *
144
+ * Pagination / cursor parameters themselves are excluded — bumping `limit`
145
+ * or advancing the cursor must NOT invalidate the cursor. Output-shape
146
+ * parameters (`include_content`, etc.) are also excluded — they don't
147
+ * affect *which* rows are returned, just how each row is rendered.
148
+ *
149
+ * The fields here are the *result-set-affecting* inputs. Any future filter
150
+ * added to `QueryOpts` should also be added here.
151
+ */
152
+ export interface QueryHashInputs {
153
+ tags?: string[];
154
+ tagMatch?: "all" | "any";
155
+ excludeTags?: string[];
156
+ hasTags?: boolean;
157
+ hasLinks?: boolean;
158
+ path?: string;
159
+ pathPrefix?: string;
160
+ extension?: string | string[];
161
+ ids?: string[];
162
+ metadata?: Record<string, unknown>;
163
+ dateFrom?: string;
164
+ dateTo?: string;
165
+ dateFilter?: { field?: string; from?: string; to?: string };
166
+ sort?: "asc" | "desc";
167
+ orderBy?: string;
168
+ }
169
+
170
+ /**
171
+ * Compute a stable hash of the query parameters.
172
+ *
173
+ * Stability matters: a caller that passes `{tag: "x", path_prefix: "p"}`
174
+ * on call 1 and `{path_prefix: "p", tag: "x"}` on call 2 (same query,
175
+ * different object-key order) must get the same hash. We achieve this
176
+ * by canonicalizing — sorting array fields (where order is irrelevant),
177
+ * recursively sorting object keys, and stringifying with a deterministic
178
+ * key order.
179
+ *
180
+ * `undefined` fields are dropped before hashing. An empty `tags: []` and
181
+ * an unset `tags` produce the same hash (both mean "no tag filter"), so
182
+ * a caller that conditionally sets it doesn't accidentally invalidate
183
+ * their cursor.
184
+ *
185
+ * Returned as a hex sha256 digest — 64 chars, fits comfortably in the
186
+ * base64url cursor envelope.
187
+ */
188
+ export function computeQueryHash(inputs: QueryHashInputs): string {
189
+ const canonical = canonicalize(inputs);
190
+ const json = JSON.stringify(canonical);
191
+ return createHash("sha256").update(json, "utf8").digest("hex");
192
+ }
193
+
194
+ /**
195
+ * Canonicalize a value for stable hashing.
196
+ *
197
+ * - Drops `undefined` properties (object keys with `undefined` values).
198
+ * - Drops empty-array properties on every object visited, nested ones included (treated equivalent to unset).
199
+ * - Sorts string-array fields where order doesn't affect query semantics
200
+ * (`tags`, `excludeTags`, `ids`, `extension` when array-shaped).
201
+ * - Recursively sorts plain-object keys so JSON.stringify is order-stable.
202
+ * - Primitives and arrays of primitives pass through unchanged (after the
203
+ * array-sort rule above).
204
+ *
205
+ * Inside `metadata`, sub-object keys (operator-clause shapes like
206
+ * `{eq, gte, lt}`) are sorted too — the engine treats `{gte: 5, lt: 10}`
207
+ * and `{lt: 10, gte: 5}` identically, so the cursor binding should as well.
208
+ */
209
+ function canonicalize(value: unknown): unknown {
210
+ if (value === null || value === undefined) return null;
211
+ if (typeof value !== "object") return value;
212
+ if (Array.isArray(value)) {
213
+ // Don't sort arbitrary arrays — order may be semantic (e.g. an `in`
214
+ // operator's array value is order-irrelevant to SQLite, but cursor
215
+ // semantics defer to the caller). For the known order-irrelevant
216
+ // string-array fields we sort at the top-level canonicalization;
217
+ // deep arrays pass through unchanged so a caller's intent is preserved.
218
+ return (value as unknown[]).map((v) => canonicalize(v));
219
+ }
220
+ // Plain object. Sort keys, drop undefineds, sort known order-irrelevant
221
+ // string-array fields.
222
+ const ORDER_IRRELEVANT_STRING_ARRAYS = new Set([
223
+ "tags",
224
+ "excludeTags",
225
+ "ids",
226
+ "extension",
227
+ ]);
228
+ const out: Record<string, unknown> = {};
229
+ const keys = Object.keys(value as object).sort();
230
+ for (const k of keys) {
231
+ const v = (value as Record<string, unknown>)[k];
232
+ if (v === undefined) continue;
233
+ if (Array.isArray(v) && v.length === 0) continue;
234
+ if (ORDER_IRRELEVANT_STRING_ARRAYS.has(k) && Array.isArray(v) && v.every((x) => typeof x === "string")) {
235
+ out[k] = [...(v as string[])].sort();
236
+ continue;
237
+ }
238
+ out[k] = canonicalize(v);
239
+ }
240
+ return out;
241
+ }
242
+
243
+ /**
244
+ * Parse an ISO-8601 timestamp to millisecond epoch.
245
+ *
246
+ * SQLite stores `updated_at` as a string ISO timestamp (set on insert /
247
+ * update by the store layer). The cursor pipes that string out as a
248
+ * millisecond integer for compact serialization. This helper exists so
249
+ * the call sites (mint-cursor + decode-cursor-into-SQL-predicate) share
250
+ * exactly one conversion, with NaN guarded.
251
+ */
252
+ export function isoToMillis(iso: string): number {
253
+ const ms = Date.parse(iso);
254
+ if (!Number.isFinite(ms)) {
255
+ throw new CursorError(`invalid ISO timestamp for cursor: ${iso}`, "cursor_invalid");
256
+ }
257
+ return ms;
258
+ }
259
+
260
+ /**
261
+ * Convert millisecond epoch back to an ISO-8601 timestamp string.
262
+ *
263
+ * Used to translate the cursor's `last_updated_at` into the form SQLite
264
+ * compares (`n.updated_at` is a TEXT column carrying ISO strings). ISO
265
+ * timestamps sort correctly lexicographically when they're all in the same
266
+ * canonical form (Z-suffixed, fixed millisecond precision) — every
267
+ * timestamp vault mints goes through `new Date(...).toISOString()` so the
268
+ * lex-order matches the millis-order.
269
+ */
270
+ export function millisToIso(ms: number): string {
271
+ return new Date(ms).toISOString();
272
+ }
package/core/src/mcp.ts CHANGED
@@ -2,6 +2,7 @@ import { Database } from "bun:sqlite";
2
2
  import type { Store, Note } from "./types.js";
3
3
  import * as noteOps from "./notes.js";
4
4
  import { filterMetadata, MAX_BATCH_SIZE, validateExtension, ExtensionValidationError } from "./notes.js";
5
+ import { QueryError } from "./query-operators.js";
5
6
  import * as linkOps from "./links.js";
6
7
  import * as tagSchemaOps from "./tag-schemas.js";
7
8
  import type { TagFieldSchema } from "./tag-schemas.js";
@@ -189,6 +190,11 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
189
190
  sort: { type: "string", enum: ["asc", "desc"], description: "Sort by created_at" },
190
191
  limit: { type: "number", description: "Max results (default 50)" },
191
192
  offset: { type: "number", description: "Pagination offset (default 0)" },
193
+ cursor: {
194
+ type: "string",
195
+ description:
196
+ "Opaque cursor for 'since last checked' agent loops (vault#313). First call: omit. The response will include `next_cursor` — pass it on the subsequent call to receive only notes created or updated since the prior page. The cursor binds to the query's filters (tag, path, metadata, etc.); changing them between calls returns a structured `cursor_query_mismatch` error. Pagination via cursor orders results by `updated_at ASC` and is mutually exclusive with `order_by` and `sort: \"desc\"`. The response shape switches to `{notes, next_cursor}` when this parameter is present.",
197
+ },
192
198
  include_content: { type: "boolean", description: "Include note content (default: true for single, false for list)" },
193
199
  include_metadata: {
194
200
  oneOf: [
@@ -254,8 +260,32 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
254
260
  nearScope = new Set([anchor.id, ...traversed.map((t) => t.noteId)]);
255
261
  }
256
262
 
263
+ // --- Cursor mode (vault#313) ---
264
+ // When the caller passes `cursor`, the response shape switches to
265
+ // `{notes, next_cursor}` and `queryNotesPaged` handles the keyset
266
+ // pagination. Cursor mode is incompatible with full-text search
267
+ // (FTS owns its own ordering — relevance, not updated_at) and
268
+ // graph-neighborhood scoping (`near` would have to rebuild the
269
+ // neighborhood every call to be cursor-stable; we punt for now).
270
+ // Both surface as INVALID_QUERY rather than silently returning
271
+ // wrong rows.
272
+ const cursorMode = typeof params.cursor === "string" && params.cursor.length > 0;
273
+ if (cursorMode && params.search) {
274
+ throw new QueryError(
275
+ `cursor is incompatible with full-text search — FTS has its own ordering. Use date_filter on updated_at for since-last-checked search.`,
276
+ "INVALID_QUERY",
277
+ );
278
+ }
279
+ if (cursorMode && params.near) {
280
+ throw new QueryError(
281
+ `cursor is incompatible with near (graph neighborhood). Resolve the neighborhood first, then iterate with cursor + ids.`,
282
+ "INVALID_QUERY",
283
+ );
284
+ }
285
+
257
286
  // --- Full-text search ---
258
287
  let results: Note[];
288
+ let nextCursor: string | null = null;
259
289
  if (params.search) {
260
290
  // Normalize tag param
261
291
  const tags = normalizeTags(params.tag);
@@ -277,12 +307,13 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
277
307
  // unknown keys silently; aliasing here closes the silent-no-op gap.
278
308
  const excludeTagsRaw = params.exclude_tags ?? params.excludeTags ?? params.exclude_tag;
279
309
  const excludeTags = normalizeTags(excludeTagsRaw);
280
- // Route through `store.queryNotes` (not `noteOps.queryNotes`) so
281
- // tag-hierarchy expansion fires for MCP callers the same as for
282
- // HTTP REST callers — `tag: "manual"` matches descendants declared
283
- // via `_tags/*` config notes. The previous direct-noteOps call
284
- // bypassed the wrapper and silently dropped hierarchy expansion.
285
- results = await store.queryNotes({
310
+ // Route through `store.queryNotes`/`queryNotesPaged` (not the raw
311
+ // `noteOps` exports) so tag-hierarchy expansion fires for MCP
312
+ // callers the same as for HTTP REST callers — `tag: "manual"`
313
+ // matches descendants declared via `_tags/*` config notes. The
314
+ // previous direct-noteOps call bypassed the wrapper and silently
315
+ // dropped hierarchy expansion.
316
+ const queryOpts = {
286
317
  tags,
287
318
  tagMatch: (params.tag_match as "all" | "any") ?? (tags && tags.length > 1 ? "any" : undefined),
288
319
  excludeTags,
@@ -307,7 +338,15 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
307
338
  orderBy: params.order_by as string | undefined,
308
339
  limit: (params.limit as number) ?? 50,
309
340
  offset: params.offset as number | undefined,
310
- });
341
+ cursor: cursorMode ? (params.cursor as string) : undefined,
342
+ };
343
+ if (cursorMode) {
344
+ const page = await store.queryNotesPaged(queryOpts);
345
+ results = page.notes;
346
+ nextCursor = page.next_cursor;
347
+ } else {
348
+ results = await store.queryNotes(queryOpts);
349
+ }
311
350
  }
312
351
 
313
352
  // For full-text search the post-filter is still the right shape — FTS
@@ -347,9 +386,14 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
347
386
  if (params.include_attachments) enriched.attachments = await store.getAttachments(n.id);
348
387
  enrichedOut.push(enriched);
349
388
  }
389
+ // Cursor mode wraps the list in `{notes, next_cursor}` so callers can
390
+ // chain calls without tracking a watermark client-side. Legacy
391
+ // callers (no `cursor` param) still get the flat array.
392
+ if (cursorMode) return { notes: enrichedOut, next_cursor: nextCursor };
350
393
  return enrichedOut;
351
394
  }
352
395
 
396
+ if (cursorMode) return { notes: output, next_cursor: nextCursor };
353
397
  return output;
354
398
  },
355
399
  },
package/core/src/notes.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import { Database, type SQLQueryBindings } from "bun:sqlite";
2
- import type { Note, NoteIndex, QueryOpts, VaultStats } from "./types.js";
2
+ import type { Note, NoteIndex, QueryOpts, QueryNotesPage, VaultStats } from "./types.js";
3
3
  import { normalizePath } from "./paths.js";
4
4
  import {
5
5
  buildOperatorClause,
@@ -7,6 +7,17 @@ import {
7
7
  QueryError,
8
8
  requireIndexedField,
9
9
  } from "./query-operators.js";
10
+ import {
11
+ CURSOR_VERSION,
12
+ CursorError,
13
+ computeQueryHash,
14
+ decodeCursor,
15
+ encodeCursor,
16
+ isoToMillis,
17
+ millisToIso,
18
+ type CursorPayload,
19
+ type QueryHashInputs,
20
+ } from "./cursor.js";
10
21
 
11
22
  let idCounter = 0;
12
23
 
@@ -663,9 +674,68 @@ export function queryNotes(db: Database, opts: QueryOpts): Note[] {
663
674
  }
664
675
  }
665
676
 
677
+ // ---- Cursor predicate (vault#313) ----
678
+ //
679
+ // When a cursor is present, decode it, verify its query_hash matches the
680
+ // current query, and add a keyset predicate of the form:
681
+ //
682
+ // (updated_at > last_updated_at)
683
+ // OR (updated_at = last_updated_at AND id > last_id)
684
+ //
685
+ // The cursor also forces ORDER BY n.updated_at ASC, n.id ASC so the
686
+ // watermark math is sound — paginating by updated_at while ordering
687
+ // by created_at would skip rows whose update timestamp differs from
688
+ // their creation timestamp. `orderBy` and `sort: "desc"` are mutually
689
+ // exclusive with cursor mode (a "since last checked" loop wants
690
+ // ascending updated_at, full stop); we reject with INVALID_QUERY so
691
+ // callers don't silently get a broken iteration.
692
+ let cursorPayload: CursorPayload | null = null;
693
+ if (opts.cursor) {
694
+ if (opts.orderBy) {
695
+ throw new QueryError(
696
+ `cursor and order_by are mutually exclusive — cursor pagination forces order by updated_at`,
697
+ "INVALID_QUERY",
698
+ );
699
+ }
700
+ if (opts.sort === "desc") {
701
+ throw new QueryError(
702
+ `cursor pagination requires ascending sort by updated_at — descending sort with a cursor would skip newly-written rows`,
703
+ "INVALID_QUERY",
704
+ );
705
+ }
706
+ cursorPayload = decodeCursor(opts.cursor);
707
+ const expectedHash = computeQueryHash(toQueryHashInputs(opts));
708
+ if (cursorPayload.query_hash !== expectedHash) {
709
+ throw new CursorError(
710
+ `cursor was minted for a different query — drop the cursor and restart iteration`,
711
+ "cursor_query_mismatch",
712
+ );
713
+ }
714
+ // Translate the millis watermark back to an ISO string for the SQL
715
+ // comparison. SQLite's `n.updated_at` is TEXT in canonical ISO form
716
+ // (the store's `toISOString()` output), and ISO timestamps sort
717
+ // lexicographically in the same order as their millisecond epochs
718
+ // when they all use the same canonical form — which every timestamp
719
+ // vault mints does. Cursors minted on heterogeneous timestamps
720
+ // (e.g. an import that preserved unusual formatting) are still
721
+ // safe: we round-trip the cursor's millis through `new Date()`'s
722
+ // canonical ISO so the comparison is apples-to-apples.
723
+ const cursorIso = millisToIso(cursorPayload.last_updated_at);
724
+ conditions.push(
725
+ "(n.updated_at > ? OR (n.updated_at = ? AND n.id > ?))",
726
+ );
727
+ params.push(cursorIso, cursorIso, cursorPayload.last_id);
728
+ }
729
+
666
730
  const direction = opts.sort === "desc" ? "DESC" : "ASC";
667
731
  let orderBy: string;
668
- if (opts.orderBy) {
732
+ if (opts.cursor) {
733
+ // Cursor mode forces a deterministic keyset order. `id` is the
734
+ // tiebreaker — without it, two notes sharing an `updated_at` would
735
+ // be at the mercy of SQLite's row order and the next page could
736
+ // miss or duplicate one.
737
+ orderBy = "n.updated_at ASC, n.id ASC";
738
+ } else if (opts.orderBy) {
669
739
  requireIndexedField(db, opts.orderBy);
670
740
  // `orderBy` came from indexed_fields (validated on declaration), so
671
741
  // the column name is safe to interpolate. Append created_at as a
@@ -697,6 +767,98 @@ export function queryNotes(db: Database, opts: QueryOpts): Note[] {
697
767
  });
698
768
  }
699
769
 
770
+ /**
771
+ * Extract the result-set-affecting subset of `QueryOpts` for cursor hashing.
772
+ *
773
+ * `cursor`, `limit`, `offset`, `_tagsExpanded` (internal cache key) are
774
+ * excluded — they don't change which rows match, just how many or how
775
+ * the iteration advances. See `core/src/cursor.ts` for the rationale.
776
+ */
777
+ function toQueryHashInputs(opts: QueryOpts): QueryHashInputs {
778
+ return {
779
+ tags: opts.tags,
780
+ tagMatch: opts.tagMatch,
781
+ excludeTags: opts.excludeTags,
782
+ hasTags: opts.hasTags,
783
+ hasLinks: opts.hasLinks,
784
+ path: opts.path,
785
+ pathPrefix: opts.pathPrefix,
786
+ extension: opts.extension,
787
+ ids: opts.ids,
788
+ metadata: opts.metadata,
789
+ dateFrom: opts.dateFrom,
790
+ dateTo: opts.dateTo,
791
+ dateFilter: opts.dateFilter,
792
+ sort: opts.sort,
793
+ orderBy: opts.orderBy,
794
+ };
795
+ }
796
+
797
+ /**
798
+ * Cursor-paginated wrapper around `queryNotes` (vault#313).
799
+ *
800
+ * Always returns `{ notes, next_cursor }`. `next_cursor` advances even on
801
+ * an empty result page — the caller can persist a single watermark and
802
+ * keep polling without special-casing the empty-page condition. The
803
+ * empty-page cursor's `last_updated_at` is:
804
+ * - the prior cursor's `last_updated_at` (when `opts.cursor` was set), or
805
+ * - the sentinel `0` (when no cursor was passed).
806
+ *
807
+ * Holding the watermark at the prior value on an empty page is the
808
+ * conservative choice: if a note is written between this call and the
809
+ * next at a timestamp BEFORE wall-clock-now (clock skew, batch import
810
+ * with explicit `created_at`), advancing the watermark to `now()` would
811
+ * skip it. The watermark advances only when actual rows are returned.
812
+ *
813
+ * First-call semantics (`opts.cursor` absent): query_hash is computed
814
+ * from the result-set-affecting opts and bound into the minted cursor.
815
+ * If zero rows match, the returned cursor encodes
816
+ * `last_updated_at = 0, last_id = ""` so the next call returns
817
+ * everything written since (the keyset predicate
818
+ * `updated_at > 0 OR (updated_at = 0 AND id > "")` matches every row
819
+ * with a non-null `updated_at` greater than the unix epoch).
820
+ */
821
+ export function queryNotesPaged(db: Database, opts: QueryOpts): QueryNotesPage {
822
+ const notes = queryNotes(db, opts);
823
+ const queryHash = computeQueryHash(toQueryHashInputs(opts));
824
+
825
+ // Watermark math: pick the larger of (last returned row, prior cursor
826
+ // watermark, sentinel). When the page is empty, fall back to the prior
827
+ // cursor's watermark — see the JSDoc rationale above.
828
+ let lastUpdatedAt = 0;
829
+ let lastId = "";
830
+ if (opts.cursor) {
831
+ // Re-decode (we already validated in queryNotes); this is cheap.
832
+ const prior = decodeCursor(opts.cursor);
833
+ lastUpdatedAt = prior.last_updated_at;
834
+ lastId = prior.last_id;
835
+ }
836
+ if (notes.length > 0) {
837
+ // queryNotes with a cursor orders by (updated_at ASC, id ASC), so
838
+ // the last note in the array is the new watermark. When no cursor
839
+ // was passed, the SQL is ordered by created_at; we still want the
840
+ // cursor to advance to the MAX (updated_at, id) of this page so
841
+ // the next call resumes correctly. Compute the max explicitly.
842
+ for (const note of notes) {
843
+ const updatedIso = note.updatedAt ?? note.createdAt;
844
+ const ms = isoToMillis(updatedIso);
845
+ if (ms > lastUpdatedAt || (ms === lastUpdatedAt && note.id > lastId)) {
846
+ lastUpdatedAt = ms;
847
+ lastId = note.id;
848
+ }
849
+ }
850
+ }
851
+
852
+ const next_cursor = encodeCursor({
853
+ v: CURSOR_VERSION,
854
+ last_updated_at: lastUpdatedAt,
855
+ last_id: lastId,
856
+ query_hash: queryHash,
857
+ });
858
+
859
+ return { notes, next_cursor };
860
+ }
861
+
700
862
  export function searchNotes(
701
863
  db: Database,
702
864
  query: string,