@openparachute/vault 0.4.7-rc.2 → 0.4.8-rc.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.parachute/module.json +1 -1
- package/README.md +78 -41
- package/core/src/connection-pragmas.test.ts +232 -0
- package/core/src/core.test.ts +257 -0
- package/core/src/cursor.test.ts +160 -0
- package/core/src/cursor.ts +272 -0
- package/core/src/mcp.ts +51 -7
- package/core/src/notes.ts +164 -2
- package/core/src/schema.ts +106 -5
- package/core/src/store.ts +11 -1
- package/core/src/types.ts +32 -0
- package/package.json +7 -3
- package/src/auth-status.ts +4 -0
- package/src/auth.test.ts +5 -112
- package/src/auto-transcribe.test.ts +116 -0
- package/src/auto-transcribe.ts +48 -0
- package/src/backup.ts +17 -3
- package/src/cli.ts +95 -66
- package/src/config.test.ts +26 -0
- package/src/config.ts +53 -1
- package/src/db.ts +15 -2
- package/src/export-watch.test.ts +21 -0
- package/src/mcp-install-interactive.test.ts +23 -2
- package/src/mcp-install-interactive.ts +21 -2
- package/src/mcp-install.test.ts +40 -0
- package/src/mcp-tools.ts +17 -1
- package/src/module-config.ts +70 -14
- package/src/module-manifest.test.ts +114 -0
- package/src/module-manifest.ts +104 -0
- package/src/oauth-discovery.ts +95 -0
- package/src/owner-auth.ts +22 -149
- package/src/routes.ts +268 -51
- package/src/routing.test.ts +102 -99
- package/src/routing.ts +33 -47
- package/src/scribe-discovery.test.ts +77 -0
- package/src/scribe-discovery.ts +91 -0
- package/src/scribe-env.test.ts +66 -1
- package/src/scribe-env.ts +42 -1
- package/src/self-register.test.ts +412 -0
- package/src/self-register.ts +247 -0
- package/src/server.ts +47 -23
- package/src/transcript-note.test.ts +171 -0
- package/src/transcript-note.ts +189 -0
- package/src/transcription-registry.ts +22 -0
- package/src/transcription-worker.test.ts +250 -0
- package/src/transcription-worker.ts +186 -27
- package/src/vault-name.ts +3 -2
- package/src/vault.test.ts +347 -0
- package/web/ui/dist/assets/index-BOa-JJtV.css +1 -0
- package/web/ui/dist/assets/index-BzA5LgE3.js +60 -0
- package/web/ui/dist/index.html +14 -0
- package/web/ui/tsconfig.json +21 -0
- package/src/oauth.test.ts +0 -2156
- package/src/oauth.ts +0 -973
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Opaque cursors for `query-notes` (vault#313).
|
|
3
|
+
*
|
|
4
|
+
* Agent loops want "give me notes I haven't seen since last call." Today's
|
|
5
|
+
* pattern — pass `dateFilter: { field: "updated_at", from: <iso> }` and
|
|
6
|
+
* track the timestamp client-side — is brittle: the client has to remember
|
|
7
|
+
* the watermark, two notes at the same millisecond may collide, and a
|
|
8
|
+
* second call landed mid-millisecond can miss or double-count rows.
|
|
9
|
+
*
|
|
10
|
+
* The opaque-cursor pattern (Stripe, GitHub, et al.) fixes this. The server
|
|
11
|
+
* returns a `next_cursor: string` on each query response; the client passes
|
|
12
|
+
* it back on the next call and the server resumes from exactly where it
|
|
13
|
+
* left off. The cursor is base64url-encoded JSON the client must not
|
|
14
|
+
* inspect — internal layout can evolve without breaking callers.
|
|
15
|
+
*
|
|
16
|
+
* # Cursor payload
|
|
17
|
+
*
|
|
18
|
+
* ```ts
|
|
19
|
+
* {
|
|
20
|
+
* v: 1, // schema version
|
|
21
|
+
* last_updated_at: number, // millisecond epoch of the last seen note
|
|
22
|
+
* last_id: string, // ID of the last seen note — tiebreaker
|
|
23
|
+
* query_hash: string, // sha256 of normalized query params (hex)
|
|
24
|
+
* }
|
|
25
|
+
* ```
|
|
26
|
+
*
|
|
27
|
+
* - `last_updated_at` is millisecond epoch (not ISO) so cursor bytes stay
|
|
28
|
+
* compact and the tiebreaker math is integer.
|
|
29
|
+
* - `last_id` is the tiebreaker — when two notes share `updated_at`, the
|
|
30
|
+
* keyset query advances `id > last_id` at that timestamp so neither is
|
|
31
|
+
* skipped nor returned twice.
|
|
32
|
+
* - `query_hash` binds the cursor to the exact query it was minted for.
|
|
33
|
+
* Passing a cursor minted on `tag: "foo"` into a call for `tag: "bar"`
|
|
34
|
+
* would silently return the wrong page; mismatch raises a structured
|
|
35
|
+
* 400 (`cursor_query_mismatch`) instead.
|
|
36
|
+
*
|
|
37
|
+
* # Why JSON inside base64url
|
|
38
|
+
*
|
|
39
|
+
* A flat-string format (`<ts>:<id>:<hash>`) is two characters shorter but
|
|
40
|
+
* forecloses on optional fields. JSON gives us a schema-versioned envelope
|
|
41
|
+
* — if v2 needs additional state (e.g. a search-relevance secondary key),
|
|
42
|
+
* old clients keep working and new clients can read both.
|
|
43
|
+
*
|
|
44
|
+
* # Race safety
|
|
45
|
+
*
|
|
46
|
+
* The cursor stores the maximum-`updated_at`+`id` of the LAST returned
|
|
47
|
+
* page. The next call's keyset predicate is:
|
|
48
|
+
*
|
|
49
|
+
* (updated_at > last_updated_at)
|
|
50
|
+
* OR (updated_at = last_updated_at AND id > last_id)
|
|
51
|
+
*
|
|
52
|
+
* A note written between calls A and B at a brand-new `updated_at` is
|
|
53
|
+
* picked up by the first half of the predicate. A note written at the
|
|
54
|
+
* exact same `updated_at` as the cursor's watermark (uncommon — wall-clock
|
|
55
|
+
* collisions are rare at millisecond resolution but not impossible) is
|
|
56
|
+
* picked up by the tiebreaker because the SQL `ORDER BY updated_at ASC,
|
|
57
|
+
* id ASC` ensures stable interleaving with the prior page. Without the
|
|
58
|
+
* tiebreaker, two notes sharing an `updated_at` would be at the mercy of
|
|
59
|
+
* SQLite's row order, which is "stable in practice" but not contract.
|
|
60
|
+
*/
|
|
61
|
+
|
|
62
|
+
import { createHash } from "node:crypto";
|
|
63
|
+
|
|
64
|
+
export const CURSOR_VERSION = 1;
|
|
65
|
+
|
|
66
|
+
export interface CursorPayload {
  /**
   * Layout version of this payload. `decodeCursor` rejects any value other
   * than `CURSOR_VERSION`, so an incompatible layout change must bump it.
   */
  v: number;

  /** `updated_at` of the last note returned, as a millisecond epoch. */
  last_updated_at: number;

  /**
   * `id` of the last note returned. Breaks ties between notes that share
   * the same `updated_at` millisecond.
   */
  last_id: string;

  /**
   * Hex sha256 of the normalized query parameters the cursor was minted
   * for. A mismatch on a later call is reported as `cursor_query_mismatch`.
   */
  query_hash: string;
}
|
|
76
|
+
|
|
77
|
+
/** Machine-readable reasons a cursor can be rejected. */
type CursorErrorCode = "cursor_invalid" | "cursor_query_mismatch";

/**
 * Error raised for a cursor the server cannot honor — either structurally
 * malformed (`cursor_invalid`) or minted for a different query
 * (`cursor_query_mismatch`).
 *
 * The MCP / REST wrapping layer is expected to catch this and answer with a
 * 400 carrying `code`; the caller's remedy is to discard the cursor and
 * start the iteration over.
 */
export class CursorError extends Error {
  override name = "CursorError";

  /** Structured reason, suitable for surfacing verbatim to API clients. */
  code: CursorErrorCode;

  constructor(message: string, code: CursorErrorCode) {
    super(message);
    this.code = code;
  }
}
|
|
90
|
+
|
|
91
|
+
/**
 * Serialize a cursor payload into the opaque string handed to clients.
 *
 * The payload is JSON-stringified and the UTF-8 bytes are rendered as
 * base64url, so the result is safe to place in a URL or JSON body as-is.
 *
 * @param payload - The cursor state to serialize.
 * @returns The base64url-encoded JSON form of `payload`.
 */
export function encodeCursor(payload: CursorPayload): string {
  const bytes = Buffer.from(JSON.stringify(payload), "utf8");
  return bytes.toString("base64url");
}
|
|
96
|
+
|
|
97
|
+
/**
 * Decode an opaque cursor string back into its validated payload.
 *
 * Every structural problem — wrong argument type, non-JSON payload, unknown
 * schema version, missing or ill-typed fields — is reported as a
 * `CursorError` with code `cursor_invalid`.
 *
 * `last_updated_at` must additionally fall inside the range a JavaScript
 * `Date` can represent (±8.64e15 ms around the epoch). `millisToIso` passes
 * this value to `Date.prototype.toISOString()`, which throws a bare
 * `RangeError` outside that range; rejecting it here keeps a hand-crafted
 * cursor on the structured `cursor_invalid` path instead of escaping as an
 * unclassified error.
 *
 * @param cursor - The base64url string previously returned as `next_cursor`.
 * @returns The decoded payload with all four fields type-checked.
 * @throws CursorError (`cursor_invalid`) on any validation failure.
 */
export function decodeCursor(cursor: string): CursorPayload {
  // Largest absolute time value (ms since epoch) a Date can hold.
  const MAX_DATE_MS = 8.64e15;

  if (typeof cursor !== "string" || cursor.length === 0) {
    throw new CursorError("cursor must be a non-empty string", "cursor_invalid");
  }

  let json: string;
  try {
    // Node's base64url decoding is lenient, so garbage input usually
    // surfaces at the JSON.parse step below rather than here; the guard is
    // kept in case a runtime's Buffer implementation does throw.
    json = Buffer.from(cursor, "base64url").toString("utf8");
  } catch {
    throw new CursorError("cursor is not valid base64url", "cursor_invalid");
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(json);
  } catch {
    throw new CursorError("cursor payload is not valid JSON", "cursor_invalid");
  }
  if (!parsed || typeof parsed !== "object") {
    throw new CursorError("cursor payload must be an object", "cursor_invalid");
  }

  const p = parsed as Record<string, unknown>;
  if (typeof p.v !== "number" || p.v !== CURSOR_VERSION) {
    throw new CursorError(
      `cursor schema version mismatch (expected ${CURSOR_VERSION}, got ${String(p.v)})`,
      "cursor_invalid",
    );
  }
  if (typeof p.last_updated_at !== "number" || !Number.isFinite(p.last_updated_at)) {
    throw new CursorError("cursor.last_updated_at must be a finite number", "cursor_invalid");
  }
  if (Math.abs(p.last_updated_at) > MAX_DATE_MS) {
    // Finite but not convertible to an ISO timestamp — see the JSDoc above.
    throw new CursorError(
      "cursor.last_updated_at is outside the representable timestamp range",
      "cursor_invalid",
    );
  }
  if (typeof p.last_id !== "string") {
    throw new CursorError("cursor.last_id must be a string", "cursor_invalid");
  }
  if (typeof p.query_hash !== "string" || p.query_hash.length === 0) {
    throw new CursorError("cursor.query_hash must be a non-empty string", "cursor_invalid");
  }

  // Return only the known fields so stray keys in the payload are dropped.
  return {
    v: p.v,
    last_updated_at: p.last_updated_at,
    last_id: p.last_id,
    query_hash: p.query_hash,
  };
}
|
|
140
|
+
|
|
141
|
+
/**
 * The subset of query parameters that feeds the cursor's `query_hash`.
 *
 * Only inputs that change *which* rows a query matches belong here.
 * Deliberately left out:
 *
 * - pagination / cursor parameters (`limit`, the cursor itself) — raising
 *   `limit` or advancing the cursor must NOT invalidate the cursor;
 * - output-shape parameters (`include_content`, etc.) — they alter how a row
 *   is rendered, not whether it is returned.
 *
 * When a new filter is added to `QueryOpts`, add it here as well so cursors
 * stay bound to the full query.
 */
export interface QueryHashInputs {
  // Tag filters
  tags?: string[];
  tagMatch?: "all" | "any";
  excludeTags?: string[];
  hasTags?: boolean;

  // Link / location filters
  hasLinks?: boolean;
  path?: string;
  pathPrefix?: string;
  extension?: string | string[];
  ids?: string[];

  // Metadata filter
  metadata?: Record<string, unknown>;

  // Date filters
  dateFrom?: string;
  dateTo?: string;
  dateFilter?: { field?: string; from?: string; to?: string };

  // Ordering
  sort?: "asc" | "desc";
  orderBy?: string;
}
|
|
169
|
+
|
|
170
|
+
/**
 * Derive the hash that binds a cursor to the query it was minted for.
 *
 * The same logical query must always hash to the same value, regardless of
 * how the caller happened to spell it: `{tag: "x", path_prefix: "p"}` and
 * `{path_prefix: "p", tag: "x"}` are one query. To get there the inputs are
 * run through `canonicalize` (order-irrelevant arrays sorted, object keys
 * sorted recursively) before being stringified.
 *
 * Fields set to `undefined` vanish before hashing, and `tags: []` hashes the
 * same as no `tags` at all — both mean "no tag filter" — so a caller that
 * sets a filter conditionally does not invalidate its own cursor.
 *
 * @param inputs - The result-set-affecting query parameters.
 * @returns A 64-character hex sha256 digest.
 */
export function computeQueryHash(inputs: QueryHashInputs): string {
  const hasher = createHash("sha256");
  hasher.update(JSON.stringify(canonicalize(inputs)), "utf8");
  return hasher.digest("hex");
}
|
|
193
|
+
|
|
194
|
+
/**
 * Top-level `QueryHashInputs` keys whose string-array values act as
 * order-insensitive filters and are therefore sorted before hashing.
 */
const ORDER_IRRELEVANT_STRING_ARRAYS: ReadonlySet<string> = new Set([
  "tags",
  "excludeTags",
  "ids",
  "extension",
]);

/**
 * Canonicalize a value so `JSON.stringify` of the result is stable.
 *
 * Rules:
 * - `null` / `undefined` values become `null`; object properties whose value
 *   is `undefined` are dropped.
 * - Plain-object keys are sorted recursively. This covers operator-clause
 *   shapes inside `metadata`, so `{gte: 5, lt: 10}` and `{lt: 10, gte: 5}`
 *   canonicalize identically.
 * - At the TOP LEVEL ONLY: empty arrays are dropped (equivalent to the field
 *   being unset), and the known order-irrelevant string-array fields
 *   (`tags`, `excludeTags`, `ids`, `extension` when array-shaped) are sorted.
 * - Nested arrays keep their elements in the caller's order, and a nested
 *   empty array is preserved. Applying the top-level rules at depth would
 *   make distinct queries collide — e.g. `metadata: {x: {in: []}}` with
 *   `metadata: {x: {}}`, or a metadata field that merely happens to be
 *   named `tags` — and a cursor could then be replayed against a different
 *   query without tripping `cursor_query_mismatch`.
 *
 * @param value - The value to canonicalize.
 * @param topLevel - Internal recursion flag; callers leave it at the default.
 *   `true` only for the outermost object.
 * @returns A structurally equivalent value with deterministic key order.
 */
function canonicalize(value: unknown, topLevel = true): unknown {
  if (value === null || value === undefined) return null;
  if (typeof value !== "object") return value;

  if (Array.isArray(value)) {
    // Element order may be meaningful to the caller; preserve it.
    return value.map((item) => canonicalize(item, false));
  }

  const source = value as Record<string, unknown>;
  const out: Record<string, unknown> = {};
  for (const key of Object.keys(source).sort()) {
    const entry = source[key];
    if (entry === undefined) continue;

    if (topLevel && Array.isArray(entry)) {
      // An empty top-level filter array means "no filter" — same as unset.
      if (entry.length === 0) continue;
      if (
        ORDER_IRRELEVANT_STRING_ARRAYS.has(key) &&
        entry.every((x) => typeof x === "string")
      ) {
        // Copy before sorting so the caller's array is never mutated.
        out[key] = [...(entry as string[])].sort();
        continue;
      }
    }

    out[key] = canonicalize(entry, false);
  }
  return out;
}
|
|
242
|
+
|
|
243
|
+
/**
 * Convert an ISO-8601 timestamp string into a millisecond epoch.
 *
 * Notes carry `updated_at` as an ISO string, while the cursor stores the
 * watermark as an integer for compactness. Routing every string→millis
 * conversion through this one helper keeps cursor minting and cursor
 * decoding in agreement and guarantees an unparseable timestamp never
 * leaks into a cursor as `NaN`.
 *
 * @param iso - Timestamp text accepted by `Date.parse`.
 * @returns Milliseconds since the Unix epoch.
 * @throws CursorError (`cursor_invalid`) when `iso` cannot be parsed.
 */
export function isoToMillis(iso: string): number {
  const parsed = Date.parse(iso);
  if (Number.isFinite(parsed)) return parsed;
  throw new CursorError(`invalid ISO timestamp for cursor: ${iso}`, "cursor_invalid");
}
|
|
259
|
+
|
|
260
|
+
/**
 * Render a millisecond epoch as a canonical ISO-8601 string.
 *
 * The cursor's `last_updated_at` has to be compared against `n.updated_at`,
 * a TEXT column holding ISO strings. Lexicographic comparison of ISO
 * timestamps agrees with chronological order only when every value shares
 * one canonical shape (Z suffix, fixed millisecond precision). Timestamps
 * minted by vault come from `new Date(...).toISOString()`, and this helper
 * produces the same shape.
 *
 * @param ms - Milliseconds since the Unix epoch.
 * @returns The `toISOString()` rendering of that instant.
 */
export function millisToIso(ms: number): string {
  const instant = new Date(ms);
  return instant.toISOString();
}
|
package/core/src/mcp.ts
CHANGED
|
@@ -2,6 +2,7 @@ import { Database } from "bun:sqlite";
|
|
|
2
2
|
import type { Store, Note } from "./types.js";
|
|
3
3
|
import * as noteOps from "./notes.js";
|
|
4
4
|
import { filterMetadata, MAX_BATCH_SIZE, validateExtension, ExtensionValidationError } from "./notes.js";
|
|
5
|
+
import { QueryError } from "./query-operators.js";
|
|
5
6
|
import * as linkOps from "./links.js";
|
|
6
7
|
import * as tagSchemaOps from "./tag-schemas.js";
|
|
7
8
|
import type { TagFieldSchema } from "./tag-schemas.js";
|
|
@@ -189,6 +190,11 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
|
|
|
189
190
|
sort: { type: "string", enum: ["asc", "desc"], description: "Sort by created_at" },
|
|
190
191
|
limit: { type: "number", description: "Max results (default 50)" },
|
|
191
192
|
offset: { type: "number", description: "Pagination offset (default 0)" },
|
|
193
|
+
cursor: {
|
|
194
|
+
type: "string",
|
|
195
|
+
description:
|
|
196
|
+
"Opaque cursor for 'since last checked' agent loops (vault#313). First call: omit. The response will include `next_cursor` — pass it on the subsequent call to receive only notes created or updated since the prior page. The cursor binds to the query's filters (tag, path, metadata, etc.); changing them between calls returns a structured `cursor_query_mismatch` error. Pagination via cursor orders results by `updated_at ASC` and is mutually exclusive with `order_by` and `sort: \"desc\"`. The response shape switches to `{notes, next_cursor}` when this parameter is present.",
|
|
197
|
+
},
|
|
192
198
|
include_content: { type: "boolean", description: "Include note content (default: true for single, false for list)" },
|
|
193
199
|
include_metadata: {
|
|
194
200
|
oneOf: [
|
|
@@ -254,8 +260,32 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
|
|
|
254
260
|
nearScope = new Set([anchor.id, ...traversed.map((t) => t.noteId)]);
|
|
255
261
|
}
|
|
256
262
|
|
|
263
|
+
// --- Cursor mode (vault#313) ---
|
|
264
|
+
// When the caller passes `cursor`, the response shape switches to
|
|
265
|
+
// `{notes, next_cursor}` and `queryNotesPaged` handles the keyset
|
|
266
|
+
// pagination. Cursor mode is incompatible with full-text search
|
|
267
|
+
// (FTS owns its own ordering — relevance, not updated_at) and
|
|
268
|
+
// graph-neighborhood scoping (`near` would have to rebuild the
|
|
269
|
+
// neighborhood every call to be cursor-stable; we punt for now).
|
|
270
|
+
// Both surface as INVALID_QUERY rather than silently returning
|
|
271
|
+
// wrong rows.
|
|
272
|
+
const cursorMode = typeof params.cursor === "string" && params.cursor.length > 0;
|
|
273
|
+
if (cursorMode && params.search) {
|
|
274
|
+
throw new QueryError(
|
|
275
|
+
`cursor is incompatible with full-text search — FTS has its own ordering. Use date_filter on updated_at for since-last-checked search.`,
|
|
276
|
+
"INVALID_QUERY",
|
|
277
|
+
);
|
|
278
|
+
}
|
|
279
|
+
if (cursorMode && params.near) {
|
|
280
|
+
throw new QueryError(
|
|
281
|
+
`cursor is incompatible with near (graph neighborhood). Resolve the neighborhood first, then iterate with cursor + ids.`,
|
|
282
|
+
"INVALID_QUERY",
|
|
283
|
+
);
|
|
284
|
+
}
|
|
285
|
+
|
|
257
286
|
// --- Full-text search ---
|
|
258
287
|
let results: Note[];
|
|
288
|
+
let nextCursor: string | null = null;
|
|
259
289
|
if (params.search) {
|
|
260
290
|
// Normalize tag param
|
|
261
291
|
const tags = normalizeTags(params.tag);
|
|
@@ -277,12 +307,13 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
|
|
|
277
307
|
// unknown keys silently; aliasing here closes the silent-no-op gap.
|
|
278
308
|
const excludeTagsRaw = params.exclude_tags ?? params.excludeTags ?? params.exclude_tag;
|
|
279
309
|
const excludeTags = normalizeTags(excludeTagsRaw);
|
|
280
|
-
// Route through `store.queryNotes` (not
|
|
281
|
-
// tag-hierarchy expansion fires for MCP
|
|
282
|
-
// HTTP REST callers — `tag: "manual"`
|
|
283
|
-
// via `_tags/*` config notes. The
|
|
284
|
-
// bypassed the wrapper and silently
|
|
285
|
-
|
|
310
|
+
// Route through `store.queryNotes`/`queryNotesPaged` (not the raw
|
|
311
|
+
// `noteOps` exports) so tag-hierarchy expansion fires for MCP
|
|
312
|
+
// callers the same as for HTTP REST callers — `tag: "manual"`
|
|
313
|
+
// matches descendants declared via `_tags/*` config notes. The
|
|
314
|
+
// previous direct-noteOps call bypassed the wrapper and silently
|
|
315
|
+
// dropped hierarchy expansion.
|
|
316
|
+
const queryOpts = {
|
|
286
317
|
tags,
|
|
287
318
|
tagMatch: (params.tag_match as "all" | "any") ?? (tags && tags.length > 1 ? "any" : undefined),
|
|
288
319
|
excludeTags,
|
|
@@ -307,7 +338,15 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
|
|
|
307
338
|
orderBy: params.order_by as string | undefined,
|
|
308
339
|
limit: (params.limit as number) ?? 50,
|
|
309
340
|
offset: params.offset as number | undefined,
|
|
310
|
-
|
|
341
|
+
cursor: cursorMode ? (params.cursor as string) : undefined,
|
|
342
|
+
};
|
|
343
|
+
if (cursorMode) {
|
|
344
|
+
const page = await store.queryNotesPaged(queryOpts);
|
|
345
|
+
results = page.notes;
|
|
346
|
+
nextCursor = page.next_cursor;
|
|
347
|
+
} else {
|
|
348
|
+
results = await store.queryNotes(queryOpts);
|
|
349
|
+
}
|
|
311
350
|
}
|
|
312
351
|
|
|
313
352
|
// For full-text search the post-filter is still the right shape — FTS
|
|
@@ -347,9 +386,14 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
|
|
|
347
386
|
if (params.include_attachments) enriched.attachments = await store.getAttachments(n.id);
|
|
348
387
|
enrichedOut.push(enriched);
|
|
349
388
|
}
|
|
389
|
+
// Cursor mode wraps the list in `{notes, next_cursor}` so callers can
|
|
390
|
+
// chain calls without tracking a watermark client-side. Legacy
|
|
391
|
+
// callers (no `cursor` param) still get the flat array.
|
|
392
|
+
if (cursorMode) return { notes: enrichedOut, next_cursor: nextCursor };
|
|
350
393
|
return enrichedOut;
|
|
351
394
|
}
|
|
352
395
|
|
|
396
|
+
if (cursorMode) return { notes: output, next_cursor: nextCursor };
|
|
353
397
|
return output;
|
|
354
398
|
},
|
|
355
399
|
},
|
package/core/src/notes.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { Database, type SQLQueryBindings } from "bun:sqlite";
|
|
2
|
-
import type { Note, NoteIndex, QueryOpts, VaultStats } from "./types.js";
|
|
2
|
+
import type { Note, NoteIndex, QueryOpts, QueryNotesPage, VaultStats } from "./types.js";
|
|
3
3
|
import { normalizePath } from "./paths.js";
|
|
4
4
|
import {
|
|
5
5
|
buildOperatorClause,
|
|
@@ -7,6 +7,17 @@ import {
|
|
|
7
7
|
QueryError,
|
|
8
8
|
requireIndexedField,
|
|
9
9
|
} from "./query-operators.js";
|
|
10
|
+
import {
|
|
11
|
+
CURSOR_VERSION,
|
|
12
|
+
CursorError,
|
|
13
|
+
computeQueryHash,
|
|
14
|
+
decodeCursor,
|
|
15
|
+
encodeCursor,
|
|
16
|
+
isoToMillis,
|
|
17
|
+
millisToIso,
|
|
18
|
+
type CursorPayload,
|
|
19
|
+
type QueryHashInputs,
|
|
20
|
+
} from "./cursor.js";
|
|
10
21
|
|
|
11
22
|
let idCounter = 0;
|
|
12
23
|
|
|
@@ -663,9 +674,68 @@ export function queryNotes(db: Database, opts: QueryOpts): Note[] {
|
|
|
663
674
|
}
|
|
664
675
|
}
|
|
665
676
|
|
|
677
|
+
// ---- Cursor predicate (vault#313) ----
|
|
678
|
+
//
|
|
679
|
+
// When a cursor is present, decode it, verify its query_hash matches the
|
|
680
|
+
// current query, and add a keyset predicate of the form:
|
|
681
|
+
//
|
|
682
|
+
// (updated_at > last_updated_at)
|
|
683
|
+
// OR (updated_at = last_updated_at AND id > last_id)
|
|
684
|
+
//
|
|
685
|
+
// The cursor also forces ORDER BY n.updated_at ASC, n.id ASC so the
|
|
686
|
+
// watermark math is sound — paginating by updated_at while ordering
|
|
687
|
+
// by created_at would skip rows whose update timestamp differs from
|
|
688
|
+
// their creation timestamp. `orderBy` and `sort: "desc"` are mutually
|
|
689
|
+
// exclusive with cursor mode (a "since last checked" loop wants
|
|
690
|
+
// ascending updated_at, full stop); we reject with INVALID_QUERY so
|
|
691
|
+
// callers don't silently get a broken iteration.
|
|
692
|
+
let cursorPayload: CursorPayload | null = null;
|
|
693
|
+
if (opts.cursor) {
|
|
694
|
+
if (opts.orderBy) {
|
|
695
|
+
throw new QueryError(
|
|
696
|
+
`cursor and order_by are mutually exclusive — cursor pagination forces order by updated_at`,
|
|
697
|
+
"INVALID_QUERY",
|
|
698
|
+
);
|
|
699
|
+
}
|
|
700
|
+
if (opts.sort === "desc") {
|
|
701
|
+
throw new QueryError(
|
|
702
|
+
`cursor pagination requires ascending sort by updated_at — descending sort with a cursor would skip newly-written rows`,
|
|
703
|
+
"INVALID_QUERY",
|
|
704
|
+
);
|
|
705
|
+
}
|
|
706
|
+
cursorPayload = decodeCursor(opts.cursor);
|
|
707
|
+
const expectedHash = computeQueryHash(toQueryHashInputs(opts));
|
|
708
|
+
if (cursorPayload.query_hash !== expectedHash) {
|
|
709
|
+
throw new CursorError(
|
|
710
|
+
`cursor was minted for a different query — drop the cursor and restart iteration`,
|
|
711
|
+
"cursor_query_mismatch",
|
|
712
|
+
);
|
|
713
|
+
}
|
|
714
|
+
// Translate the millis watermark back to an ISO string for the SQL
|
|
715
|
+
// comparison. SQLite's `n.updated_at` is TEXT in canonical ISO form
|
|
716
|
+
// (the store's `toISOString()` output), and ISO timestamps sort
|
|
717
|
+
// lexicographically in the same order as their millisecond epochs
|
|
718
|
+
// when they all use the same canonical form — which every timestamp
|
|
719
|
+
// vault mints does. Cursors minted on heterogeneous timestamps
|
|
720
|
+
// (e.g. an import that preserved unusual formatting) are still
|
|
721
|
+
// safe: we round-trip the cursor's millis through `new Date()`'s
|
|
722
|
+
// canonical ISO so the comparison is apples-to-apples.
|
|
723
|
+
const cursorIso = millisToIso(cursorPayload.last_updated_at);
|
|
724
|
+
conditions.push(
|
|
725
|
+
"(n.updated_at > ? OR (n.updated_at = ? AND n.id > ?))",
|
|
726
|
+
);
|
|
727
|
+
params.push(cursorIso, cursorIso, cursorPayload.last_id);
|
|
728
|
+
}
|
|
729
|
+
|
|
666
730
|
const direction = opts.sort === "desc" ? "DESC" : "ASC";
|
|
667
731
|
let orderBy: string;
|
|
668
|
-
if (opts.
|
|
732
|
+
if (opts.cursor) {
|
|
733
|
+
// Cursor mode forces a deterministic keyset order. `id` is the
|
|
734
|
+
// tiebreaker — without it, two notes sharing an `updated_at` would
|
|
735
|
+
// be at the mercy of SQLite's row order and the next page could
|
|
736
|
+
// miss or duplicate one.
|
|
737
|
+
orderBy = "n.updated_at ASC, n.id ASC";
|
|
738
|
+
} else if (opts.orderBy) {
|
|
669
739
|
requireIndexedField(db, opts.orderBy);
|
|
670
740
|
// `orderBy` came from indexed_fields (validated on declaration), so
|
|
671
741
|
// the column name is safe to interpolate. Append created_at as a
|
|
@@ -697,6 +767,98 @@ export function queryNotes(db: Database, opts: QueryOpts): Note[] {
|
|
|
697
767
|
});
|
|
698
768
|
}
|
|
699
769
|
|
|
770
|
+
/**
 * Project `QueryOpts` down to the fields that take part in cursor hashing.
 *
 * Anything that does not change which rows match is left behind: `cursor`,
 * `limit`, `offset`, and the internal `_tagsExpanded` cache key only affect
 * how many rows come back or where the iteration resumes. The rationale
 * lives alongside `QueryHashInputs` in `core/src/cursor.ts`.
 *
 * @param opts - The full query options as received by the query layer.
 * @returns A new object holding only the result-set-affecting fields.
 */
function toQueryHashInputs(opts: QueryOpts): QueryHashInputs {
  const {
    tags,
    tagMatch,
    excludeTags,
    hasTags,
    hasLinks,
    path,
    pathPrefix,
    extension,
    ids,
    metadata,
    dateFrom,
    dateTo,
    dateFilter,
    sort,
    orderBy,
  } = opts;

  return {
    tags,
    tagMatch,
    excludeTags,
    hasTags,
    hasLinks,
    path,
    pathPrefix,
    extension,
    ids,
    metadata,
    dateFrom,
    dateTo,
    dateFilter,
    sort,
    orderBy,
  };
}
|
|
796
|
+
|
|
797
|
+
/**
 * Cursor-paginated wrapper around `queryNotes` (vault#313).
 *
 * Always returns `{ notes, next_cursor }`.
 *
 * # First call (`opts.cursor` absent)
 *
 * The first page is fetched through the SAME keyset path as every later
 * page, by handing `queryNotes` a sentinel cursor
 * (`last_updated_at = 0, last_id = ""`) bound to this query's hash. That
 * forces `ORDER BY updated_at ASC, id ASC` from the very first page.
 *
 * Fetching the first page in the default `created_at` order and then
 * minting the watermark from the page's maximum `updated_at` is unsound
 * once `limit` truncates the result: any matching row outside the page
 * whose `updated_at` is below that maximum would never be returned by a
 * later call.
 *
 * Consequences of sharing the keyset path:
 * - `orderBy` and `sort: "desc"` are rejected with `INVALID_QUERY` on the
 *   first call as well. Both are part of the query hash, so a cursor minted
 *   under either could never have been redeemed.
 * - Only rows the keyset predicate can match are returned.
 *   NOTE(review): rows with a NULL `updated_at` — if the schema permits
 *   them — are excluded from page one, as they already are from every later
 *   page; confirm against the schema.
 *
 * # Empty pages
 *
 * `next_cursor` is minted even when no rows come back, so a poller can
 * persist one value and keep calling. On an empty page the watermark stays
 * at the prior cursor's position (or the `0` / `""` sentinel on a first
 * call). Holding it there is the conservative choice: advancing to `now()`
 * would skip a note later written with a timestamp before wall-clock-now
 * (clock skew, batch import with explicit timestamps). The watermark only
 * moves when rows are actually returned.
 *
 * @param db - Database handle passed through to `queryNotes`.
 * @param opts - Query options; `opts.cursor`, when set, is a `next_cursor`
 *   from a previous call with the same filters.
 * @returns The page of notes plus the cursor to pass on the next call.
 * @throws QueryError (`INVALID_QUERY`) when combined with `orderBy` or
 *   `sort: "desc"`.
 * @throws CursorError when `opts.cursor` is malformed or was minted for a
 *   different query.
 */
export function queryNotesPaged(db: Database, opts: QueryOpts): QueryNotesPage {
  const queryHash = computeQueryHash(toQueryHashInputs(opts));

  // No caller cursor → start from the sentinel so page one is keyset-ordered.
  const effectiveCursor = opts.cursor
    ? opts.cursor
    : encodeCursor({
        v: CURSOR_VERSION,
        last_updated_at: 0,
        last_id: "",
        query_hash: queryHash,
      });

  // queryNotes validates the cursor (shape + query hash) and applies the
  // keyset predicate and ordering.
  const notes = queryNotes(db, { ...opts, cursor: effectiveCursor });

  // Re-decoding is cheap and already known to succeed at this point.
  const prior = decodeCursor(effectiveCursor);
  let lastUpdatedAt = prior.last_updated_at;
  let lastId = prior.last_id;

  // The page is ordered (updated_at ASC, id ASC), so the final row is the
  // maximum; scanning for the max keeps the watermark monotonic regardless.
  for (const note of notes) {
    const updatedIso = note.updatedAt ?? note.createdAt;
    const ms = isoToMillis(updatedIso);
    if (ms > lastUpdatedAt || (ms === lastUpdatedAt && note.id > lastId)) {
      lastUpdatedAt = ms;
      lastId = note.id;
    }
  }

  const next_cursor = encodeCursor({
    v: CURSOR_VERSION,
    last_updated_at: lastUpdatedAt,
    last_id: lastId,
    query_hash: queryHash,
  });

  return { notes, next_cursor };
}
|
|
861
|
+
|
|
700
862
|
export function searchNotes(
|
|
701
863
|
db: Database,
|
|
702
864
|
query: string,
|