@openparachute/vault 0.6.0 → 0.6.2-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -6
- package/core/src/content-range.test.ts +374 -0
- package/core/src/content-range.ts +185 -0
- package/core/src/links.ts +76 -21
- package/core/src/mcp.ts +53 -1
- package/core/src/notes.ts +128 -40
- package/core/src/query-perf-routing.test.ts +208 -0
- package/core/src/schema.ts +30 -1
- package/package.json +1 -1
- package/src/cli.ts +90 -25
- package/src/content-range-routes.test.ts +178 -0
- package/src/github-device-flow.test.ts +265 -6
- package/src/github-device-flow.ts +297 -45
- package/src/init-summary.test.ts +125 -125
- package/src/init-summary.ts +89 -54
- package/src/init.test.ts +128 -0
- package/src/mirror-credentials.test.ts +20 -0
- package/src/mirror-credentials.ts +6 -2
- package/src/mirror-remote-guard.test.ts +269 -0
- package/src/mirror-remote-guard.ts +273 -0
- package/src/mirror-routes.test.ts +1118 -46
- package/src/mirror-routes.ts +405 -32
- package/src/routes.ts +69 -3
- package/src/routing.ts +8 -0
- package/src/vault.test.ts +56 -0
- package/web/ui/dist/assets/index-BPgyIjR7.js +61 -0
- package/web/ui/dist/index.html +1 -1
- package/web/ui/dist/assets/index-CGL256oe.js +0 -60
package/core/src/links.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { Database } from "bun:sqlite";
|
|
2
2
|
import type { Link, NoteSummary, HydratedLink } from "./types.js";
|
|
3
|
-
import {
|
|
3
|
+
import { getNoteTagsForNotes } from "./notes.js";
|
|
4
4
|
|
|
5
5
|
export function createLink(
|
|
6
6
|
db: Database,
|
|
@@ -103,28 +103,25 @@ function parseMetadata(raw: string | null): Record<string, unknown> | undefined
|
|
|
103
103
|
try { return JSON.parse(raw); } catch { return undefined; }
|
|
104
104
|
}
|
|
105
105
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
"SELECT id, path, metadata, created_at, updated_at FROM notes WHERE id = ?",
|
|
109
|
-
).get(noteId) as SummaryRow | null;
|
|
110
|
-
if (!row) return undefined;
|
|
111
|
-
return {
|
|
112
|
-
id: row.id,
|
|
113
|
-
path: row.path ?? undefined,
|
|
114
|
-
metadata: parseMetadata(row.metadata),
|
|
115
|
-
createdAt: row.created_at,
|
|
116
|
-
updatedAt: row.updated_at ?? undefined,
|
|
117
|
-
tags: getNoteTags(db, row.id),
|
|
118
|
-
};
|
|
119
|
-
}
|
|
106
|
+
/** IN-list chunk size — matches getLinkCounts' conservative bound-variable floor. */
|
|
107
|
+
const IN_CHUNK = 900;
|
|
120
108
|
|
|
121
109
|
function getNoteSummaries(db: Database, noteIds: string[]): Map<string, NoteSummary> {
|
|
122
110
|
const map = new Map<string, NoteSummary>();
|
|
123
111
|
if (noteIds.length === 0) return map;
|
|
124
|
-
const
|
|
125
|
-
const rows =
|
|
126
|
-
|
|
127
|
-
|
|
112
|
+
const ids = [...new Set(noteIds)];
|
|
113
|
+
const rows: SummaryRow[] = [];
|
|
114
|
+
for (let i = 0; i < ids.length; i += IN_CHUNK) {
|
|
115
|
+
const chunk = ids.slice(i, i + IN_CHUNK);
|
|
116
|
+
const placeholders = chunk.map(() => "?").join(", ");
|
|
117
|
+
rows.push(...db.prepare(
|
|
118
|
+
`SELECT id, path, metadata, created_at, updated_at FROM notes WHERE id IN (${placeholders})`,
|
|
119
|
+
).all(...chunk) as SummaryRow[]);
|
|
120
|
+
}
|
|
121
|
+
// ONE batched tag lookup for every summary on the page — this used to be
|
|
122
|
+
// a per-summary query, which made hydrating a well-linked note cost
|
|
123
|
+
// O(linked notes) round-trips (2026-06-10 perf measurements).
|
|
124
|
+
const tagsById = getNoteTagsForNotes(db, rows.map((r) => r.id));
|
|
128
125
|
for (const row of rows) {
|
|
129
126
|
map.set(row.id, {
|
|
130
127
|
id: row.id,
|
|
@@ -132,7 +129,7 @@ function getNoteSummaries(db: Database, noteIds: string[]): Map<string, NoteSumm
|
|
|
132
129
|
metadata: parseMetadata(row.metadata),
|
|
133
130
|
createdAt: row.created_at,
|
|
134
131
|
updatedAt: row.updated_at ?? undefined,
|
|
135
|
-
tags:
|
|
132
|
+
tags: tagsById.get(row.id) ?? [],
|
|
136
133
|
});
|
|
137
134
|
}
|
|
138
135
|
return map;
|
|
@@ -148,8 +145,11 @@ export function getLinksHydrated(
|
|
|
148
145
|
opts?: { direction?: "outbound" | "inbound" | "both"; include_content?: boolean },
|
|
149
146
|
): HydratedLink[] {
|
|
150
147
|
const links = getLinks(db, noteId, opts);
|
|
148
|
+
return hydrateLinks(db, links);
|
|
149
|
+
}
|
|
151
150
|
|
|
152
|
-
|
|
151
|
+
/** Attach source/target note summaries to a set of links (batched). */
|
|
152
|
+
function hydrateLinks(db: Database, links: Link[]): HydratedLink[] {
|
|
153
153
|
const noteIds = new Set<string>();
|
|
154
154
|
for (const link of links) {
|
|
155
155
|
noteIds.add(link.sourceId);
|
|
@@ -165,6 +165,61 @@ export function getLinksHydrated(
|
|
|
165
165
|
}));
|
|
166
166
|
}
|
|
167
167
|
|
|
168
|
+
/**
 * Batch variant of `getLinksHydrated` for the `include_links` enrichment
 * loops (MCP query-notes list path, REST GET /api/notes): hydrates links for
 * a whole PAGE of notes in a constant number of queries — two indexed
 * IN-list scans over `links` per chunk, one summary fetch, one batched tag
 * lookup — instead of (1 link query + 1 summary query + N tag queries) per
 * note.
 *
 * @param db       Open database handle.
 * @param noteIds  Ids of the notes on the page; duplicates are collapsed.
 * @returns A map keyed by every requested note id (empty array when the note
 *   has no links). Each note's list contains links touching it in either
 *   direction, ordered created_at DESC. A link between two notes that are
 *   BOTH on the page appears in both notes' lists (the same object is pushed
 *   to each list); a self-link appears once.
 */
export function getLinksHydratedForNotes(
  db: Database,
  noteIds: string[],
): Map<string, HydratedLink[]> {
  const result = new Map<string, HydratedLink[]>();
  if (noteIds.length === 0) return result;
  const ids = [...new Set(noteIds)];
  for (const id of ids) result.set(id, []);

  // Collect every link touching any requested note, deduped on the
  // (source, target, relationship) triple so a link whose endpoints are both
  // on the page is kept once. The key is a JSON-encoded tuple rather than a
  // delimiter-joined string: joining with "|" would let two distinct triples
  // collide whenever an id itself contains "|".
  const rowsByKey = new Map<string, LinkRow>();
  for (let i = 0; i < ids.length; i += IN_CHUNK) {
    const chunk = ids.slice(i, i + IN_CHUNK);
    const placeholders = chunk.map(() => "?").join(", ");
    // `column` is one of two literals, so interpolating it is safe.
    for (const column of ["source_id", "target_id"] as const) {
      const rows = db.prepare(
        `SELECT * FROM links WHERE ${column} IN (${placeholders})`,
      ).all(...chunk) as LinkRow[];
      for (const row of rows) {
        rowsByKey.set(JSON.stringify([row.source_id, row.target_id, row.relationship]), row);
      }
    }
  }

  // Sort newest-first to mirror the single-note SQL's
  // ORDER BY created_at DESC (ISO timestamps sort lexicographically).
  const links = [...rowsByKey.values()]
    .sort((a, b) => (a.created_at < b.created_at ? 1 : a.created_at > b.created_at ? -1 : 0))
    .map(rowToLink);

  // Hydrate once for the whole page, then fan each link out to the
  // requested endpoint(s). Endpoints that were not requested have no entry
  // in `result`, so the optional call skips them.
  const hydrated = hydrateLinks(db, links);
  for (const link of hydrated) {
    result.get(link.sourceId)?.push(link);
    if (link.targetId !== link.sourceId) result.get(link.targetId)?.push(link);
  }
  return result;
}
|
|
222
|
+
|
|
168
223
|
/**
|
|
169
224
|
* Batch link-degree counter (vault feedback #4).
|
|
170
225
|
*
|
package/core/src/mcp.ts
CHANGED
|
@@ -15,6 +15,12 @@ import {
|
|
|
15
15
|
type ExpandContext,
|
|
16
16
|
type ExpandMode,
|
|
17
17
|
} from "./expand.js";
|
|
18
|
+
import {
|
|
19
|
+
parseContentRange,
|
|
20
|
+
applyContentRange,
|
|
21
|
+
contentRangeRequiresContent,
|
|
22
|
+
MIN_CONTENT_LENGTH,
|
|
23
|
+
} from "./content-range.js";
|
|
18
24
|
|
|
19
25
|
export interface McpToolDef {
|
|
20
26
|
name: string;
|
|
@@ -153,6 +159,8 @@ export function generateMcpTools(store: Store, opts?: GenerateMcpToolsOpts): Mcp
|
|
|
153
159
|
|
|
154
160
|
Defaults: include_content=true for single note, false for lists. include_links=false. tag_match="any".
|
|
155
161
|
|
|
162
|
+
Large notes: pass \`content_offset\` / \`content_length\` (UTF-8 bytes) for a bounded read of note content — the response carries the slice plus \`content_total_length\` and \`content_next_offset\` (null when complete). Loop, feeding \`content_next_offset\` back as \`content_offset\`, to read a note too large for one response.
|
|
163
|
+
|
|
156
164
|
Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returned content. Tune with \`expand_depth\` (1–3, default 1) and \`expand_mode\` ("full" inlines full content, "summary" inlines only metadata.summary). Expansions are deduplicated across the query and cycle-guarded.`,
|
|
157
165
|
inputSchema: {
|
|
158
166
|
type: "object",
|
|
@@ -243,6 +251,16 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
|
|
|
243
251
|
"Opaque cursor for 'since last checked' agent loops (vault#313). First call: omit. The response will include `next_cursor` — pass it on the subsequent call to receive only notes created or updated since the prior page. The cursor binds to the query's filters (tag, path, metadata, etc.); changing them between calls returns a structured `cursor_query_mismatch` error. Pagination via cursor orders results by `updated_at ASC` and is mutually exclusive with `order_by` and `sort: \"desc\"`. The response shape switches to `{notes, next_cursor}` when this parameter is present.",
|
|
244
252
|
},
|
|
245
253
|
include_content: { type: "boolean", description: "Include note content (default: true for single, false for list)" },
|
|
254
|
+
content_offset: {
|
|
255
|
+
type: "number",
|
|
256
|
+
description:
|
|
257
|
+
"Byte offset (UTF-8) into note content to start reading from (default 0). For reading a note too large for one response: pass the previous response's `content_next_offset` here to continue. An offset landing mid-codepoint is aligned DOWN to the codepoint's leading byte (chained `content_next_offset` values are always aligned); the effective start is echoed back as `content_offset` on the response. Requires content in the response — errors when combined with include_content=false (or a list query without include_content=true).",
|
|
258
|
+
},
|
|
259
|
+
content_length: {
|
|
260
|
+
type: "number",
|
|
261
|
+
description:
|
|
262
|
+
`Maximum bytes (UTF-8) of note content to return (minimum ${MIN_CONTENT_LENGTH}). When this or content_offset is set, the returned \`content\` is the byte slice and the response gains \`content_offset\` (effective start), \`content_total_length\` (full content size in bytes), and \`content_next_offset\` (pass back as content_offset to continue; null when the slice reaches the end). Slices end on a UTF-8 codepoint boundary, so a slice may be up to 3 bytes under the budget — never over. Concatenating the slices from offset 0 through content_next_offset=null reconstructs the content byte-for-byte. On list queries the same window applies to each note's content independently. When expand_links=true the range applies to the returned (expanded) content.`,
|
|
263
|
+
},
|
|
246
264
|
include_metadata: {
|
|
247
265
|
oneOf: [
|
|
248
266
|
{ type: "boolean" },
|
|
@@ -292,17 +310,31 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
|
|
|
292
310
|
}
|
|
293
311
|
: null;
|
|
294
312
|
|
|
313
|
+
// --- Content range (bounded reads for large notes) ---
|
|
314
|
+
// Validates loudly: bad values throw QueryError here, before any
|
|
315
|
+
// query work. Null when neither param is present — response shape
|
|
316
|
+
// stays byte-identical to the no-pagination behavior.
|
|
317
|
+
const contentRange = parseContentRange(params.content_offset, params.content_length);
|
|
318
|
+
|
|
295
319
|
// --- Single note by ID/path ---
|
|
296
320
|
if (params.id) {
|
|
297
321
|
const note = resolveNote(db, params.id as string);
|
|
298
322
|
if (!note) return { error: "Note not found", id: params.id };
|
|
299
323
|
const includeContent = params.include_content !== false; // default true for single
|
|
324
|
+
// Range params are meaningless on a content-less shape — error
|
|
325
|
+
// rather than silently ignore (same loud-validation policy as
|
|
326
|
+
// `expand`).
|
|
327
|
+
if (contentRange && !includeContent) throw contentRangeRequiresContent();
|
|
300
328
|
let result: any = includeContent ? { ...note } : noteOps.toNoteIndex(note);
|
|
301
329
|
if (expandCtx && includeContent && typeof result.content === "string") {
|
|
302
330
|
// Mark the top-level note as already expanded so it can't recursively inline itself.
|
|
303
331
|
expandCtx.expanded.add(note.id);
|
|
304
332
|
result.content = expandContent(result.content, expandCtx, expandDepth);
|
|
305
333
|
}
|
|
334
|
+
// Range applies to the FINAL returned content — after wikilink
|
|
335
|
+
// expansion — so the window the client pages through is the same
|
|
336
|
+
// document it would have received unpaged.
|
|
337
|
+
if (contentRange && includeContent) applyContentRange(result, contentRange);
|
|
306
338
|
result = filterMetadata(result, params.include_metadata as boolean | string[] | undefined);
|
|
307
339
|
if (params.include_links) {
|
|
308
340
|
result.links = linkOps.getLinksHydrated(db, note.id);
|
|
@@ -457,6 +489,10 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
|
|
|
457
489
|
|
|
458
490
|
// --- Format output ---
|
|
459
491
|
const includeContent = params.include_content === true; // default false for list
|
|
492
|
+
// Range params require content in the response — on lists that
|
|
493
|
+
// means an explicit include_content=true (the lean default carries
|
|
494
|
+
// no content to slice). Error rather than silently ignore.
|
|
495
|
+
if (contentRange && !includeContent) throw contentRangeRequiresContent();
|
|
460
496
|
const includeMetadata = params.include_metadata as boolean | string[] | undefined;
|
|
461
497
|
let output: any[] = includeContent ? results.map((n) => ({ ...n })) : results.map(noteOps.toNoteIndex);
|
|
462
498
|
|
|
@@ -471,6 +507,15 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
|
|
|
471
507
|
}
|
|
472
508
|
}
|
|
473
509
|
|
|
510
|
+
// --- Content range (per-note, post-expansion) ---
|
|
511
|
+
// The same byte window applies to EACH note's content independently
|
|
512
|
+
// — the primary use is a single large note, but list mode keeps the
|
|
513
|
+
// simple per-note semantic (every note reports its own
|
|
514
|
+
// content_total_length / content_next_offset).
|
|
515
|
+
if (contentRange && includeContent) {
|
|
516
|
+
for (const n of output) applyContentRange(n, contentRange);
|
|
517
|
+
}
|
|
518
|
+
|
|
474
519
|
// --- Apply metadata filtering ---
|
|
475
520
|
if (includeMetadata !== undefined && includeMetadata !== true) {
|
|
476
521
|
output = output.map((n: any) => filterMetadata(n, includeMetadata));
|
|
@@ -491,10 +536,17 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
|
|
|
491
536
|
|
|
492
537
|
// --- Hydrate links/attachments per note if requested ---
|
|
493
538
|
if (params.include_links || params.include_attachments) {
|
|
539
|
+
// Links hydrate for the WHOLE page in a constant number of
|
|
540
|
+
// queries (see getLinksHydratedForNotes) — the per-note variant
|
|
541
|
+
// cost (1 link query + 1 summary query + N tag queries) × page
|
|
542
|
+
// size. 2026-06-10 perf measurements.
|
|
543
|
+
const linksByNote = params.include_links
|
|
544
|
+
? linkOps.getLinksHydratedForNotes(db, (output as any[]).map((n: any) => n.id))
|
|
545
|
+
: null;
|
|
494
546
|
const enrichedOut: any[] = [];
|
|
495
547
|
for (const n of output as any[]) {
|
|
496
548
|
const enriched: any = { ...n };
|
|
497
|
-
if (
|
|
549
|
+
if (linksByNote) enriched.links = linksByNote.get(n.id) ?? [];
|
|
498
550
|
if (params.include_attachments) enriched.attachments = await store.getAttachments(n.id);
|
|
499
551
|
enrichedOut.push(enriched);
|
|
500
552
|
}
|
package/core/src/notes.ts
CHANGED
|
@@ -18,7 +18,7 @@ import {
|
|
|
18
18
|
type CursorPayload,
|
|
19
19
|
type QueryHashInputs,
|
|
20
20
|
} from "./cursor.js";
|
|
21
|
-
import { releaseField } from "./indexed-fields.js";
|
|
21
|
+
import { getIndexedField, releaseField } from "./indexed-fields.js";
|
|
22
22
|
|
|
23
23
|
let idCounter = 0;
|
|
24
24
|
|
|
@@ -142,11 +142,7 @@ export function getNotes(db: Database, ids: string[]): Note[] {
|
|
|
142
142
|
const rows = db.prepare(
|
|
143
143
|
`SELECT * FROM notes WHERE id IN (${placeholders}) ORDER BY created_at`,
|
|
144
144
|
).all(...ids) as NoteRow[];
|
|
145
|
-
return rows
|
|
146
|
-
const note = rowToNote(row);
|
|
147
|
-
note.tags = getNoteTags(db, note.id);
|
|
148
|
-
return note;
|
|
149
|
-
});
|
|
145
|
+
return notesWithTags(db, rows);
|
|
150
146
|
}
|
|
151
147
|
|
|
152
148
|
/**
|
|
@@ -489,7 +485,6 @@ export function deleteNote(db: Database, id: string): void {
|
|
|
489
485
|
export function queryNotes(db: Database, opts: QueryOpts): Note[] {
|
|
490
486
|
const conditions: string[] = [];
|
|
491
487
|
const params: SQLQueryBindings[] = [];
|
|
492
|
-
const joins: string[] = [];
|
|
493
488
|
|
|
494
489
|
// Include tags — "all" (default): must have ALL tags; "any": must have ANY tag.
|
|
495
490
|
// The `_tagsExpanded` internal field carries per-input-tag descendant sets
|
|
@@ -498,6 +493,15 @@ export function queryNotes(db: Database, opts: QueryOpts): Note[] {
|
|
|
498
493
|
// `{manual, voice, text, ...}` per declared `_tags/*` config notes. Falls
|
|
499
494
|
// back to `[opts.tags[i]]` (single-element set) when no expansion is set,
|
|
500
495
|
// preserving the original semantics.
|
|
496
|
+
//
|
|
497
|
+
// Membership is expressed as a SEMIJOIN (`n.id IN (SELECT note_id ...)`),
|
|
498
|
+
// not a `JOIN note_tags`. A JOIN multiplies rows when a note carries
|
|
499
|
+
// several matching tags, which forced `SELECT DISTINCT n.*` — and that
|
|
500
|
+
// DISTINCT materialized every candidate's FULL row (content included)
|
|
501
|
+
// into a temp B-tree before LIMIT could apply, making large-tag queries
|
|
502
|
+
// cost O(candidates × row size) regardless of limit. The IN-subquery
|
|
503
|
+
// rides idx_note_tags_tag, produces each note id at most once, and lets
|
|
504
|
+
// the whole query drop DISTINCT. See the 2026-06-10 perf measurements.
|
|
501
505
|
if (opts.tags && opts.tags.length > 0) {
|
|
502
506
|
const tagSets: string[][] = (opts as QueryOpts & { _tagsExpanded?: string[][] })._tagsExpanded
|
|
503
507
|
?? opts.tags.map((t) => [t]);
|
|
@@ -508,17 +512,16 @@ export function queryNotes(db: Database, opts: QueryOpts): Note[] {
|
|
|
508
512
|
const flat = Array.from(new Set(tagSets.flat()));
|
|
509
513
|
if (flat.length > 0) {
|
|
510
514
|
const placeholders = flat.map(() => "?").join(", ");
|
|
511
|
-
|
|
515
|
+
conditions.push(`n.id IN (SELECT note_id FROM note_tags WHERE tag_name IN (${placeholders}))`);
|
|
512
516
|
params.push(...flat);
|
|
513
517
|
}
|
|
514
518
|
} else {
|
|
515
|
-
// "all": one
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
if (set.length === 0) continue;
|
|
519
|
-
const alias = `nt${i}`;
|
|
519
|
+
// "all": one membership clause per input tag, each accepting the
|
|
520
|
+
// input or any descendant.
|
|
521
|
+
for (const set of tagSets) {
|
|
522
|
+
if (!set || set.length === 0) continue;
|
|
520
523
|
const placeholders = set.map(() => "?").join(", ");
|
|
521
|
-
|
|
524
|
+
conditions.push(`n.id IN (SELECT note_id FROM note_tags WHERE tag_name IN (${placeholders}))`);
|
|
522
525
|
params.push(...set);
|
|
523
526
|
}
|
|
524
527
|
}
|
|
@@ -601,6 +604,20 @@ export function queryNotes(db: Database, opts: QueryOpts): Note[] {
|
|
|
601
604
|
// Metadata filters — operator objects route through the indexed generated
|
|
602
605
|
// column (fast, loud errors on non-indexed fields); primitives keep the
|
|
603
606
|
// existing JSON-scan exact-match behavior for backcompat.
|
|
607
|
+
//
|
|
608
|
+
// Plain-equality fast path (2026-06-10 perf measurements): when the field
|
|
609
|
+
// happens to be indexed, a plain `{field: value}` equality used to pay the
|
|
610
|
+
// same full-table json_extract scan as a non-indexed field — 280× slower
|
|
611
|
+
// than the operator form `{field: {eq: value}}` ON THE SAME column. We now
|
|
612
|
+
// prepend an indexed-prefilter conjunct (`"meta_<field>" = ?`) so the
|
|
613
|
+
// B-tree narrows the candidates, while KEEPING the original json_extract
|
|
614
|
+
// clause as a residual predicate. The conjunction is result-identical to
|
|
615
|
+
// the scan by construction: any row the scan matches also satisfies the
|
|
616
|
+
// prefilter (the generated column is the same json_extract under the
|
|
617
|
+
// column's type affinity), and rows where the affinity-converted column
|
|
618
|
+
// matches but the raw extraction doesn't (e.g. JSON number 5 vs query
|
|
619
|
+
// string "5") are excluded by the residual — exactly as the scan excluded
|
|
620
|
+
// them. Pinned by query-plain-eq-routing.test.ts.
|
|
604
621
|
if (opts.metadata) {
|
|
605
622
|
for (const [key, value] of Object.entries(opts.metadata)) {
|
|
606
623
|
if (isOperatorObject(value)) {
|
|
@@ -612,8 +629,17 @@ export function queryNotes(db: Database, opts: QueryOpts): Note[] {
|
|
|
612
629
|
conditions.push(sql);
|
|
613
630
|
params.push(...opParams);
|
|
614
631
|
} else {
|
|
615
|
-
|
|
616
|
-
|
|
632
|
+
const bound = typeof value === "string" ? value : JSON.stringify(value);
|
|
633
|
+
// `getIndexedField` returning a row proves `key` was validated by
|
|
634
|
+
// FIELD_NAME_RE at declaration time, so interpolating the column
|
|
635
|
+
// name is safe — same justification as buildOperatorClause.
|
|
636
|
+
if (getIndexedField(db, key)) {
|
|
637
|
+
conditions.push(`("meta_${key}" = ? AND json_extract(n.metadata, '$.' || ?) = ?)`);
|
|
638
|
+
params.push(bound, key, bound);
|
|
639
|
+
} else {
|
|
640
|
+
conditions.push(`json_extract(n.metadata, '$.' || ?) = ?`);
|
|
641
|
+
params.push(key, bound);
|
|
642
|
+
}
|
|
617
643
|
}
|
|
618
644
|
}
|
|
619
645
|
}
|
|
@@ -768,30 +794,89 @@ export function queryNotes(db: Database, opts: QueryOpts): Note[] {
|
|
|
768
794
|
// the column name is safe to interpolate. Append created_at as a
|
|
769
795
|
// stable tiebreaker so two rows with the same indexed value have a
|
|
770
796
|
// deterministic order.
|
|
771
|
-
orderBy = `"meta_${opts.orderBy}" ${direction}, n.created_at ${direction}`;
|
|
797
|
+
orderBy = `"meta_${opts.orderBy}" ${direction}, n.created_at ${direction}, n.id ${direction}`;
|
|
772
798
|
} else {
|
|
773
|
-
|
|
799
|
+
// id tiebreaker: same-millisecond inserts get deterministic relative
|
|
800
|
+
// order — load-bearing now that the two-phase page fetch makes
|
|
801
|
+
// pagination ordering the contract (#485 review nit).
|
|
802
|
+
orderBy = `n.created_at ${direction}, n.id ${direction}`;
|
|
774
803
|
}
|
|
775
804
|
const limit = typeof opts.limit === "number" ? opts.limit : 100;
|
|
776
805
|
const offset = typeof opts.offset === "number" ? opts.offset : 0;
|
|
777
806
|
|
|
778
807
|
const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
|
|
779
808
|
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
809
|
+
// Two-phase "deferred join" page fetch (2026-06-10 perf measurements).
|
|
810
|
+
//
|
|
811
|
+
// Phase 1 selects ONLY `n.id` — the ORDER BY temp B-tree (when one is
|
|
812
|
+
// needed) holds narrow id/sort-key entries instead of full note rows, so
|
|
813
|
+
// sort/materialization cost no longer scales with content size. With the
|
|
814
|
+
// tag semijoin above there is no row multiplication, so no DISTINCT.
|
|
815
|
+
//
|
|
816
|
+
// Phase 2 fetches full rows for just the page (≤ limit ids) and re-orders
|
|
817
|
+
// to the phase-1 order; tags are hydrated in ONE batched query instead of
|
|
818
|
+
// one query per returned note.
|
|
819
|
+
const idSql = `
|
|
820
|
+
SELECT n.id FROM notes n
|
|
783
821
|
${whereClause}
|
|
784
822
|
ORDER BY ${orderBy}
|
|
785
823
|
LIMIT ? OFFSET ?
|
|
786
824
|
`;
|
|
787
825
|
params.push(limit, offset);
|
|
788
826
|
|
|
789
|
-
const
|
|
790
|
-
return
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
827
|
+
const idRows = db.prepare(idSql).all(...params) as { id: string }[];
|
|
828
|
+
return fetchNotesByIdsOrdered(db, idRows.map((r) => r.id));
|
|
829
|
+
}
|
|
830
|
+
|
|
831
|
+
/** Chunk size for IN-list queries — comfortably under SQLite's conservative
|
|
832
|
+
* 999 bound-variable floor (older builds), matching getLinkCounts. */
|
|
833
|
+
const IN_CHUNK = 900;
|
|
834
|
+
|
|
835
|
+
/**
 * Load the full rows for `ids` and return them as Notes in the same order
 * as `ids`. Rows are fetched in IN-list chunks of `IN_CHUNK`; tags are
 * attached afterwards through a single `getNoteTagsForNotes` call. An id
 * with no matching row (e.g. deleted between the two query phases) is
 * skipped without error.
 *
 * @param db   Open database handle.
 * @param ids  Note ids in the desired output order.
 * @returns Notes with `tags` populated, ordered like `ids`.
 */
function fetchNotesByIdsOrdered(db: Database, ids: string[]): Note[] {
  if (ids.length === 0) return [];

  // Pass 1: pull the rows chunk by chunk, indexed by id for the re-order.
  const found = new Map<string, NoteRow>();
  let start = 0;
  while (start < ids.length) {
    const batch = ids.slice(start, start + IN_CHUNK);
    const marks = batch.map(() => "?").join(", ");
    const statement = db.prepare(
      `SELECT * FROM notes WHERE id IN (${marks})`,
    );
    const fetched = statement.all(...batch) as NoteRow[];
    for (const fetchedRow of fetched) {
      found.set(fetchedRow.id, fetchedRow);
    }
    start += IN_CHUNK;
  }

  // Pass 2: walk the caller's order, dropping ids that produced no row.
  const ordered: Note[] = [];
  for (const wantedId of ids) {
    const match = found.get(wantedId);
    if (!match) continue;
    ordered.push(rowToNote(match));
  }

  // Pass 3: attach tags from one batched lookup.
  const tagLookup = getNoteTagsForNotes(db, ordered.map((entry) => entry.id));
  for (const entry of ordered) {
    entry.tags = tagLookup.get(entry.id) ?? [];
  }
  return ordered;
}
|
|
860
|
+
|
|
861
|
+
/**
 * Look up the tags of many notes at once, issuing one IN-list query per
 * chunk of `IN_CHUNK` ids instead of one query per note.
 *
 * @param db       Open database handle.
 * @param noteIds  Note ids to look up; duplicates are collapsed.
 * @returns A map with an entry for every requested id. Each value lists
 *   that note's tag names in `tag_name` order (the query's ORDER BY); notes
 *   without tags map to an empty array.
 */
export function getNoteTagsForNotes(db: Database, noteIds: string[]): Map<string, string[]> {
  const tagsByNote = new Map<string, string[]>();
  if (noteIds.length === 0) return tagsByNote;

  // Seed every requested id so untagged notes still get an (empty) entry.
  const uniqueIds = Array.from(new Set(noteIds));
  uniqueIds.forEach((noteId) => tagsByNote.set(noteId, []));

  let start = 0;
  while (start < uniqueIds.length) {
    const batch = uniqueIds.slice(start, start + IN_CHUNK);
    const marks = batch.map(() => "?").join(", ");
    const statement = db.prepare(
      `SELECT note_id, tag_name FROM note_tags WHERE note_id IN (${marks}) ORDER BY tag_name`,
    );
    const tagRows = statement.all(...batch) as { note_id: string; tag_name: string }[];
    // Each id lives in exactly one chunk, so appending in row order keeps
    // every per-note list sorted by tag_name.
    for (const { note_id, tag_name } of tagRows) {
      tagsByNote.get(note_id)!.push(tag_name);
    }
    start += IN_CHUNK;
  }
  return tagsByNote;
}
|
|
796
881
|
|
|
797
882
|
/**
|
|
@@ -895,20 +980,19 @@ export function searchNotes(
|
|
|
895
980
|
|
|
896
981
|
if (opts?.tags && opts.tags.length > 0) {
|
|
897
982
|
try {
|
|
983
|
+
// Tag membership as a semijoin — same rationale as queryNotes: a
|
|
984
|
+
// `JOIN note_tags` multiplies rows for multi-tagged notes and forced
|
|
985
|
+
// DISTINCT over full rows. The FTS join itself is 1:1 on rowid.
|
|
898
986
|
const tagPlaceholders = opts.tags.map(() => "?").join(", ");
|
|
899
987
|
const rows = db.prepare(`
|
|
900
|
-
SELECT
|
|
988
|
+
SELECT n.* FROM notes n
|
|
901
989
|
JOIN notes_fts fts ON fts.rowid = n.rowid
|
|
902
|
-
JOIN note_tags nt ON nt.note_id = n.id AND nt.tag_name IN (${tagPlaceholders})
|
|
903
990
|
WHERE notes_fts MATCH ?
|
|
991
|
+
AND n.id IN (SELECT note_id FROM note_tags WHERE tag_name IN (${tagPlaceholders}))
|
|
904
992
|
ORDER BY rank
|
|
905
993
|
LIMIT ?
|
|
906
|
-
`).all(...opts.tags,
|
|
907
|
-
return rows
|
|
908
|
-
const note = rowToNote(row);
|
|
909
|
-
note.tags = getNoteTags(db, note.id);
|
|
910
|
-
return note;
|
|
911
|
-
});
|
|
994
|
+
`).all(query, ...opts.tags, limit) as NoteRow[];
|
|
995
|
+
return notesWithTags(db, rows);
|
|
912
996
|
} catch {
|
|
913
997
|
return [];
|
|
914
998
|
}
|
|
@@ -922,16 +1006,20 @@ export function searchNotes(
|
|
|
922
1006
|
ORDER BY rank
|
|
923
1007
|
LIMIT ?
|
|
924
1008
|
`).all(query, limit) as NoteRow[];
|
|
925
|
-
return rows
|
|
926
|
-
const note = rowToNote(row);
|
|
927
|
-
note.tags = getNoteTags(db, note.id);
|
|
928
|
-
return note;
|
|
929
|
-
});
|
|
1009
|
+
return notesWithTags(db, rows);
|
|
930
1010
|
} catch {
|
|
931
1011
|
return [];
|
|
932
1012
|
}
|
|
933
1013
|
}
|
|
934
1014
|
|
|
1015
|
+
/**
 * Convert raw note rows into Notes and attach each note's tags, using a
 * single `getNoteTagsForNotes` call for the whole batch. Row order is
 * preserved.
 *
 * @param db    Open database handle.
 * @param rows  Note rows as returned by a `SELECT` over `notes`.
 * @returns One Note per row, in the same order, with `tags` populated
 *   (empty array when the lookup has no entry for the note).
 */
function notesWithTags(db: Database, rows: NoteRow[]): Note[] {
  const hydrated = rows.map(rowToNote);
  const noteIds = hydrated.map((entry) => entry.id);
  const tagLookup = getNoteTagsForNotes(db, noteIds);
  hydrated.forEach((entry) => {
    entry.tags = tagLookup.get(entry.id) ?? [];
  });
  return hydrated;
}
|
|
1022
|
+
|
|
935
1023
|
// ---- Tag Operations ----
|
|
936
1024
|
|
|
937
1025
|
export function tagNote(db: Database, noteId: string, tags: string[]): void {
|