@openparachute/vault 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -0
- package/core/src/content-range.test.ts +374 -0
- package/core/src/content-range.ts +185 -0
- package/core/src/links.ts +76 -21
- package/core/src/mcp.ts +53 -1
- package/core/src/notes.ts +128 -40
- package/core/src/query-perf-routing.test.ts +208 -0
- package/core/src/schema.ts +30 -1
- package/package.json +1 -1
- package/src/content-range-routes.test.ts +178 -0
- package/src/github-device-flow.test.ts +265 -6
- package/src/github-device-flow.ts +297 -45
- package/src/mirror-credentials.test.ts +20 -0
- package/src/mirror-credentials.ts +6 -2
- package/src/mirror-routes.test.ts +778 -19
- package/src/mirror-routes.ts +313 -26
- package/src/routes.ts +69 -3
- package/src/routing.ts +8 -0
- package/web/ui/dist/assets/index-BPgyIjR7.js +61 -0
- package/web/ui/dist/index.html +1 -1
- package/web/ui/dist/assets/index-CGL256oe.js +0 -60
package/README.md
CHANGED
|
@@ -233,6 +233,14 @@ parachute-vault config # show current configuration
|
|
|
233
233
|
parachute-vault config set KEY value # set an env var (e.g. PORT=1940)
|
|
234
234
|
parachute-vault config unset KEY # remove an env var
|
|
235
235
|
parachute-vault restart # apply config changes (bounces the daemon)
|
|
236
|
+
# Env vars live in ~/.parachute/vault/.env. Notable ones:
|
|
237
|
+
# PORT — server port (default 1940)
|
|
238
|
+
# PARACHUTE_GITHUB_CLIENT_ID +
|
|
239
|
+
# PARACHUTE_GITHUB_APP_SLUG — bring-your-own GitHub App for the mirror
|
|
240
|
+
# "Back up to GitHub" flow (defaults to the
|
|
241
|
+
# shared Parachute app). Set BOTH or NEITHER:
|
|
242
|
+
# the pair must name the same app — mixing
|
|
243
|
+
# apps breaks the install probe.
|
|
236
244
|
|
|
237
245
|
# Server
|
|
238
246
|
parachute-vault serve # run the server in the foreground (no daemon)
|
|
@@ -522,6 +530,23 @@ curl -H "Authorization: Bearer $VAULT_TOKEN" \
|
|
|
522
530
|
|
|
523
531
|
Caller-tunable preview length is a future enhancement — file an issue if 120 chars isn't enough.
|
|
524
532
|
|
|
533
|
+
### Read a large note in chunks (content range)
|
|
534
|
+
|
|
535
|
+
A 100KB+ transcript won't fit in one MCP response. Pass `content_offset` / `content_length` (UTF-8 bytes) for a bounded read — the response carries the slice plus `content_total_length` and `content_next_offset` (`null` when complete). Loop, feeding `content_next_offset` back in as `content_offset`; concatenating the slices reconstructs the content byte-for-byte. Slices end on a codepoint boundary within the budget (never over `content_length`, at most 3 bytes under).
|
|
536
|
+
|
|
537
|
+
```bash
|
|
538
|
+
curl -H "Authorization: Bearer $VAULT_TOKEN" \
|
|
539
|
+
"http://localhost:1940/vault/default/api/notes/Meetings%2F2026-06-09?content_offset=0&content_length=65536"
|
|
540
|
+
# → { ..., "content": "<first ≤64KB>", "content_total_length": 118034, "content_next_offset": 65534 }
|
|
541
|
+
```
|
|
542
|
+
|
|
543
|
+
```jsonc
|
|
544
|
+
// MCP — same params on query-notes; works per-note on lists with include_content: true
|
|
545
|
+
{ "name": "query-notes", "arguments": { "id": "Meetings/2026-06-09", "content_offset": 0, "content_length": 65536 } }
|
|
546
|
+
```
|
|
547
|
+
|
|
548
|
+
Range params require content in the response — with `include_content=false` (or a list query left on its lean default) they error rather than silently no-op. Full semantics in [docs/HTTP_API.md](./docs/HTTP_API.md) ("Content range — bounded reads for large notes").
|
|
549
|
+
|
|
525
550
|
### Incremental rebuilds: "what changed since X"
|
|
526
551
|
|
|
527
552
|
The SSG / sync pattern. Two equivalent forms — bracket-style is canonical going forward; the flat form is the same shape that ships through the REST/MCP date filter today.
|
|
@@ -0,0 +1,374 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Content range / pagination tests — bounded reads for large notes.
|
|
3
|
+
*
|
|
4
|
+
* Three layers:
|
|
5
|
+
* 1. Unit tests of the parse + slice helpers (boundary cases: offset
|
|
6
|
+
* past end, sub-minimum budget, multi-byte codepoints at the cut).
|
|
7
|
+
* 2. Property test of the reassembly invariant: walking a string from
|
|
8
|
+
* offset 0 via `content_next_offset` and concatenating the slices is
|
|
9
|
+
* byte-identical to the full content, for arbitrary unicode content
|
|
10
|
+
* and budgets — and no slice ever exceeds the byte budget.
|
|
11
|
+
* 3. MCP face (`query-notes` execute): single + list shapes, the
|
|
12
|
+
* include_content interaction, and the no-params regression
|
|
13
|
+
* (response shape byte-identical to pre-pagination behavior).
|
|
14
|
+
*
|
|
15
|
+
* The REST face is exercised in src/content-range-routes.test.ts.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { describe, it, expect, beforeEach } from "bun:test";
|
|
19
|
+
import { Database } from "bun:sqlite";
|
|
20
|
+
import { SqliteStore } from "./store.js";
|
|
21
|
+
import { generateMcpTools } from "./mcp.js";
|
|
22
|
+
import {
|
|
23
|
+
parseContentRange,
|
|
24
|
+
sliceContentRange,
|
|
25
|
+
applyContentRange,
|
|
26
|
+
MIN_CONTENT_LENGTH,
|
|
27
|
+
} from "./content-range.js";
|
|
28
|
+
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
// 1. parseContentRange
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
|
|
33
|
+
describe("parseContentRange", () => {
|
|
34
|
+
it("returns null when neither param is present (range mode off)", () => {
|
|
35
|
+
expect(parseContentRange(undefined, undefined)).toBeNull();
|
|
36
|
+
expect(parseContentRange(null, null)).toBeNull();
|
|
37
|
+
});
|
|
38
|
+
|
|
39
|
+
it("treats empty strings as absent (REST `?content_offset=`)", () => {
|
|
40
|
+
expect(parseContentRange("", "")).toBeNull();
|
|
41
|
+
});
|
|
42
|
+
|
|
43
|
+
it("offset only → length omitted (read to end)", () => {
|
|
44
|
+
expect(parseContentRange(10, undefined)).toEqual({ offset: 10 });
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
it("length only → offset defaults to 0", () => {
|
|
48
|
+
expect(parseContentRange(undefined, 64)).toEqual({ offset: 0, length: 64 });
|
|
49
|
+
});
|
|
50
|
+
|
|
51
|
+
it("accepts decimal strings (REST query params)", () => {
|
|
52
|
+
expect(parseContentRange("5", "1024")).toEqual({ offset: 5, length: 1024 });
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
it("rejects negative offset", () => {
|
|
56
|
+
expect(() => parseContentRange(-1, undefined)).toThrow(/content_offset/);
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
it("rejects non-integer values", () => {
|
|
60
|
+
expect(() => parseContentRange(1.5, undefined)).toThrow(/content_offset/);
|
|
61
|
+
expect(() => parseContentRange(undefined, 7.2)).toThrow(/content_length/);
|
|
62
|
+
expect(() => parseContentRange("abc", undefined)).toThrow(/content_offset/);
|
|
63
|
+
expect(() => parseContentRange(undefined, "-4")).toThrow(/content_length/);
|
|
64
|
+
});
|
|
65
|
+
|
|
66
|
+
it(`rejects zero / negative / sub-minimum budget (< ${MIN_CONTENT_LENGTH})`, () => {
|
|
67
|
+
expect(() => parseContentRange(undefined, 0)).toThrow(/content_length/);
|
|
68
|
+
expect(() => parseContentRange(undefined, -8)).toThrow(/content_length/);
|
|
69
|
+
expect(() => parseContentRange(undefined, MIN_CONTENT_LENGTH - 1)).toThrow(/content_length/);
|
|
70
|
+
// The minimum itself is fine.
|
|
71
|
+
expect(parseContentRange(undefined, MIN_CONTENT_LENGTH)).toEqual({
|
|
72
|
+
offset: 0,
|
|
73
|
+
length: MIN_CONTENT_LENGTH,
|
|
74
|
+
});
|
|
75
|
+
});
|
|
76
|
+
|
|
77
|
+
it("throws QueryError with INVALID_QUERY code", () => {
|
|
78
|
+
try {
|
|
79
|
+
parseContentRange(undefined, 2);
|
|
80
|
+
throw new Error("should have thrown");
|
|
81
|
+
} catch (e: any) {
|
|
82
|
+
expect(e.name).toBe("QueryError");
|
|
83
|
+
expect(e.code).toBe("INVALID_QUERY");
|
|
84
|
+
}
|
|
85
|
+
});
|
|
86
|
+
});
|
|
87
|
+
|
|
88
|
+
// ---------------------------------------------------------------------------
|
|
89
|
+
// 2. sliceContentRange — boundary cases
|
|
90
|
+
// ---------------------------------------------------------------------------
|
|
91
|
+
|
|
92
|
+
describe("sliceContentRange", () => {
|
|
93
|
+
it("plain ASCII window", () => {
|
|
94
|
+
const r = sliceContentRange("hello world", { offset: 0, length: 5 });
|
|
95
|
+
expect(r.content).toBe("hello");
|
|
96
|
+
expect(r.content_offset).toBe(0);
|
|
97
|
+
expect(r.content_total_length).toBe(11);
|
|
98
|
+
expect(r.content_next_offset).toBe(5);
|
|
99
|
+
});
|
|
100
|
+
|
|
101
|
+
it("continuation window reaches the end → next_offset null", () => {
|
|
102
|
+
const r = sliceContentRange("hello world", { offset: 5, length: 100 });
|
|
103
|
+
expect(r.content).toBe(" world");
|
|
104
|
+
expect(r.content_next_offset).toBeNull();
|
|
105
|
+
});
|
|
106
|
+
|
|
107
|
+
it("offset with no length reads to the end", () => {
|
|
108
|
+
const r = sliceContentRange("hello world", { offset: 6 });
|
|
109
|
+
expect(r.content).toBe("world");
|
|
110
|
+
expect(r.content_total_length).toBe(11);
|
|
111
|
+
expect(r.content_next_offset).toBeNull();
|
|
112
|
+
});
|
|
113
|
+
|
|
114
|
+
it("offset exactly at end → empty slice, complete", () => {
|
|
115
|
+
const r = sliceContentRange("abc", { offset: 3 });
|
|
116
|
+
expect(r.content).toBe("");
|
|
117
|
+
expect(r.content_offset).toBe(3);
|
|
118
|
+
expect(r.content_total_length).toBe(3);
|
|
119
|
+
expect(r.content_next_offset).toBeNull();
|
|
120
|
+
});
|
|
121
|
+
|
|
122
|
+
it("offset past end → empty slice, complete (graceful loop termination)", () => {
|
|
123
|
+
const r = sliceContentRange("abc", { offset: 999, length: 16 });
|
|
124
|
+
expect(r.content).toBe("");
|
|
125
|
+
expect(r.content_offset).toBe(3); // clamped to total
|
|
126
|
+
expect(r.content_total_length).toBe(3);
|
|
127
|
+
expect(r.content_next_offset).toBeNull();
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
it("empty content → empty slice, total 0", () => {
|
|
131
|
+
const r = sliceContentRange("", { offset: 0, length: 16 });
|
|
132
|
+
expect(r.content).toBe("");
|
|
133
|
+
expect(r.content_total_length).toBe(0);
|
|
134
|
+
expect(r.content_next_offset).toBeNull();
|
|
135
|
+
});
|
|
136
|
+
|
|
137
|
+
it("budget cutting mid-codepoint backs off to the boundary (never over budget)", () => {
|
|
138
|
+
// "ab😀cd" — bytes: a=0, b=1, 😀=2..5 (4 bytes), c=6, d=7; total 8.
|
|
139
|
+
const s = "ab\u{1F600}cd";
|
|
140
|
+
expect(Buffer.byteLength(s, "utf8")).toBe(8);
|
|
141
|
+
|
|
142
|
+
// Budget 5 would cut mid-emoji → slice backs off to byte 2.
|
|
143
|
+
const r1 = sliceContentRange(s, { offset: 0, length: 5 });
|
|
144
|
+
expect(r1.content).toBe("ab");
|
|
145
|
+
expect(Buffer.byteLength(r1.content, "utf8")).toBeLessThanOrEqual(5);
|
|
146
|
+
expect(r1.content_next_offset).toBe(2);
|
|
147
|
+
|
|
148
|
+
// Next window picks up the whole emoji.
|
|
149
|
+
const r2 = sliceContentRange(s, { offset: 2, length: 4 });
|
|
150
|
+
expect(r2.content).toBe("\u{1F600}");
|
|
151
|
+
expect(r2.content_next_offset).toBe(6);
|
|
152
|
+
|
|
153
|
+
// Final window.
|
|
154
|
+
const r3 = sliceContentRange(s, { offset: 6, length: 4 });
|
|
155
|
+
expect(r3.content).toBe("cd");
|
|
156
|
+
expect(r3.content_next_offset).toBeNull();
|
|
157
|
+
});
|
|
158
|
+
|
|
159
|
+
it("offset landing mid-codepoint aligns DOWN (no bytes skipped) and echoes the effective offset", () => {
|
|
160
|
+
const s = "ab\u{1F600}cd"; // emoji occupies bytes 2..5
|
|
161
|
+
const r = sliceContentRange(s, { offset: 4, length: 8 });
|
|
162
|
+
expect(r.content_offset).toBe(2); // aligned down to the emoji's lead byte
|
|
163
|
+
expect(r.content).toBe("\u{1F600}cd");
|
|
164
|
+
expect(r.content_next_offset).toBeNull();
|
|
165
|
+
});
|
|
166
|
+
|
|
167
|
+
it("minimum budget always makes progress, even on a 4-byte codepoint", () => {
|
|
168
|
+
const s = "\u{1F600}\u{1F601}"; // two 4-byte emoji
|
|
169
|
+
const r = sliceContentRange(s, { offset: 0, length: MIN_CONTENT_LENGTH });
|
|
170
|
+
expect(r.content).toBe("\u{1F600}");
|
|
171
|
+
expect(r.content_next_offset).toBe(4);
|
|
172
|
+
});
|
|
173
|
+
|
|
174
|
+
it("applyContentRange mutates the shaped result in place", () => {
|
|
175
|
+
const result: any = { id: "n1", content: "hello world", tags: ["x"] };
|
|
176
|
+
applyContentRange(result, { offset: 0, length: 5 });
|
|
177
|
+
expect(result.content).toBe("hello");
|
|
178
|
+
expect(result.content_offset).toBe(0);
|
|
179
|
+
expect(result.content_total_length).toBe(11);
|
|
180
|
+
expect(result.content_next_offset).toBe(5);
|
|
181
|
+
expect(result.tags).toEqual(["x"]); // untouched
|
|
182
|
+
});
|
|
183
|
+
});
|
|
184
|
+
|
|
185
|
+
// ---------------------------------------------------------------------------
|
|
186
|
+
// 2b. Property test — reassembly invariant
|
|
187
|
+
// ---------------------------------------------------------------------------
|
|
188
|
+
|
|
189
|
+
/**
 * mulberry32 — a tiny seeded PRNG. The same seed always produces the same
 * sequence, so a failing property-test iteration can be replayed exactly.
 *
 * @param seed - seed value; coerced to an unsigned 32-bit integer.
 * @returns a generator yielding floats in [0, 1).
 */
function mulberry32(seed: number): () => number {
  let state = seed >>> 0;
  return () => {
    // Advance the 32-bit state by the mulberry32 increment (wrapping).
    state = ((state | 0) + 0x6d2b79f5) | 0;
    let mixed = Math.imul(state ^ (state >>> 15), 1 | state);
    mixed = (mixed + Math.imul(mixed ^ (mixed >>> 7), 61 | mixed)) ^ mixed;
    // Reinterpret as unsigned and scale by 2^32 into [0, 1).
    return ((mixed ^ (mixed >>> 14)) >>> 0) / 4294967296;
  };
}
|
|
200
|
+
|
|
201
|
+
describe("content range — reassembly property", () => {
|
|
202
|
+
// Mixed-width pool: 1-byte ASCII, 2-byte (é, ψ), 3-byte (你, ‱), 4-byte
|
|
203
|
+
// (😀, 𝄞) — plus whitespace, so windows land on every codepoint width.
|
|
204
|
+
const POOL = ["a", "Z", "9", " ", "\n", "é", "ψ", "你", "‱", "\u{1F600}", "\u{1D11E}"];
|
|
205
|
+
|
|
206
|
+
it("concatenating all slices is byte-identical to the full content; no slice exceeds the budget", () => {
|
|
207
|
+
const rand = mulberry32(0xc0ffee);
|
|
208
|
+
for (let iter = 0; iter < 60; iter++) {
|
|
209
|
+
const charCount = Math.floor(rand() * 120); // includes 0 (empty content)
|
|
210
|
+
let content = "";
|
|
211
|
+
for (let i = 0; i < charCount; i++) {
|
|
212
|
+
content += POOL[Math.floor(rand() * POOL.length)]!;
|
|
213
|
+
}
|
|
214
|
+
const budget = MIN_CONTENT_LENGTH + Math.floor(rand() * 13); // 4..16 bytes
|
|
215
|
+
const totalBytes = Buffer.byteLength(content, "utf8");
|
|
216
|
+
|
|
217
|
+
let offset = 0;
|
|
218
|
+
let assembled = "";
|
|
219
|
+
let lastTotal: number | null = null;
|
|
220
|
+
// Hard ceiling on iterations: every window must advance by >= 1 byte.
|
|
221
|
+
for (let step = 0; step <= totalBytes + 2; step++) {
|
|
222
|
+
const slice = sliceContentRange(content, { offset, length: budget });
|
|
223
|
+
expect(Buffer.byteLength(slice.content, "utf8")).toBeLessThanOrEqual(budget);
|
|
224
|
+
expect(slice.content_total_length).toBe(totalBytes);
|
|
225
|
+
lastTotal = slice.content_total_length;
|
|
226
|
+
assembled += slice.content;
|
|
227
|
+
if (slice.content_next_offset === null) break;
|
|
228
|
+
// Progress guarantee — next offset strictly advances.
|
|
229
|
+
expect(slice.content_next_offset).toBeGreaterThan(offset);
|
|
230
|
+
offset = slice.content_next_offset;
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
expect(assembled).toBe(content);
|
|
234
|
+
expect(Buffer.from(assembled, "utf8").equals(Buffer.from(content, "utf8"))).toBe(true);
|
|
235
|
+
expect(lastTotal).toBe(totalBytes);
|
|
236
|
+
}
|
|
237
|
+
});
|
|
238
|
+
});
|
|
239
|
+
|
|
240
|
+
// ---------------------------------------------------------------------------
|
|
241
|
+
// 3. MCP face — query-notes
|
|
242
|
+
// ---------------------------------------------------------------------------
|
|
243
|
+
|
|
244
|
+
describe("MCP query-notes — content range", () => {
|
|
245
|
+
let db: Database;
|
|
246
|
+
let store: SqliteStore;
|
|
247
|
+
|
|
248
|
+
beforeEach(() => {
|
|
249
|
+
db = new Database(":memory:");
|
|
250
|
+
store = new SqliteStore(db);
|
|
251
|
+
});
|
|
252
|
+
|
|
253
|
+
function queryTool() {
|
|
254
|
+
const tools = generateMcpTools(store);
|
|
255
|
+
return tools.find((t) => t.name === "query-notes")!;
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
it("single note: paged read loop reassembles the full content", async () => {
|
|
259
|
+
// Mixed-width content so windows hit multi-byte boundaries.
|
|
260
|
+
const content = ("section \u{1F600} 你好 " .repeat(40)).trim();
|
|
261
|
+
const note = await store.createNote(content);
|
|
262
|
+
const query = queryTool();
|
|
263
|
+
|
|
264
|
+
let offset = 0;
|
|
265
|
+
let assembled = "";
|
|
266
|
+
for (;;) {
|
|
267
|
+
const r: any = await query.execute({ id: note.id, content_offset: offset, content_length: 64 });
|
|
268
|
+
expect(r.content_total_length).toBe(Buffer.byteLength(content, "utf8"));
|
|
269
|
+
assembled += r.content;
|
|
270
|
+
if (r.content_next_offset === null) break;
|
|
271
|
+
offset = r.content_next_offset;
|
|
272
|
+
}
|
|
273
|
+
expect(assembled).toBe(content);
|
|
274
|
+
});
|
|
275
|
+
|
|
276
|
+
it("single note: response carries the range fields and the slice", async () => {
|
|
277
|
+
const note = await store.createNote("0123456789");
|
|
278
|
+
const r: any = await queryTool().execute({ id: note.id, content_length: 4 });
|
|
279
|
+
expect(r.content).toBe("0123");
|
|
280
|
+
expect(r.content_offset).toBe(0);
|
|
281
|
+
expect(r.content_total_length).toBe(10);
|
|
282
|
+
expect(r.content_next_offset).toBe(4);
|
|
283
|
+
expect(r.id).toBe(note.id); // rest of the note shape intact
|
|
284
|
+
});
|
|
285
|
+
|
|
286
|
+
it("single note, no range params → byte-identical to today (regression)", async () => {
|
|
287
|
+
const note = await store.createNote("full body here");
|
|
288
|
+
const r: any = await queryTool().execute({ id: note.id });
|
|
289
|
+
expect(r.content).toBe("full body here");
|
|
290
|
+
expect("content_total_length" in r).toBe(false);
|
|
291
|
+
expect("content_next_offset" in r).toBe(false);
|
|
292
|
+
expect("content_offset" in r).toBe(false);
|
|
293
|
+
});
|
|
294
|
+
|
|
295
|
+
it("single note: content_offset past end → empty slice, complete", async () => {
|
|
296
|
+
const note = await store.createNote("abc");
|
|
297
|
+
const r: any = await queryTool().execute({ id: note.id, content_offset: 999 });
|
|
298
|
+
expect(r.content).toBe("");
|
|
299
|
+
expect(r.content_total_length).toBe(3);
|
|
300
|
+
expect(r.content_next_offset).toBeNull();
|
|
301
|
+
});
|
|
302
|
+
|
|
303
|
+
it("single note: include_content=false + range params → loud error", async () => {
  const note = await store.createNote("abc");
  // The `.rejects` chain is itself a promise: it must be awaited, otherwise
  // the test can complete before the assertion has been evaluated.
  await expect(
    queryTool().execute({ id: note.id, include_content: false, content_length: 8 }),
  ).rejects.toThrow(/include_content/);
});
|
|
309
|
+
|
|
310
|
+
it("rejects sub-minimum / invalid budgets before any query work", async () => {
  const note = await store.createNote("abc");
  // Each `.rejects` assertion is awaited so a missing rejection (or a
  // mismatched message) fails THIS test instead of settling after it ends.
  await expect(queryTool().execute({ id: note.id, content_length: 0 })).rejects.toThrow(/content_length/);
  await expect(queryTool().execute({ id: note.id, content_length: -5 })).rejects.toThrow(/content_length/);
  await expect(queryTool().execute({ id: note.id, content_length: 2 })).rejects.toThrow(/content_length/);
  await expect(queryTool().execute({ id: note.id, content_offset: -1 })).rejects.toThrow(/content_offset/);
});
|
|
317
|
+
|
|
318
|
+
it("list query: include_content=true applies the window per note", async () => {
|
|
319
|
+
await store.createNote("alpha alpha alpha", { tags: ["big"] });
|
|
320
|
+
await store.createNote("beta beta beta beta", { tags: ["big"] });
|
|
321
|
+
const out: any[] = (await queryTool().execute({
|
|
322
|
+
tag: "big",
|
|
323
|
+
include_content: true,
|
|
324
|
+
content_length: 5,
|
|
325
|
+
})) as any[];
|
|
326
|
+
expect(out.length).toBe(2);
|
|
327
|
+
for (const n of out) {
|
|
328
|
+
expect(Buffer.byteLength(n.content, "utf8")).toBeLessThanOrEqual(5);
|
|
329
|
+
expect(typeof n.content_total_length).toBe("number");
|
|
330
|
+
expect(n.content_next_offset).toBe(5);
|
|
331
|
+
}
|
|
332
|
+
});
|
|
333
|
+
|
|
334
|
+
it("list query: lean default (no include_content) + range params → loud error", async () => {
  await store.createNote("alpha", { tags: ["big"] });
  // Await the `.rejects` chain so the assertion is evaluated before the
  // test function returns.
  await expect(queryTool().execute({ tag: "big", content_length: 8 })).rejects.toThrow(/include_content/);
});
|
|
340
|
+
|
|
341
|
+
it("list query, no range params → no range fields injected (regression)", async () => {
|
|
342
|
+
await store.createNote("alpha", { tags: ["big"] });
|
|
343
|
+
const out: any[] = (await queryTool().execute({ tag: "big", include_content: true })) as any[];
|
|
344
|
+
expect(out.length).toBe(1);
|
|
345
|
+
expect("content_total_length" in out[0]).toBe(false);
|
|
346
|
+
expect("content_next_offset" in out[0]).toBe(false);
|
|
347
|
+
});
|
|
348
|
+
|
|
349
|
+
it("expand_links: the range applies to the EXPANDED content", async () => {
|
|
350
|
+
await store.createNote("inlined body of B", { path: "B" });
|
|
351
|
+
const a = await store.createNote("A says: [[B]]", { path: "A" });
|
|
352
|
+
const query = queryTool();
|
|
353
|
+
|
|
354
|
+
const unpaged: any = await query.execute({ id: a.id, expand_links: true });
|
|
355
|
+
const paged: any = await query.execute({
|
|
356
|
+
id: a.id,
|
|
357
|
+
expand_links: true,
|
|
358
|
+
content_offset: 0,
|
|
359
|
+
content_length: 100000,
|
|
360
|
+
});
|
|
361
|
+
expect(paged.content).toBe(unpaged.content);
|
|
362
|
+
expect(paged.content_total_length).toBe(Buffer.byteLength(unpaged.content, "utf8"));
|
|
363
|
+
expect(paged.content_next_offset).toBeNull();
|
|
364
|
+
});
|
|
365
|
+
|
|
366
|
+
it("query-notes schema advertises the params (MCP discovery)", () => {
|
|
367
|
+
const query = queryTool();
|
|
368
|
+
const props = (query.inputSchema as any).properties;
|
|
369
|
+
expect(props.content_offset).toBeDefined();
|
|
370
|
+
expect(props.content_length).toBeDefined();
|
|
371
|
+
expect(query.description).toContain("content_offset");
|
|
372
|
+
expect(query.description).toContain("content_next_offset");
|
|
373
|
+
});
|
|
374
|
+
});
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Content range / pagination — bounded reads for large notes.
|
|
3
|
+
*
|
|
4
|
+
* MCP responses are size-limited: a 100KB+ transcript can't come back in
|
|
5
|
+
* one `query-notes` call, and a remote MCP client has no `curl | head -c`
|
|
6
|
+
* escape hatch. These helpers let a caller read note content in byte
|
|
7
|
+
* windows:
|
|
8
|
+
*
|
|
9
|
+
* request: `content_offset` (default 0) + `content_length` (byte budget)
|
|
10
|
+
* response: `content` (the slice) + `content_offset` (effective start)
|
|
11
|
+
* + `content_total_length` + `content_next_offset`
|
|
12
|
+
* (`null` when the slice reaches the end)
|
|
13
|
+
*
|
|
14
|
+
* Unit is **UTF-8 bytes** — the same unit as `byteSize` on the lean
|
|
15
|
+
* NoteIndex shape, and the natural unit for budgeting response size. But
|
|
16
|
+
* naive byte-slicing can split a multi-byte codepoint, which would corrupt
|
|
17
|
+
* the JSON string. So slices always end on a codepoint boundary *within*
|
|
18
|
+
* the budget: a slice never exceeds `content_length` bytes but may come up
|
|
19
|
+
* to 3 bytes short when a multi-byte character straddles the cut. A
|
|
20
|
+
* `content_offset` that lands mid-codepoint (only possible when the caller
|
|
21
|
+
* computes offsets by hand — chained `content_next_offset` values are
|
|
22
|
+
* always boundary-aligned) is aligned DOWN to the codepoint's leading byte
|
|
23
|
+
* so no bytes are ever skipped; the effective start is echoed back as
|
|
24
|
+
* `content_offset` on the response.
|
|
25
|
+
*
|
|
26
|
+
* Reassembly invariant (pinned by content-range.test.ts): starting at
|
|
27
|
+
* offset 0 and following `content_next_offset` until `null`, the
|
|
28
|
+
* concatenation of slices is byte-identical to the full content.
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
import { QueryError } from "./query-operators.js";
|
|
32
|
+
|
|
33
|
+
/**
|
|
34
|
+
* Minimum accepted `content_length`. A UTF-8 codepoint is at most 4 bytes,
|
|
35
|
+
* so any budget >= 4 is guaranteed to make progress (the codepoint at the
|
|
36
|
+
* window start always fits). Budgets 1–3 could stall forever on a 4-byte
|
|
37
|
+
* emoji (empty slice, next_offset == offset); rejecting them up front is
|
|
38
|
+
* deterministic and simpler than a runtime "no progress" error.
|
|
39
|
+
*/
|
|
40
|
+
export const MIN_CONTENT_LENGTH = 4;
|
|
41
|
+
|
|
42
|
+
export interface ContentRange {
|
|
43
|
+
/** Byte offset (UTF-8) to start reading from. */
|
|
44
|
+
offset: number;
|
|
45
|
+
/** Max bytes to return. Absent = read to the end. */
|
|
46
|
+
length?: number;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
export interface ContentRangeFields {
|
|
50
|
+
content: string;
|
|
51
|
+
/** Effective start (requested offset aligned down to a codepoint boundary). */
|
|
52
|
+
content_offset: number;
|
|
53
|
+
/** Full content size in UTF-8 bytes. */
|
|
54
|
+
content_total_length: number;
|
|
55
|
+
/** Byte offset to resume from, or null when the slice reaches the end. */
|
|
56
|
+
content_next_offset: number | null;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/**
 * Coerce one raw range parameter into a non-negative safe integer.
 *
 * @param raw - the incoming value: a number, a decimal string, or absent.
 * @param name - parameter name, used only in error messages.
 * @returns the parsed integer, or `undefined` when the value is absent
 *   (`undefined`, `null`, or a blank string such as `?content_offset=`).
 * @throws QueryError (INVALID_QUERY) for any other type, for strings that
 *   are not pure decimal digits, and for negative / fractional / unsafe
 *   numbers.
 */
function toNonNegativeInt(raw: unknown, name: string): number | undefined {
  if (raw == null) return undefined;

  // Rejection that echoes the offending value back to the caller.
  const rejectValue = (): QueryError =>
    new QueryError(
      `invalid \`${name}\` value ${JSON.stringify(raw)} — must be a non-negative integer (UTF-8 byte count).`,
      "INVALID_QUERY",
    );

  let parsed: number;
  switch (typeof raw) {
    case "number":
      parsed = raw;
      break;
    case "string": {
      const text = raw.trim();
      if (text === "") return undefined; // blank string counts as absent
      if (!/^\d+$/.test(text)) throw rejectValue();
      parsed = Number(text);
      break;
    }
    default:
      // Booleans, objects, etc. — the value is not echoed for these.
      throw new QueryError(
        `invalid \`${name}\` value — must be a non-negative integer (UTF-8 byte count).`,
        "INVALID_QUERY",
      );
  }

  // Catches NaN, Infinity, fractions, negatives, and digit strings too
  // large to be represented exactly.
  if (parsed < 0 || !Number.isSafeInteger(parsed)) throw rejectValue();
  return parsed;
}
|
|
87
|
+
|
|
88
|
+
/**
 * Turn the raw `content_offset` / `content_length` inputs into a
 * {@link ContentRange}.
 *
 * Both inputs may be numbers (MCP params) or decimal strings (REST query
 * params); a blank string is treated the same as a missing value.
 *
 * @param offsetRaw - raw `content_offset`; defaults to 0 when only a length
 *   is supplied.
 * @param lengthRaw - raw `content_length`; when absent the range has no
 *   `length` key (read to the end).
 * @returns `null` when neither parameter is present (range mode off),
 *   otherwise the parsed range.
 * @throws QueryError (INVALID_QUERY) when either value is negative or not
 *   an integer, or when `content_length` is below
 *   {@link MIN_CONTENT_LENGTH}.
 */
export function parseContentRange(offsetRaw: unknown, lengthRaw: unknown): ContentRange | null {
  // Offset is validated first, so its error wins when both are invalid.
  const offset = toNonNegativeInt(offsetRaw, "content_offset");
  const length = toNonNegativeInt(lengthRaw, "content_length");

  if (length === undefined) {
    // No budget: either range mode is off entirely, or read-to-end.
    return offset === undefined ? null : { offset };
  }

  if (length < MIN_CONTENT_LENGTH) {
    throw new QueryError(
      `invalid \`content_length\` value ${JSON.stringify(lengthRaw)} — must be at least ${MIN_CONTENT_LENGTH} bytes (the size of the largest UTF-8 codepoint, so every window makes progress).`,
      "INVALID_QUERY",
    );
  }

  return { offset: offset ?? 0, length };
}
|
|
110
|
+
|
|
111
|
+
/**
 * Build (not throw) the error used when range params are sent for a
 * response shape that carries no content — `include_content=false`, or a
 * list query left on its lean default. Kept in one place so the MCP and
 * REST faces report identical wording.
 *
 * @returns a `QueryError` with code `INVALID_QUERY`; the caller throws it.
 */
export function contentRangeRequiresContent(): QueryError {
  const message = `content_offset/content_length apply to note content, but content is not included in this response shape. Pass include_content=true (lists default to false) or drop the range params.`;
  return new QueryError(message, "INVALID_QUERY");
}
|
|
123
|
+
|
|
124
|
+
/**
 * Whether `b` is a UTF-8 continuation byte, i.e. its top two bits are `10`
 * (pattern 0b10xxxxxx). Leading bytes and single-byte ASCII return false.
 */
function isContinuationByte(b: number): boolean {
  const topTwoBits = (b >> 6) & 0b11;
  return topTwoBits === 0b10;
}
|
|
128
|
+
|
|
129
|
+
/**
 * Cut the requested UTF-8 byte window out of `content`.
 *
 * The window never splits a codepoint: both edges are moved DOWN to the
 * nearest codepoint boundary, so the slice is at most `range.length` bytes
 * and no byte is skipped between consecutive windows.
 *
 * @param content - the full note content.
 * @param range - byte offset to start at and an optional byte budget
 *   (absent budget = read to the end).
 * @returns the slice, the effective (aligned) start offset, the total size
 *   in UTF-8 bytes, and the offset to resume from (`null` once the slice
 *   reaches the end).
 */
export function sliceContentRange(content: string, range: ContentRange): ContentRangeFields {
  const encoded = Buffer.from(content, "utf8");
  const totalBytes = encoded.byteLength;
  const { offset, length } = range;

  // Starting at or beyond the end is not an error: return an empty,
  // complete slice so a paging loop that overshoots stops on
  // `content_next_offset: null`.
  if (offset >= totalBytes) {
    return {
      content: "",
      content_offset: totalBytes,
      content_total_length: totalBytes,
      content_next_offset: null,
    };
  }

  // Back the start up to the lead byte of the codepoint it falls inside.
  // Moving forward instead would drop bytes and break reassembly.
  let from = offset;
  while (from > 0 && isContinuationByte(encoded[from]!)) {
    from -= 1;
  }

  // Tentative end is start + budget, capped at the total; then back it up
  // off any continuation byte so the slice stays within the budget.
  let to = length === undefined ? totalBytes : Math.min(from + length, totalBytes);
  while (to > from && to < totalBytes && isContinuationByte(encoded[to]!)) {
    to -= 1;
  }

  const reachedEnd = to >= totalBytes;
  return {
    content: encoded.subarray(from, to).toString("utf8"),
    content_offset: from,
    content_total_length: totalBytes,
    content_next_offset: reachedEnd ? null : to,
  };
}
|
|
168
|
+
|
|
169
|
+
/**
 * Window the `content` of an already-shaped response object, mutating it
 * in place.
 *
 * `content` is replaced by the slice, and `content_offset`,
 * `content_total_length` and `content_next_offset` are set alongside it.
 * A missing or non-string `content` is sliced as the empty string. Callers
 * are expected to invoke this only with a parsed (non-null) range, so
 * responses without range params never gain the extra fields.
 *
 * @param result - the response object to mutate.
 * @param range - the parsed content range to apply.
 */
export function applyContentRange(
  result: { content?: unknown; [key: string]: unknown },
  range: ContentRange,
): void {
  const body = typeof result.content === "string" ? result.content : "";
  // The slice result holds exactly the four fields to write, in order.
  Object.assign(result, sliceContentRange(body, range));
}
|