mind-palace-graph 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/INSTALL.md +387 -0
- package/README.md +602 -0
- package/dist/api.d.ts +682 -0
- package/dist/api.js +660 -0
- package/dist/api.js.map +1 -0
- package/dist/cli.d.ts +95 -0
- package/dist/cli.js +856 -0
- package/dist/cli.js.map +1 -0
- package/dist/format.d.ts +16 -0
- package/dist/format.js +199 -0
- package/dist/format.js.map +1 -0
- package/dist/fuzzy.d.ts +45 -0
- package/dist/fuzzy.js +150 -0
- package/dist/fuzzy.js.map +1 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.js +528 -0
- package/dist/index.js.map +1 -0
- package/dist/mcp-server.d.ts +24 -0
- package/dist/mcp-server.js +187 -0
- package/dist/mcp-server.js.map +1 -0
- package/dist/mind-palace.d.ts +148 -0
- package/dist/mind-palace.js +780 -0
- package/dist/mind-palace.js.map +1 -0
- package/dist/nodes.d.ts +57 -0
- package/dist/nodes.js +220 -0
- package/dist/nodes.js.map +1 -0
- package/dist/pagination.d.ts +41 -0
- package/dist/pagination.js +63 -0
- package/dist/pagination.js.map +1 -0
- package/dist/palace-format.d.ts +30 -0
- package/dist/palace-format.js +146 -0
- package/dist/palace-format.js.map +1 -0
- package/dist/rg.d.ts +34 -0
- package/dist/rg.js +288 -0
- package/dist/rg.js.map +1 -0
- package/dist/sources.d.ts +87 -0
- package/dist/sources.js +457 -0
- package/dist/sources.js.map +1 -0
- package/dist/tokens.d.ts +35 -0
- package/dist/tokens.js +95 -0
- package/dist/tokens.js.map +1 -0
- package/dist/types.d.ts +236 -0
- package/dist/types.js +8 -0
- package/dist/types.js.map +1 -0
- package/package.json +67 -0
- package/skills/mpg-context/SKILL.md +556 -0
- package/skills/mpg-context/references/anti-patterns.md +133 -0
- package/skills/mpg-context/references/integration.md +123 -0
- package/skills/mpg-context/references/mind-palace.md +217 -0
- package/skills/mpg-context/references/multi-agent.md +147 -0
- package/skills/mpg-context/references/sources.md +120 -0
package/dist/sources.js
ADDED
|
@@ -0,0 +1,457 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Source resolution.
|
|
3
|
+
*
|
|
4
|
+
* A "source" is a stream of text we can search. mpg supports four kinds:
|
|
5
|
+
*
|
|
6
|
+
* - file/glob: read from disk
|
|
7
|
+
* - command: exec a shell command, search its stdout
|
|
8
|
+
* - stdin: read piped input
|
|
9
|
+
* - url: fetch with HTTP GET
|
|
10
|
+
*
|
|
11
|
+
* For non-file sources we capture the content into memory and feed it
|
|
12
|
+
* to rg via a temp file (see rg.ts). This keeps rg as the single
|
|
13
|
+
* search engine while supporting arbitrary content types.
|
|
14
|
+
*/
|
|
15
|
+
import { spawn } from "node:child_process";
|
|
16
|
+
import { existsSync, statSync } from "node:fs";
|
|
17
|
+
import { resolve as resolvePath } from "node:path";
|
|
18
|
+
/** Cached stdin content so @- and content stdin don't double-read. */
|
|
19
|
+
let _cachedStdin = null;
|
|
20
|
+
/**
 * Return everything piped to stdin as a UTF-8 string.
 *
 * The first call drains `process.stdin` and memoizes the text in the
 * module-level `_cachedStdin`; later calls return that value without
 * touching the stream again. When stdin is a TTY nothing is read and the
 * memoized value is the empty string.
 *
 * @returns {Promise<string>} the captured stdin text ("" for a TTY).
 */
export async function getStdin() {
    if (_cachedStdin === null) {
        if (process.stdin.isTTY) {
            _cachedStdin = "";
        }
        else {
            const pieces = [];
            for await (const piece of process.stdin) {
                // Streams in string mode yield strings; normalize to Buffers
                // so the pieces can be concatenated byte-wise.
                pieces.push(typeof piece === "string" ? Buffer.from(piece) : piece);
            }
            _cachedStdin = Buffer.concat(pieces).toString("utf8");
        }
    }
    return _cachedStdin;
}
|
|
35
|
+
/**
 * Reset the cached stdin (e.g. in test teardown).
 *
 * Sets the module-level `_cachedStdin` back to `null`, so the next
 * `getStdin()` call no longer short-circuits on the memoized value.
 */
export function resetStdinCache() {
    _cachedStdin = null;
}
|
|
39
|
+
/**
 * Expand path patterns into a de-duplicated list of regular files using
 * Node's built-in fs.glob.
 *
 * Each pattern is handled according to what it names on disk:
 *   - an existing regular file is added as-is;
 *   - an existing directory is globbed with a trailing recursive wildcard;
 *   - anything else is handed to fs.glob as a glob pattern.
 *
 * Only entries that `statSync` reports as regular files are kept. Stat and
 * glob failures are deliberately ignored (best effort), so unreadable or
 * missing entries simply do not appear in the result.
 *
 * @param {string[]} patterns - file paths, directory paths, or glob patterns.
 * @returns {Promise<string[]>} unique file paths, in first-seen order.
 */
export async function expandGlobs(patterns) {
    if (patterns.length === 0)
        return [];
    const { glob } = await import("node:fs/promises");
    const { statSync } = await import("node:fs");
    const found = new Set();
    // Node's fs.glob treats `\` as a glob escape, so absolute Windows
    // paths with backslashes never match. Normalize separators for the
    // pattern only — entries returned by glob are still valid paths.
    const asGlobPattern = (text) => text.replace(/\\/g, "/");
    // Keep a glob hit only when it is a regular file we can stat.
    const keepIfFile = (candidate) => {
        try {
            if (statSync(candidate).isFile())
                found.add(candidate);
        }
        catch { /* skip */ }
    };
    // Run one glob and collect its file hits; glob errors are ignored.
    const collectMatches = async (globPattern) => {
        try {
            for await (const hit of glob(globPattern))
                keepIfFile(hit);
        }
        catch { /* ignore */ }
    };
    for (const spec of patterns) {
        let info = null;
        try {
            info = statSync(spec);
        }
        catch { /* not on disk */ }
        if (info?.isFile()) {
            found.add(spec);
            continue;
        }
        if (info?.isDirectory()) {
            // Directory: drop trailing separators, then recurse into it.
            const withoutTrailingSep = spec.replace(/[\\\/]+$/, "");
            await collectMatches(`${asGlobPattern(withoutTrailingSep)}/**`);
            continue;
        }
        // Not a file or directory on disk — treat as a glob pattern.
        await collectMatches(asGlobPattern(spec));
    }
    return [...found];
}
|
|
97
|
+
/**
 * Heuristic: classify a path spec as a literal file or as something to expand.
 *
 * Returns `"file"` only when `p` currently names a regular file. Everything
 * else — directories, other filesystem objects, and paths that do not exist
 * or cannot be stat'ed — is reported as `"glob"`.
 *
 * A single guarded `statSync` is used rather than an `existsSync` +
 * `statSync` pair, so a path that disappears between the two calls can no
 * longer make this function throw.
 *
 * @param {string} p - path or pattern to classify.
 * @returns {"file" | "glob"} the classification.
 */
export function classifyPath(p) {
    let stats;
    try {
        stats = statSync(p);
    }
    catch {
        // Missing or inaccessible: not a literal file.
        return "glob";
    }
    return stats.isFile() ? "file" : "glob";
}
|
|
108
|
+
/**
 * Resolve a list of path specs to actual file paths.
 *
 * If `stdinContent` is provided, it is used as the content when
 * resolving `@-` specs instead of reading process.stdin again.
 * This avoids double-reading stdin when both content-from-stdin and
 * path-list-from-stdin are used in the same invocation.
 *
 * A spec can be:
 *   - `@-`        read paths from stdin, one per line
 *   - `@<file>`   read paths from a file, one per line
 *   - `path`      a literal file or directory path
 *   - `glob`      a glob pattern; expanded via fs.glob
 *
 * Directories are recursed into. In a path list (from `@-` or `@<file>`),
 * empty lines and lines starting with `#` are ignored, and each remaining
 * line is trimmed before use.
 *
 * @param {string[]} specs - the path specs to resolve.
 * @param {string} [stdinContent] - pre-read stdin text to use for `@-`.
 * @returns {Promise<string[]>} unique file paths, in first-seen order.
 * @throws {Error} when an `@<file>` list cannot be read; the underlying
 *   filesystem error is attached as `cause`.
 */
export async function resolvePathSpecs(specs, stdinContent) {
    const out = new Set();
    // Expand every meaningful line of a newline-separated path list.
    const addListed = async (text) => {
        for (const line of text.split(/\r?\n/)) {
            const entry = line.trim();
            if (!entry || entry.startsWith("#"))
                continue;
            await addExpanded(entry, out);
        }
    };
    for (const spec of specs) {
        if (spec === "@-") {
            // Read paths from stdin (or from the caller-supplied copy of it).
            await addListed(stdinContent ?? await getStdin());
            continue;
        }
        if (spec.startsWith("@")) {
            // Read paths from a file.
            const filePath = spec.slice(1);
            let text;
            try {
                const { readFileSync } = await import("node:fs");
                text = readFileSync(filePath, "utf8");
            }
            catch (err) {
                // Keep the original error reachable for callers that need the code/stack.
                throw new Error(`Cannot read path list from @${filePath}: ${err.message}`, { cause: err });
            }
            await addListed(text);
            continue;
        }
        await addExpanded(spec, out);
    }
    return [...out];
}
|
|
162
|
+
/**
 * Split path specs into two buckets without expanding directories:
 *
 *   - `files`: literal file paths the caller asked for. Each becomes
 *              a separate `runRg` invocation so they can be searched
 *              in parallel and their per-file content cache is hot.
 *   - `bulk`:  directories, plus specs that neither exist nor contain glob
 *              characters. These get passed to rg as-is — rg walks
 *              directories itself in parallel, much faster than
 *              fan-out-per-file from Node. Each bulk entry becomes one
 *              `runRg` invocation that may emit matches from many files.
 *
 * Specs containing glob characters (`*`, `?`, `[`, `]`) that do not name an
 * existing file or directory are expanded here and land in `files`.
 *
 * `@file` / `@-` are still expanded inline (the caller asked for an
 * explicit list, so we respect that). Empty lines and `#` comment lines in
 * such lists are ignored.
 *
 * Existing files, existing directories, and glob results are returned as
 * absolute paths so deduplication is stable across cwd-relative vs
 * absolute inputs. A spec that does not exist and has no glob characters is
 * passed through to `bulk` unchanged.
 *
 * @param {string[]} specs - the path specs to classify.
 * @param {string} [stdinContent] - pre-read stdin text to use for `@-`.
 * @returns {Promise<{files: string[], bulk: string[]}>} the two buckets.
 * @throws {Error} when an `@<file>` list cannot be read; the underlying
 *   filesystem error is attached as `cause`.
 */
export async function classifyPathSpecs(specs, stdinContent) {
    const files = new Set();
    const bulk = new Set();
    // Characters that imply globbing. Braces are intentionally not included,
    // so a non-existent `{a,b}` spec is passed through to `bulk` unexpanded.
    const hasGlobMeta = (s) => /[*?\[\]]/.test(s);
    // One guarded stat instead of existsSync + statSync: a path removed
    // between the two calls must not make classification throw.
    const statOrNull = (spec) => {
        try {
            return statSync(spec);
        }
        catch {
            return null;
        }
    };
    async function classify(spec) {
        const stats = statOrNull(spec);
        if (stats?.isFile()) {
            files.add(resolvePath(spec));
            return;
        }
        if (stats?.isDirectory()) {
            // The big win: directories go to rg as-is. rg walks them
            // itself in parallel, much faster than one rg invocation per
            // file from Node.
            bulk.add(resolvePath(spec));
            return;
        }
        if (hasGlobMeta(spec)) {
            // rg does NOT accept shell-style globs as path args. Expand
            // ourselves to literal files so the search target list stays
            // valid.
            for (const f of await expandGlobs([spec]))
                files.add(resolvePath(f));
            return;
        }
        // Spec doesn't exist and has no glob meta — most likely a typo or
        // a stale stash entry. Let rg surface the error rather than
        // swallowing it silently.
        bulk.add(spec);
    }
    // Classify every meaningful line of a newline-separated path list.
    const classifyListed = async (text) => {
        for (const line of text.split(/\r?\n/)) {
            const entry = line.trim();
            if (!entry || entry.startsWith("#"))
                continue;
            await classify(entry);
        }
    };
    for (const spec of specs) {
        if (spec === "@-") {
            await classifyListed(stdinContent ?? await getStdin());
            continue;
        }
        if (spec.startsWith("@")) {
            const filePath = spec.slice(1);
            let text;
            try {
                const { readFileSync } = await import("node:fs");
                text = readFileSync(filePath, "utf8");
            }
            catch (err) {
                // Keep the original error reachable for callers that need the code/stack.
                throw new Error(`Cannot read path list from @${filePath}: ${err.message}`, { cause: err });
            }
            await classifyListed(text);
            continue;
        }
        await classify(spec);
    }
    return { files: [...files], bulk: [...bulk] };
}
|
|
252
|
+
/**
 * Add the file(s) denoted by `spec` to the `out` set.
 *
 * A spec that `classifyPath` reports as a literal file is added verbatim;
 * any other spec (glob or directory) is expanded with `expandGlobs` and
 * every resulting path is added.
 */
async function addExpanded(spec, out) {
    if (classifyPath(spec) !== "file") {
        // Glob or directory — expand.
        const expanded = await expandGlobs([spec]);
        for (const filePath of expanded) {
            out.add(filePath);
        }
        return;
    }
    out.add(spec);
}
|
|
264
|
+
/**
 * Build the source descriptor for a single file path.
 *
 * The path is made absolute and used as the source id. `content` is left
 * `null` so rg reads the file directly (streaming, no temp file).
 *
 * @param {string} p - file path, absolute or cwd-relative.
 * @returns {{source: {id: string, type: "file"}, content: null}}
 */
export function resolveFileSource(p) {
    const source = { id: resolvePath(p), type: "file" };
    return { source, content: null };
}
|
|
271
|
+
/**
 * Expand a glob (or directory) pattern and build one file source
 * descriptor per resulting file.
 *
 * Each descriptor uses the absolute file path as its id and leaves
 * `content` as `null`.
 *
 * @param {string} pattern - pattern passed to `expandGlobs`.
 * @returns {Promise<Array<{source: {id: string, type: "file"}, content: null}>>}
 */
export async function resolveGlobSource(pattern) {
    const descriptors = [];
    for (const filePath of await expandGlobs([pattern])) {
        descriptors.push({
            source: { id: resolvePath(filePath), type: "file" },
            content: null,
        });
    }
    return descriptors;
}
|
|
278
|
+
/**
 * Build the source descriptor for a shell command.
 *
 * The id is the command prefixed with `cmd:` and the label is the command
 * prefixed with `$ `. `content` starts as `null`; it will be filled by
 * captureCommand.
 *
 * @param {string} cmd - the command line as typed by the user.
 * @returns {{source: {id: string, type: "command", label: string}, content: null}}
 */
export function resolveCommandSource(cmd) {
    const id = `cmd:${cmd}`;
    const label = `$ ${cmd}`;
    const source = { id, type: "command", label };
    return { source, content: null };
}
|
|
284
|
+
/** Cap captured command stdout at 64MB. Past that we truncate with a marker. */
const COMMAND_OUTPUT_MAX_BYTES = 64 * 1024 * 1024;
/** Default command timeout — 60s for `git log` etc. is plenty. */
const COMMAND_TIMEOUT_MS = 60_000;
/**
 * Capture a shell command's stdout for searching.
 *
 * The command string is handed to the platform shell through Node's
 * `shell` spawn option (`bash -c` on POSIX, `cmd.exe` on Windows). Letting
 * Node build the shell invocation matters on Windows: passing the command
 * as an ordinary argv element makes Node re-quote it, which mangles
 * embedded double quotes such as `git log --grep="fix bug"`.
 *
 * Output is capped at COMMAND_OUTPUT_MAX_BYTES and the command is sent
 * SIGTERM after COMMAND_TIMEOUT_MS so a hanging or runaway command
 * can't lock up the agent harness.
 *
 * @param {string} cmd - the command line to run.
 * @returns {Promise<string>} the command's stdout decoded as UTF-8; when the
 *   cap was hit, the truncated output followed by a `[mpg: ...]` marker line.
 * @throws {Error} if the command is empty, cannot be spawned, times out,
 *   exits with a non-zero code, or is terminated by a signal before
 *   finishing (other than our own truncation kill).
 */
export async function captureCommand(cmd) {
    const trimmed = cmd.trim();
    if (!trimmed)
        throw new Error("Empty command");
    // Only the first 500 characters of stderr are ever reported, so stop
    // buffering once we have that much; a noisy command must not be able to
    // grow this string without bound.
    const STDERR_KEEP_CHARS = 500;
    const shell = process.platform === "win32" ? "cmd.exe" : "bash";
    return await new Promise((resolve, reject) => {
        const proc = spawn(trimmed, { shell, stdio: ["ignore", "pipe", "pipe"] });
        const chunks = [];
        let bytes = 0;
        let truncated = false;
        let stderr = "";
        // Guards against settling the promise twice (timeout vs error vs close).
        let settled = false;
        const timer = setTimeout(() => {
            if (settled)
                return;
            settled = true;
            try {
                proc.kill("SIGTERM");
            }
            catch { /* ignore */ }
            reject(new Error(`Command timed out after ${COMMAND_TIMEOUT_MS}ms: ${trimmed.slice(0, 200)}`));
        }, COMMAND_TIMEOUT_MS);
        proc.stdout.on("data", (chunk) => {
            if (truncated)
                return;
            const remaining = COMMAND_OUTPUT_MAX_BYTES - bytes;
            if (chunk.length <= remaining) {
                chunks.push(chunk);
                bytes += chunk.length;
                return;
            }
            // Take what we can, then signal SIGTERM. Anything after the cap
            // is silently dropped — we keep a marker so the caller can tell.
            if (remaining > 0) {
                chunks.push(chunk.subarray(0, remaining));
                bytes += remaining;
            }
            truncated = true;
            try {
                proc.kill("SIGTERM");
            }
            catch { /* ignore */ }
        });
        proc.stderr.setEncoding("utf8");
        // The listener stays attached so the pipe keeps draining even after
        // we stop appending.
        proc.stderr.on("data", (chunk) => {
            if (stderr.length < STDERR_KEEP_CHARS)
                stderr += chunk;
        });
        proc.on("error", (err) => {
            if (settled)
                return;
            settled = true;
            clearTimeout(timer);
            reject(err);
        });
        proc.on("close", (code, signal) => {
            if (settled)
                return;
            settled = true;
            clearTimeout(timer);
            if (truncated) {
                // Successful path: caller gets the truncated output plus a
                // marker. We don't reject because partial data is still useful.
                const out = Buffer.concat(chunks).toString("utf8") +
                    `\n[mpg: command output truncated at ${COMMAND_OUTPUT_MAX_BYTES} bytes]\n`;
                resolve(out);
                return;
            }
            const stderrNote = stderr ? `\nstderr: ${stderr.slice(0, STDERR_KEEP_CHARS)}` : "";
            if (code !== 0 && code !== null) {
                reject(new Error(`Command exited with code ${code}: ${trimmed.slice(0, 200)}` + stderrNote));
                return;
            }
            if (signal) {
                // Killed by a signal we did not send for truncation: the output
                // is incomplete, so do not report it as a successful capture.
                reject(new Error(`Command terminated by signal ${signal}: ${trimmed.slice(0, 200)}` + stderrNote));
                return;
            }
            resolve(Buffer.concat(chunks).toString("utf8"));
        });
    });
}
|
|
374
|
+
/**
 * Deprecated: use getStdin() instead to avoid double-reads.
 *
 * Thin alias that delegates straight to getStdin() and returns its result.
 *
 * @deprecated Call getStdin() directly.
 * @returns {Promise<string>} whatever getStdin() resolves to.
 */
export async function captureStdin() {
    return getStdin();
}
|
|
378
|
+
/** Cap fetched URL body at 16MB — anything larger is a denial-of-context risk. */
const URL_FETCH_MAX_BYTES = 16 * 1024 * 1024;
/** Default URL fetch timeout. */
const URL_FETCH_TIMEOUT_MS = 30_000;
/**
 * Fetch `url` and return its body decoded as UTF-8 text.
 *
 * Safeguards, in order:
 *   - one timeout (URL_FETCH_TIMEOUT_MS) covers both the request and the
 *     body download; hitting it in either phase yields the same
 *     "timed out" error;
 *   - non-2xx responses are rejected;
 *   - responses whose content-type is present and not text-like are rejected
 *     before the body is read;
 *   - a declared content-length above URL_FETCH_MAX_BYTES is rejected;
 *   - the body is streamed and the download aborted as soon as it exceeds
 *     URL_FETCH_MAX_BYTES, in case content-length lied or was absent.
 *
 * On every failure path the response body is cancelled so the underlying
 * connection is released instead of lingering until garbage collection.
 *
 * @param {string} url - the URL to fetch.
 * @returns {Promise<string>} the response body ("" when there is no body).
 * @throws {Error} on timeout, non-OK status, non-text content-type,
 *   oversized body, or any network error raised by fetch.
 */
export async function captureUrl(url) {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), URL_FETCH_TIMEOUT_MS);
    let res;
    let reader;
    try {
        res = await fetch(url, {
            redirect: "follow",
            headers: { "user-agent": "mpg/0.3 (+https://github.com/JadeZaher/mpg)" },
            signal: controller.signal,
        });
        if (!res.ok) {
            throw new Error(`Failed to fetch ${url}: ${res.status} ${res.statusText}`);
        }
        // Cheap MIME guard — we are searching text. Reject obvious binary
        // types before we read the body so an LLM can't OOM us by passing
        // a video URL.
        const ct = (res.headers.get("content-type") ?? "").toLowerCase();
        if (ct && !ct.startsWith("text/") && !/json|xml|yaml|javascript|csv|html|markdown/.test(ct)) {
            throw new Error(`Refusing to fetch non-text content-type "${ct}" from ${url}. ` +
                `Use a different tool to search binary payloads.`);
        }
        // Content-length pre-check (cheap if the server set it).
        const clHeader = res.headers.get("content-length");
        if (clHeader) {
            const cl = parseInt(clHeader, 10);
            if (!Number.isNaN(cl) && cl > URL_FETCH_MAX_BYTES) {
                throw new Error(`Refusing to fetch ${cl} bytes from ${url} (cap: ${URL_FETCH_MAX_BYTES}). ` +
                    `Download manually and search the file.`);
            }
        }
        // Stream-with-cap. If content-length lied or wasn't set, we still
        // bail out as soon as we cross the threshold.
        if (!res.body)
            return "";
        reader = res.body.getReader();
        const decoder = new TextDecoder("utf-8");
        let out = "";
        let bytes = 0;
        while (true) {
            const { value, done } = await reader.read();
            if (done)
                break;
            bytes += value.byteLength;
            if (bytes > URL_FETCH_MAX_BYTES) {
                throw new Error(`Fetched body exceeded ${URL_FETCH_MAX_BYTES} bytes from ${url}. ` +
                    `Download manually and search the file.`);
            }
            out += decoder.decode(value, { stream: true });
        }
        // Flush any bytes of a multi-byte sequence still held by the decoder.
        out += decoder.decode();
        return out;
    }
    catch (err) {
        // Release the connection: cancel through the reader if we locked the
        // stream, otherwise through the body itself. Best effort only.
        try {
            if (reader)
                await reader.cancel();
            else if (res?.body)
                await res.body.cancel();
        }
        catch { /* ignore */ }
        // The abort can surface from fetch() or from a pending reader.read().
        if (err?.name === "AbortError") {
            throw new Error(`Fetch of ${url} timed out after ${URL_FETCH_TIMEOUT_MS}ms`, { cause: err });
        }
        throw err;
    }
    finally {
        clearTimeout(timer);
    }
}
|
|
451
|
+
/**
 * Build the source descriptor for a URL.
 *
 * The URL itself is the source id. `content` starts as `null`; it is
 * filled by captureUrl.
 *
 * @param {string} url - the URL to describe.
 * @returns {{source: {id: string, type: "url"}, content: null}}
 */
export function resolveUrlSource(url) {
    const source = { id: url, type: "url" };
    return { source, content: null };
}
|
|
457
|
+
//# sourceMappingURL=sources.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sources.js","sourceRoot":"","sources":["../src/sources.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAC3C,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AAC/C,OAAO,EAAE,OAAO,IAAI,WAAW,EAAE,MAAM,WAAW,CAAC;AAInD,sEAAsE;AACtE,IAAI,YAAY,GAAkB,IAAI,CAAC;AAEvC,8EAA8E;AAC9E,MAAM,CAAC,KAAK,UAAU,QAAQ;IAC5B,IAAI,YAAY,KAAK,IAAI;QAAE,OAAO,YAAY,CAAC;IAC/C,IAAI,OAAO,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;QACxB,YAAY,GAAG,EAAE,CAAC;QAClB,OAAO,EAAE,CAAC;IACZ,CAAC;IACD,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,OAAO,CAAC,KAAiB,EAAE,CAAC;QACpD,MAAM,CAAC,IAAI,CAAC,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;IACtE,CAAC;IACD,YAAY,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;IACtD,OAAO,YAAY,CAAC;AACtB,CAAC;AAED,sDAAsD;AACtD,MAAM,UAAU,eAAe;IAC7B,YAAY,GAAG,IAAI,CAAC;AACtB,CAAC;AAQD,6EAA6E;AAC7E,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,QAAkB;IAClD,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IACrC,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAC;IAClD,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,CAAC;IAC7C,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAC;IAE9B,KAAK,UAAU,IAAI,CAAC,CAAS;QAC3B,4DAA4D;QAC5D,0BAA0B;QAC1B,oCAAoC;QACpC,IAAI,CAAC,GAAmC,IAAI,CAAC;QAC7C,IAAI,CAAC;YAAC,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;QAAC,CAAC;QAAC,MAAM,CAAC,CAAC,iBAAiB,CAAC,CAAC;QAEpD,kEAAkE;QAClE,mEAAmE;QACnE,iEAAiE;QACjE,MAAM,aAAa,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QAE3D,IAAI,CAAC,IAAI,CAAC,CAAC,WAAW,EAAE,EAAE,CAAC;YACzB,MAAM,OAAO,GAAG,CAAC,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;YAC1C,MAAM,cAAc,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,KAAK,CAAC;YACtD,IAAI,CAAC;gBACH,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,IAAI,CAAC,cAAc,CAAC,EAAE,CAAC;oBAC/C,IAAI,CAAC;wBACH,MAAM,EAAE,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC;wBAC3B,IAAI,EAAE,CAAC,MAAM,EAAE;4BAAE,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;oBAClC,CAAC;oBAAC,MAAM,CAAC,CAAC,UAAU,CAAC,CAAC;gBACxB,CAAC;YACH,CAAC;YAAC,MAAM,CAAC,CAAC,
YAAY,CAAC,CAAC;QAC1B,CAAC;aAAM,IAAI,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC;YAC3B,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QACb,CAAC;aAAM,CAAC;YACN,yCAAyC;YACzC,IAAI,CAAC;gBACH,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;oBACjD,IAAI,CAAC;wBACH,MAAM,EAAE,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC;wBAC3B,IAAI,EAAE,CAAC,MAAM,EAAE;4BAAE,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;oBAClC,CAAC;oBAAC,MAAM,CAAC,CAAC,UAAU,CAAC,CAAC;gBACxB,CAAC;YACH,CAAC;YAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,MAAM,IAAI,CAAC,OAAO,CAAC,CAAC;IACtB,CAAC;IACD,OAAO,CAAC,GAAG,GAAG,CAAC,CAAC;AAClB,CAAC;AAED,8DAA8D;AAC9D,MAAM,UAAU,YAAY,CAAC,CAAS;IACpC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;QAAE,OAAO,MAAM,CAAC;IAClC,MAAM,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;IACtB,IAAI,CAAC,CAAC,WAAW,EAAE;QAAE,OAAO,MAAM,CAAC;IACnC,IAAI,CAAC,CAAC,MAAM,EAAE;QAAE,OAAO,MAAM,CAAC;IAC9B,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CAAC,KAAe,EAAE,YAA4B;IAClF,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAC;IAC9B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;YAClB,yBAAyB;YACzB,MAAM,IAAI,GAAG,YAAY,IAAI,MAAM,QAAQ,EAAE,CAAC;YAC9C,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,CAAC;gBACvC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;gBAC5B,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;oBAAE,SAAS;gBAClD,MAAM,WAAW,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;YAClC,CAAC;YACD,SAAS;QACX,CAAC;QACD,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YACzB,0BAA0B;YAC1B,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC/B,IAAI,IAAY,CAAC;YACjB,IAAI,CAAC;gBACH,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,CAAC;gBACjD,IAAI,GAAG,YAAY,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;YACxC,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,MAAM,IAAI,KAAK,CAAC,+BAA+B,QAAQ,KAAM,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC;YACxF,CAAC;YACD,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,CAAC;gBACvC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;gBAC5B,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;oB
AAE,SAAS;gBAClD,MAAM,WAAW,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;YAClC,CAAC;YACD,SAAS;QACX,CAAC;QACD,MAAM,WAAW,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IAC/B,CAAC;IACD,OAAO,CAAC,GAAG,GAAG,CAAC,CAAC;AAClB,CAAC;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,KAAe,EACf,YAA4B;IAE5B,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAChC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAE/B,SAAS,WAAW,CAAC,CAAS;QAC5B,+DAA+D;QAC/D,mEAAmE;QACnE,gCAAgC;QAChC,OAAO,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC5B,CAAC;IAED,KAAK,UAAU,QAAQ,CAAC,IAAY;QAClC,IAAI,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YACrB,MAAM,CAAC,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;YACzB,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC;gBACf,KAAK,CAAC,GAAG,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC;gBAC7B,OAAO;YACT,CAAC;YACD,IAAI,CAAC,CAAC,WAAW,EAAE,EAAE,CAAC;gBACpB,yDAAyD;gBACzD,6DAA6D;gBAC7D,kBAAkB;gBAClB,IAAI,CAAC,GAAG,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC;gBAC5B,OAAO;YACT,CAAC;QACH,CAAC;QACD,IAAI,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;YACtB,4DAA4D;YAC5D,6DAA6D;YAC7D,0DAA0D;YAC1D,mCAAmC;YACnC,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;YAC3C,KAAK,MAAM,CAAC,IAAI,QAAQ;gBAAE,KAAK,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;YACpD,OAAO;QACT,CAAC;QACD,kEAAkE;QAClE,4DAA4D;QAC5D,0BAA0B;QAC1B,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACjB,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;YAClB,MAAM,IAAI,GAAG,YAAY,IAAI,MAAM,QAAQ,EAAE,CAAC;YAC9C,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,CAAC;gBACvC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;gBAC5B,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;oBAAE,SAAS;gBAClD,MAAM,QAAQ,CAAC,OAAO,CAAC,CAAC;YAC1B,CAAC;YACD,SAAS;QACX,CAAC;QACD,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YACzB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC/B,IAAI,IAAY,CAAC;YACjB,IAAI,CAAC;gBACH,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,CAAC;gBACjD,IAAI,GAAG,YAAY,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;YACxC,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,MAAM,IAAI,KAAK,CAAC,+BAA+B,QAAQ,KAAM,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC;YACxF,CAAC;YACD,KAAK,MAAM,I
AAI,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,CAAC;gBACvC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;gBAC5B,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;oBAAE,SAAS;gBAClD,MAAM,QAAQ,CAAC,OAAO,CAAC,CAAC;YAC1B,CAAC;YACD,SAAS;QACX,CAAC;QACD,MAAM,QAAQ,CAAC,IAAI,CAAC,CAAC;IACvB,CAAC;IACD,OAAO,EAAE,KAAK,EAAE,CAAC,GAAG,KAAK,CAAC,EAAE,IAAI,EAAE,CAAC,GAAG,IAAI,CAAC,EAAE,CAAC;AAChD,CAAC;AAED,iEAAiE;AACjE,KAAK,UAAU,WAAW,CAAC,IAAY,EAAE,GAAgB;IACvD,MAAM,CAAC,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IAC7B,IAAI,CAAC,KAAK,MAAM,EAAE,CAAC;QACjB,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACd,OAAO;IACT,CAAC;IACD,8BAA8B;IAC9B,MAAM,KAAK,GAAG,MAAM,WAAW,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IACxC,KAAK,MAAM,CAAC,IAAI,KAAK;QAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;AACpC,CAAC;AAED,MAAM,UAAU,iBAAiB,CAAC,CAAS;IACzC,MAAM,GAAG,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;IAC3B,OAAO;QACL,MAAM,EAAE,EAAE,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,MAAM,EAAE;QACjC,OAAO,EAAE,IAAI,EAAE,0DAA0D;KAC1E,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,iBAAiB,CAAC,OAAe;IACrD,MAAM,KAAK,GAAG,MAAM,WAAW,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;IAC3C,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACvB,MAAM,EAAE,EAAE,EAAE,EAAE,WAAW,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE;QAC5C,OAAO,EAAE,IAAI;KACd,CAAC,CAAC,CAAC;AACN,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,GAAW;IAC9C,OAAO;QACL,MAAM,EAAE,EAAE,EAAE,EAAE,OAAO,GAAG,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,KAAK,GAAG,EAAE,EAAE;QAChE,OAAO,EAAE,IAAI,EAAE,mCAAmC;KACnD,CAAC;AACJ,CAAC;AAED,gFAAgF;AAChF,MAAM,wBAAwB,GAAG,EAAE,GAAG,IAAI,GAAG,IAAI,CAAC;AAClD,kEAAkE;AAClE,MAAM,kBAAkB,GAAG,MAAM,CAAC;AAElC;;;;;;;;;;GAUG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,GAAW;IAC9C,MAAM,OAAO,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;IAC3B,IAAI,CAAC,OAAO;QAAE,MAAM,IAAI,KAAK,CAAC,eAAe,CAAC,CAAC;IAE/C,MAAM,KAAK,GAAG,OAAO,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC;IAChE,MAAM,SAAS,GAAG,OAAO,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAEnF,OAAO,MAAM,IAAI,OAAO,CAAS,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACnD,MAAM,IAAI,G
AAG,KAAK,CAAC,KAAK,EAAE,SAAS,EAAE,EAAE,KAAK,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC;QAC5E,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,SAAS,GAAG,KAAK,CAAC;QACtB,IAAI,MAAM,GAAG,EAAE,CAAC;QAChB,IAAI,OAAO,GAAG,KAAK,CAAC;QAEpB,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE;YAC5B,IAAI,OAAO;gBAAE,OAAO;YACpB,OAAO,GAAG,IAAI,CAAC;YACf,IAAI,CAAC;gBAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAAC,CAAC;YAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC;YACpD,MAAM,CAAC,IAAI,KAAK,CACd,2BAA2B,kBAAkB,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAC5E,CAAC,CAAC;QACL,CAAC,EAAE,kBAAkB,CAAC,CAAC;QAEvB,IAAI,CAAC,MAAO,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAa,EAAE,EAAE;YACxC,IAAI,SAAS;gBAAE,OAAO;YACtB,MAAM,SAAS,GAAG,wBAAwB,GAAG,KAAK,CAAC;YACnD,IAAI,KAAK,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;gBAC9B,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACnB,KAAK,IAAI,KAAK,CAAC,MAAM,CAAC;gBACtB,OAAO;YACT,CAAC;YACD,gEAAgE;YAChE,iEAAiE;YACjE,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;gBAClB,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,CAAC;gBAC1C,KAAK,IAAI,SAAS,CAAC;YACrB,CAAC;YACD,SAAS,GAAG,IAAI,CAAC;YACjB,IAAI,CAAC;gBAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAAC,CAAC;YAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC;QACtD,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC,MAAO,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;QACjC,IAAI,CAAC,MAAO,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAa,EAAE,EAAE,GAAG,MAAM,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QAEjE,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE;YACvB,IAAI,OAAO;gBAAE,OAAO;YACpB,OAAO,GAAG,IAAI,CAAC;YACf,YAAY,CAAC,KAAK,CAAC,CAAC;YACpB,MAAM,CAAC,GAAG,CAAC,CAAC;QACd,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;YACxB,IAAI,OAAO;gBAAE,OAAO;YACpB,OAAO,GAAG,IAAI,CAAC;YACf,YAAY,CAAC,KAAK,CAAC,CAAC;YACpB,IAAI,SAAS,EAAE,CAAC;gBACd,2DAA2D;gBAC3D,gEAAgE;gBAChE,MAAM,GAAG,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC;oBAChD,uCAAuC,wBAAwB,WAAW,CAAC;gBAC7E,OAAO,CAAC,GAAG,CAAC,CAAC;gBACb,OAAO;YACT,CAAC;YACD,IAAI,IAAI,KAAK,CAAC,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;gBAChC,MAAM,CAAC,IAAI,KAAK,CACd,4BAA4B,IAAI,
KAAK,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE;oBAC5D,CAAC,MAAM,CAAC,CAAC,CAAC,aAAa,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CACpD,CAAC,CAAC;gBACH,OAAO;YACT,CAAC;YACD,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC;QAClD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AAED,gEAAgE;AAChE,MAAM,CAAC,KAAK,UAAU,YAAY;IAChC,OAAO,QAAQ,EAAE,CAAC;AACpB,CAAC;AAED,kFAAkF;AAClF,MAAM,mBAAmB,GAAG,EAAE,GAAG,IAAI,GAAG,IAAI,CAAC;AAC7C,iCAAiC;AACjC,MAAM,oBAAoB,GAAG,MAAM,CAAC;AAEpC,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,GAAW;IAC1C,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;IACzC,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,oBAAoB,CAAC,CAAC;IACzE,IAAI,GAAa,CAAC;IAClB,IAAI,CAAC;QACH,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YACrB,QAAQ,EAAE,QAAQ;YAClB,OAAO,EAAE,EAAE,YAAY,EAAE,6CAA6C,EAAE;YACxE,MAAM,EAAE,UAAU,CAAC,MAAM;SAC1B,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,YAAY,CAAC,KAAK,CAAC,CAAC;QACpB,IAAK,GAAa,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;YACzC,MAAM,IAAI,KAAK,CAAC,YAAY,GAAG,oBAAoB,oBAAoB,IAAI,CAAC,CAAC;QAC/E,CAAC;QACD,MAAM,GAAG,CAAC;IACZ,CAAC;IACD,IAAI,CAAC;QACH,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,mBAAmB,GAAG,KAAK,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC;QAC7E,CAAC;QACD,kEAAkE;QAClE,kEAAkE;QAClE,eAAe;QACf,MAAM,EAAE,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;QACjE,IAAI,EAAE,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,4CAA4C,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC;YAC5F,MAAM,IAAI,KAAK,CACb,4CAA4C,EAAE,UAAU,GAAG,IAAI;gBAC/D,iDAAiD,CAClD,CAAC;QACJ,CAAC;QACD,yDAAyD;QACzD,MAAM,QAAQ,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;QACnD,IAAI,QAAQ,EAAE,CAAC;YACb,MAAM,EAAE,GAAG,QAAQ,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;YAClC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,IAAI,EAAE,GAAG,mBAAmB,EAAE,CAAC;gBAClD,MAAM,IAAI,KAAK,CACb,qBAAqB,EAAE,eAAe,GAAG,UAAU,mBAAmB,KAAK;oBAC3E,wCAAwC,CACzC,CAAC;YACJ,CAAC;QACH,CAAC;QACD,kEAAkE;QAClE,8CAA8C;QAC9C,IAAI,CAAC,GAAG,CAAC,IAAI;YAAE,OAAO,EAA
E,CAAC;QACzB,MAAM,MAAM,GAAG,GAAG,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;QACpC,MAAM,OAAO,GAAG,IAAI,WAAW,CAAC,OAAO,CAAC,CAAC;QACzC,IAAI,GAAG,GAAG,EAAE,CAAC;QACb,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,OAAO,IAAI,EAAE,CAAC;YACZ,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;YAC5C,IAAI,IAAI;gBAAE,MAAM;YAChB,KAAK,IAAI,KAAK,CAAC,UAAU,CAAC;YAC1B,IAAI,KAAK,GAAG,mBAAmB,EAAE,CAAC;gBAChC,IAAI,CAAC;oBAAC,MAAM,MAAM,CAAC,MAAM,EAAE,CAAC;gBAAC,CAAC;gBAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC;gBACrD,MAAM,IAAI,KAAK,CACb,yBAAyB,mBAAmB,eAAe,GAAG,IAAI;oBAClE,wCAAwC,CACzC,CAAC;YACJ,CAAC;YACD,GAAG,IAAI,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;QACjD,CAAC;QACD,GAAG,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;QACxB,OAAO,GAAG,CAAC;IACb,CAAC;YAAS,CAAC;QACT,YAAY,CAAC,KAAK,CAAC,CAAC;IACtB,CAAC;AACH,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,GAAW;IAC1C,OAAO;QACL,MAAM,EAAE,EAAE,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE;QAChC,OAAO,EAAE,IAAI,EAAE,uBAAuB;KACvC,CAAC;AACJ,CAAC"}
|
package/dist/tokens.d.ts
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token estimation.
|
|
3
|
+
*
|
|
4
|
+
* We use a simple chars/4 heuristic as the default. It's an approximation:
|
|
5
|
+
* - English prose averages ~4 chars/token
|
|
6
|
+
* - Code averages ~3.5 chars/token (more symbols)
|
|
7
|
+
* - JSON averages ~3 chars/token
|
|
8
|
+
*
|
|
9
|
+
* This is intentionally dependency-free. For higher fidelity, callers
|
|
10
|
+
* can plug in tiktoken or gpt-tokenizer at the integration boundary.
|
|
11
|
+
*
|
|
12
|
+
* The heuristic is good enough to make *budgeting* decisions (sizing
|
|
13
|
+
* context windows, capping output) which is what mpg is for. It's not
|
|
14
|
+
* a substitute for a real tokenizer when billing accuracy matters.
|
|
15
|
+
*/
|
|
16
|
+
export interface TokenModel {
|
|
17
|
+
/** Estimate the token count of a single string. */
|
|
18
|
+
estimate(text: string): number;
|
|
19
|
+
/** Estimate the combined token count of every string in `texts`. */
|
|
20
|
+
estimateMany(texts: string[]): number;
|
|
21
|
+
}
|
|
22
|
+
/** Shared default model; its implementation estimates roughly one token per 4 characters. */
export declare const defaultTokens: TokenModel;
|
|
23
|
+
/**
|
|
24
|
+
* Trim a list of lines to fit within a token budget, preferring the
|
|
25
|
+
* lines closest to a target index.
|
|
26
|
+
*
|
|
27
|
+
* This is the workhorse used to build the pre/post context windows of
|
|
28
|
+
* a node. It walks outward from the match line, accumulating lines
|
|
29
|
+
* until the budget is exhausted, then returns the kept lines in
|
|
30
|
+
* original order. The target line itself is always kept, so `spent` can exceed `budget` when that line alone costs more than the budget. A non-positive budget or an empty `lines` array yields no lines and `spent` 0.
|
|
31
|
+
*/
|
|
32
|
+
export declare function trimLinesToBudget(lines: string[], targetIndex: number, budget: number, model?: TokenModel): {
|
|
33
|
+
kept: string[];
|
|
34
|
+
spent: number;
|
|
35
|
+
};
|
package/dist/tokens.js
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token estimation.
|
|
3
|
+
*
|
|
4
|
+
* We use a simple chars/4 heuristic as the default. It's an approximation:
|
|
5
|
+
* - English prose averages ~4 chars/token
|
|
6
|
+
* - Code averages ~3.5 chars/token (more symbols)
|
|
7
|
+
* - JSON averages ~3 chars/token
|
|
8
|
+
*
|
|
9
|
+
* This is intentionally dependency-free. For higher fidelity, callers
|
|
10
|
+
* can plug in tiktoken or gpt-tokenizer at the integration boundary.
|
|
11
|
+
*
|
|
12
|
+
* The heuristic is good enough to make *budgeting* decisions (sizing
|
|
13
|
+
* context windows, capping output) which is what mpg is for. It's not
|
|
14
|
+
* a substitute for a real tokenizer when billing accuracy matters.
|
|
15
|
+
*/
|
|
16
|
+
/** Default ratio used by the heuristic: roughly four characters per token. */
const DEFAULT_CHARS_PER_TOKEN = 4;

/**
 * Dependency-free token estimator based on a fixed characters-per-token ratio.
 */
class HeuristicTokenModel {
    charsPerToken;

    /**
     * @param {number} [charsPerToken] - Average number of characters per token.
     *   Must be a finite number greater than zero.
     * @throws {RangeError} If `charsPerToken` is not a finite positive number.
     */
    constructor(charsPerToken = DEFAULT_CHARS_PER_TOKEN) {
        // A zero, negative, NaN or infinite ratio would turn every estimate into
        // Infinity, NaN or a meaningless constant, so reject it up front.
        if (!Number.isFinite(charsPerToken) || charsPerToken <= 0) {
            throw new RangeError(
                `charsPerToken must be a finite positive number, got ${String(charsPerToken)}`,
            );
        }
        this.charsPerToken = charsPerToken;
    }

    /**
     * Estimate the token count of a single string.
     *
     * @param {string} text - Text to measure.
     * @returns {number} 0 for empty (or otherwise falsy) input; otherwise the
     *   length divided by the ratio, rounded up.
     */
    estimate(text) {
        if (!text) {
            return 0;
        }
        // Rounding up means any non-empty string costs at least one token.
        return Math.ceil(text.length / this.charsPerToken);
    }

    /**
     * Estimate the combined token count of several strings.
     *
     * @param {string[]} texts - Strings to measure.
     * @returns {number} Sum of `estimate` over every entry.
     */
    estimateMany(texts) {
        let total = 0;
        for (const text of texts) {
            total += this.estimate(text);
        }
        return total;
    }
}

/** Shared model instance using the default characters-per-token ratio. */
export const defaultTokens = new HeuristicTokenModel();
|
|
36
|
+
/**
 * Trim a list of lines to fit within a token budget, preferring the
 * lines closest to a target index.
 *
 * This is the workhorse used to build the pre/post context windows of
 * a node. It walks outward from the match line, accumulating lines
 * until the budget is exhausted, then returns the kept lines in
 * original order. The kept lines always form one contiguous window.
 *
 * The target line itself is always kept, so `spent` can exceed `budget`
 * when that single line costs more than the whole budget.
 *
 * @param {string[]} lines - Candidate lines, in document order.
 * @param {number} targetIndex - Index of the line to centre the window on.
 *   Fractions are truncated and out-of-range values are clamped to the
 *   nearest valid index; NaN yields an empty result.
 * @param {number} budget - Maximum token count for the window.
 * @param {{ estimate(text: string): number }} [model] - Token estimator;
 *   defaults to the module's shared heuristic model.
 * @returns {{ kept: string[], spent: number }} The retained lines and their
 *   total estimated cost.
 */
export function trimLinesToBudget(lines, targetIndex, budget, model = defaultTokens) {
    if (budget <= 0 || lines.length === 0) {
        return { kept: [], spent: 0 };
    }
    const requested = Math.trunc(targetIndex);
    if (Number.isNaN(requested)) {
        return { kept: [], spent: 0 };
    }
    // Clamp so an out-of-range target can never index past either end of
    // `lines` (which previously leaked `undefined` entries into the result).
    const lastIndex = lines.length - 1;
    const center = Math.min(Math.max(requested, 0), lastIndex);

    let spent = model.estimate(lines[center]);
    // Inclusive bounds of the window kept so far.
    let first = center;
    let last = center;
    let canGrowUp = first > 0;
    let canGrowDown = last < lastIndex;

    while (canGrowUp || canGrowDown) {
        // Take whichever neighbouring line is nearer to the target; ties go
        // upward. This keeps the window balanced around the target.
        const distanceUp = center - (first - 1);
        const distanceDown = last + 1 - center;
        const takeUp = canGrowUp && (!canGrowDown || distanceUp <= distanceDown);
        const candidate = takeUp ? first - 1 : last + 1;
        const cost = model.estimate(lines[candidate]);

        if (spent + cost > budget) {
            // This side is finished for good; the other side may still grow.
            if (takeUp) {
                canGrowUp = false;
            }
            else {
                canGrowDown = false;
            }
            continue;
        }

        spent += cost;
        if (takeUp) {
            first = candidate;
            canGrowUp = first > 0;
        }
        else {
            last = candidate;
            canGrowDown = last < lastIndex;
        }
    }

    return { kept: lines.slice(first, last + 1), spent };
}
|
|
95
|
+
//# sourceMappingURL=tokens.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tokens.js","sourceRoot":"","sources":["../src/tokens.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAEH,MAAM,uBAAuB,GAAG,CAAC,CAAC;AASlC,MAAM,mBAAmB;IACH;IAApB,YAAoB,gBAAwB,uBAAuB;QAA/C,kBAAa,GAAb,aAAa,CAAkC;IAAG,CAAC;IAEvE,QAAQ,CAAC,IAAY;QACnB,IAAI,CAAC,IAAI;YAAE,OAAO,CAAC,CAAC;QACpB,4DAA4D;QAC5D,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC;IAClE,CAAC;IAED,YAAY,CAAC,KAAe;QAC1B,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,KAAK,MAAM,CAAC,IAAI,KAAK;YAAE,KAAK,IAAI,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;QACjD,OAAO,KAAK,CAAC;IACf,CAAC;CACF;AAED,MAAM,CAAC,MAAM,aAAa,GAAe,IAAI,mBAAmB,EAAE,CAAC;AAEnE;;;;;;;;GAQG;AACH,MAAM,UAAU,iBAAiB,CAC/B,KAAe,EACf,WAAmB,EACnB,MAAc,EACd,QAAoB,aAAa;IAEjC,IAAI,MAAM,IAAI,CAAC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtC,OAAO,EAAE,IAAI,EAAE,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;IAChC,CAAC;IAED,sEAAsE;IACtE,gCAAgC;IAChC,MAAM,IAAI,GAAG,IAAI,KAAK,CAAgB,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC/D,IAAI,CAAC,WAAW,CAAC,GAAG,KAAK,CAAC,WAAW,CAAC,CAAC;IACvC,IAAI,KAAK,GAAG,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC;IAE/C,IAAI,EAAE,GAAG,WAAW,GAAG,CAAC,CAAC;IACzB,IAAI,EAAE,GAAG,WAAW,GAAG,CAAC,CAAC;IAEzB,oEAAoE;IACpE,kEAAkE;IAClE,OAAO,EAAE,IAAI,CAAC,IAAI,EAAE,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;QACpC,MAAM,KAAK,GAAG,EAAE,IAAI,CAAC,CAAC;QACtB,MAAM,KAAK,GAAG,EAAE,GAAG,KAAK,CAAC,MAAM,CAAC;QAEhC,IAAI,CAAC,KAAK,IAAI,CAAC,KAAK;YAAE,MAAM;QAE5B,+DAA+D;QAC/D,MAAM,MAAM,GACV,KAAK,IAAI,CAAC,CAAC,KAAK,IAAI,CAAC,WAAW,GAAG,EAAE,CAAC,IAAI,CAAC,EAAE,GAAG,WAAW,CAAC,CAAC,CAAC;QAEhE,IAAI,MAAM,EAAE,CAAC;YACX,MAAM,IAAI,GAAG,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC;YACvC,IAAI,KAAK,GAAG,IAAI,GAAG,MAAM,EAAE,CAAC;gBAC1B,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,oBAAoB;YAC/B,CAAC;iBAAM,CAAC;gBACN,IAAI,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,EAAE,CAAC,CAAC;gBACrB,KAAK,IAAI,IAAI,CAAC;gBACd,EAAE,EAAE,CAAC;YACP,CAAC;QACH,CAAC;aAAM,CAAC;YACN,MAAM,IAAI,GAAG,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC;YACvC,IAAI,KAAK,GAAG,
IAAI,GAAG,MAAM,EAAE,CAAC;gBAC1B,EAAE,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,sBAAsB;YAC3C,CAAC;iBAAM,CAAC;gBACN,IAAI,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,EAAE,CAAC,CAAC;gBACrB,KAAK,IAAI,IAAI,CAAC;gBACd,EAAE,EAAE,CAAC;YACP,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,IAAI;YAAE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAW,CAAC,CAAC;IACvD,CAAC;IACD,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;AACjC,CAAC"}
|