nodebench-mcp 2.6.0 → 2.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/NODEBENCH_AGENTS.md +1 -1
- package/README.md +21 -12
- package/dist/__tests__/audit-registry.d.ts +1 -0
- package/dist/__tests__/audit-registry.js +60 -0
- package/dist/__tests__/audit-registry.js.map +1 -0
- package/dist/__tests__/gaiaCapabilityEval.test.js +59 -1
- package/dist/__tests__/gaiaCapabilityEval.test.js.map +1 -1
- package/dist/__tests__/gaiaCapabilityFilesEval.test.js +388 -9
- package/dist/__tests__/gaiaCapabilityFilesEval.test.js.map +1 -1
- package/dist/__tests__/tools.test.js +551 -4
- package/dist/__tests__/tools.test.js.map +1 -1
- package/dist/index.js +28 -6
- package/dist/index.js.map +1 -1
- package/dist/tools/boilerplateTools.d.ts +11 -0
- package/dist/tools/boilerplateTools.js +500 -0
- package/dist/tools/boilerplateTools.js.map +1 -0
- package/dist/tools/cCompilerBenchmarkTools.d.ts +14 -0
- package/dist/tools/cCompilerBenchmarkTools.js +453 -0
- package/dist/tools/cCompilerBenchmarkTools.js.map +1 -0
- package/dist/tools/figmaFlowTools.d.ts +13 -0
- package/dist/tools/figmaFlowTools.js +183 -0
- package/dist/tools/figmaFlowTools.js.map +1 -0
- package/dist/tools/flickerDetectionTools.d.ts +14 -0
- package/dist/tools/flickerDetectionTools.js +231 -0
- package/dist/tools/flickerDetectionTools.js.map +1 -0
- package/dist/tools/localFileTools.d.ts +1 -0
- package/dist/tools/localFileTools.js +1926 -27
- package/dist/tools/localFileTools.js.map +1 -1
- package/dist/tools/metaTools.js +17 -0
- package/dist/tools/metaTools.js.map +1 -1
- package/dist/tools/progressiveDiscoveryTools.d.ts +14 -0
- package/dist/tools/progressiveDiscoveryTools.js +239 -0
- package/dist/tools/progressiveDiscoveryTools.js.map +1 -0
- package/dist/tools/toolRegistry.d.ts +88 -0
- package/dist/tools/toolRegistry.js +1926 -0
- package/dist/tools/toolRegistry.js.map +1 -0
- package/package.json +3 -2
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
* - xlsx: XLSX parsing
|
|
11
11
|
* - papaparse: CSV parsing
|
|
12
12
|
* - pdf-parse: PDF text extraction (page-aware)
|
|
13
|
+
* - yauzl: ZIP/DOCX/PPTX parsing
|
|
13
14
|
*/
|
|
14
15
|
import { readFile } from "node:fs/promises";
|
|
15
16
|
import { existsSync } from "node:fs";
|
|
@@ -50,6 +51,117 @@ function truncateCell(value, maxChars) {
|
|
|
50
51
|
return s;
|
|
51
52
|
return s.slice(0, maxChars) + "...";
|
|
52
53
|
}
|
|
54
|
+
/**
 * Render an arbitrary cell value as a string.
 *
 * - `null` / `undefined` become the empty string.
 * - Strings are returned unchanged.
 * - `Date` instances are rendered with `toISOString()`.
 * - Everything else (numbers, booleans, other objects) goes through `String()`.
 *
 * @param {unknown} value - The value to render.
 * @returns {string} The textual form of `value`.
 */
function toText(value) {
    if (value == null) {
        return "";
    }
    switch (typeof value) {
        case "string":
            return value;
        case "number":
        case "boolean":
            return String(value);
        default:
            return value instanceof Date ? value.toISOString() : String(value);
    }
}
|
|
65
|
+
/**
 * Best-effort conversion of a cell value to a finite number.
 *
 * Finite numbers are returned as-is. Any other value is rendered with
 * `toText`, trimmed, stripped of formatting characters and parsed with
 * `Number.parseFloat`.
 *
 * @param {unknown} value - The value to convert.
 * @returns {number | null} A finite number, or `null` when the value is
 *   nullish, blank, or does not parse to a finite number.
 */
function toNumberOrNull(value) {
    if (value == null) {
        return null;
    }
    if (typeof value === "number" && Number.isFinite(value)) {
        return value;
    }
    const trimmed = toText(value).trim();
    if (trimmed.length === 0) {
        return null;
    }
    // Strip common formatting (currency, thousands separators, %), then drop
    // anything that is not a digit, sign, decimal point or exponent marker.
    const withoutFormatting = trimmed.replace(/[, $£€¥%]/g, "");
    const numericChars = withoutFormatting.replace(/[^\d.+\-eE]/g, "");
    const parsed = Number.parseFloat(numericChars);
    if (!Number.isFinite(parsed)) {
        return null;
    }
    return parsed;
}
|
|
78
|
+
/**
 * Resolve a column reference to a 0-based index into `headers`.
 *
 * A finite number is truncated toward zero and accepted only if it falls
 * inside `headers`. Any other value is stringified, trimmed and matched
 * case-insensitively against the trimmed header names; the first match wins.
 *
 * @param {unknown[]} headers - Header cells of the table.
 * @param {string | number | null | undefined} column - Header name or 0-based index.
 * @returns {number} The resolved index, or -1 when the column cannot be resolved.
 */
function resolveColumnIndex(headers, column) {
    if (typeof column === "number" && Number.isFinite(column)) {
        const position = Math.trunc(column);
        const inRange = position >= 0 && position < headers.length;
        return inRange ? position : -1;
    }
    const wanted = String(column ?? "").trim().toLowerCase();
    if (wanted === "") {
        return -1;
    }
    for (let i = 0; i < headers.length; i++) {
        const candidate = String(headers[i] ?? "").trim().toLowerCase();
        if (candidate === wanted) {
            return i;
        }
    }
    return -1;
}
|
|
89
|
+
/**
 * Turn raw `where` clauses into a normalized list ready for row matching.
 *
 * Entries are silently dropped when they are not objects, when `op` is
 * blank, or when `column` cannot be resolved against `headers`.
 *
 * @param {unknown[]} headers - Header cells used to resolve column references.
 * @param {unknown} whereRaw - Caller-supplied clauses; anything but an array yields `[]`.
 * @returns {{columnIndex: number, op: string, value: unknown, caseSensitive: boolean}[]}
 *   The compiled clauses, in input order.
 */
function compileWhere(headers, whereRaw) {
    if (!Array.isArray(whereRaw)) {
        return [];
    }
    return whereRaw.flatMap((candidate) => {
        if (!candidate || typeof candidate !== "object") {
            return [];
        }
        const op = String(candidate.op ?? "").trim();
        if (op === "") {
            return [];
        }
        const columnIndex = resolveColumnIndex(headers, candidate.column);
        if (columnIndex < 0) {
            return [];
        }
        return [
            {
                columnIndex,
                op,
                value: candidate.value ?? null,
                // Only a literal `true` switches on case-sensitive matching.
                caseSensitive: candidate.caseSensitive === true,
            },
        ];
    });
}
|
|
113
|
+
/**
 * Check whether a row satisfies every compiled clause (logical AND).
 *
 * - `is_empty` / `not_empty` test the trimmed text of the cell.
 * - `contains` is a substring test on the cell text.
 * - `eq` / `ne` compare trimmed text.
 * - Any other op is treated as numeric: both sides must convert to numbers,
 *   and `gt` / `gte` / `lt` / `lte` are then enforced.
 *
 * Text comparisons are case-insensitive unless the clause is case-sensitive.
 *
 * @param {unknown[]} row - The row cells.
 * @param {{columnIndex: number, op: string, value: unknown, caseSensitive: boolean}[]} where
 *   Clauses as produced by `compileWhere`.
 * @returns {boolean} `true` when all clauses pass (or there are none).
 */
function rowMatchesWhere(row, where) {
    for (const clause of where) {
        const { op } = clause;
        const cell = row[clause.columnIndex];
        const fold = (text) => (clause.caseSensitive ? text : text.toLowerCase());
        switch (op) {
            case "is_empty":
                if (toText(cell).trim().length !== 0) {
                    return false;
                }
                break;
            case "not_empty":
                if (toText(cell).trim().length === 0) {
                    return false;
                }
                break;
            case "contains":
                if (!fold(toText(cell)).includes(fold(toText(clause.value)))) {
                    return false;
                }
                break;
            case "eq":
            case "ne": {
                const same = fold(toText(cell).trim()) === fold(toText(clause.value).trim());
                // "eq" requires equality, "ne" requires inequality.
                if (same !== (op === "eq")) {
                    return false;
                }
                break;
            }
            default: {
                // Numeric comparisons.
                const lhs = toNumberOrNull(cell);
                const rhs = toNumberOrNull(clause.value);
                if (lhs === null || rhs === null) {
                    return false;
                }
                const violated = (op === "gt" && !(lhs > rhs)) ||
                    (op === "gte" && !(lhs >= rhs)) ||
                    (op === "lt" && !(lhs < rhs)) ||
                    (op === "lte" && !(lhs <= rhs));
                if (violated) {
                    return false;
                }
            }
        }
    }
    return true;
}
|
|
53
165
|
async function getXlsx() {
|
|
54
166
|
try {
|
|
55
167
|
const mod = await import("xlsx");
|
|
@@ -76,6 +188,389 @@ async function getPdfParseModule() {
|
|
|
76
188
|
throw new Error("Missing optional dependency: pdf-parse. Install it (or run npm install in packages/mcp-local) to use PDF parsing.");
|
|
77
189
|
}
|
|
78
190
|
}
|
|
191
|
+
/**
 * Lazily load the optional `yauzl` dependency.
 *
 * @returns {Promise<any>} The module's default export when present, otherwise
 *   the module namespace itself.
 * @throws {Error} With an installation hint when `yauzl` cannot be imported.
 */
async function getYauzl() {
    let loaded;
    try {
        loaded = await import("yauzl");
    }
    catch {
        throw new Error("Missing optional dependency: yauzl. Install it (or run npm install in packages/mcp-local) to use ZIP/DOCX/PPTX parsing.");
    }
    return loaded.default ?? loaded;
}
|
|
200
|
+
/**
 * Decode the five predefined XML entities (quot, apos, lt, gt, amp) and
 * numeric character references (decimal `&#NN;` and hexadecimal `&#xHH;`).
 *
 * Decoding happens in a single pass, so text produced by one replacement is
 * never decoded again: an escaped ampersand followed by `#65;` yields the
 * literal text `&#65;`, not `A`. A chain of sequential replaces would get
 * this wrong.
 *
 * Unknown entities, malformed references and code points that
 * `String.fromCodePoint` rejects are left untouched.
 *
 * @param {string} text - XML text content.
 * @returns {string} The text with entities decoded.
 */
function decodeXmlEntities(text) {
    const namedEntities = { quot: "\"", apos: "'", lt: "<", gt: ">", amp: "&" };
    return text.replace(/&(?:(quot|apos|lt|gt|amp)|#(x[0-9a-fA-F]+|[0-9]+));/g, (match, name, numeric) => {
        if (name !== undefined) {
            return namedEntities[name];
        }
        const isHex = numeric[0] === "x";
        const codePoint = Number.parseInt(isHex ? numeric.slice(1) : numeric, isHex ? 16 : 10);
        try {
            return String.fromCodePoint(codePoint);
        }
        catch {
            // Out-of-range code point: keep the original reference verbatim.
            return match;
        }
    });
}
|
|
220
|
+
/**
 * Join an untrusted archive-relative path onto `baseDir`, refusing anything
 * that would land outside of it (zip-slip protection).
 *
 * The relative path is normalized first: backslashes become `/`, and a
 * leading `./` and leading slashes are removed.
 *
 * @param {string} baseDir - Directory the result must stay inside.
 * @param {unknown} unsafeRelative - Untrusted relative path (e.g. a zip entry name).
 * @returns {string} The absolute, resolved path inside `baseDir`.
 * @throws {Error} When the path is empty, starts with `..`, contains a
 *   drive-style `X:` sequence, or resolves outside `baseDir`.
 */
function safeJoinInsideDir(baseDir, unsafeRelative) {
    const rel = String(unsafeRelative ?? "")
        .replace(/\\/g, "/")
        .replace(/^\.\//, "")
        .replace(/^\/+/, "");
    const normalized = path.posix.normalize(rel);
    if (normalized === "." || normalized === "") {
        throw new Error("innerPath resolved to empty path");
    }
    if (normalized.startsWith("../") || normalized === "..") {
        throw new Error(`Refusing zip-slip path: ${unsafeRelative}`);
    }
    // Deliberately rejects a letter followed by ":" anywhere in the path.
    if (/[A-Za-z]:/.test(normalized)) {
        throw new Error(`Refusing drive-qualified path: ${unsafeRelative}`);
    }
    const out = path.resolve(baseDir, normalized.replace(/\//g, path.sep));
    const baseResolved = path.resolve(baseDir);
    // A filesystem root ("/" or "C:\") already ends with a separator; adding
    // another would make the prefix check reject every valid path.
    const basePrefix = baseResolved.endsWith(path.sep) ? baseResolved : baseResolved + path.sep;
    if (!out.startsWith(basePrefix) && out !== baseResolved) {
        throw new Error("Resolved path escapes outputDir");
    }
    return out;
}
|
|
242
|
+
/**
 * List the entries of a zip archive without reading their contents.
 *
 * Entries are read one at a time (`lazyEntries`). Listing stops as soon as
 * an entry arrives while `maxEntries` have already been collected, in which
 * case `truncated` is set.
 *
 * @param {string} zipPath - Path of the archive to open.
 * @param {number} maxEntries - Maximum number of entries to collect.
 * @returns {Promise<{entries: object[], truncated: boolean}>} Collected entry
 *   metadata and whether the listing was cut short.
 */
async function zipListEntries(zipPath, maxEntries) {
    const yauzl = await getYauzl();
    return await new Promise((resolve, reject) => {
        yauzl.open(zipPath, { lazyEntries: true, autoClose: true }, (openErr, archive) => {
            if (openErr || !archive) {
                reject(openErr ?? new Error("Failed to open zip"));
                return;
            }
            const listed = [];
            let settled = false;
            let truncated = false;
            // Resolve exactly once, closing the archive on a best-effort basis.
            const settle = () => {
                if (settled) {
                    return;
                }
                settled = true;
                try {
                    archive.close();
                }
                catch {
                    // ignore
                }
                resolve({ entries: listed, truncated });
            };
            archive.on("error", (failure) => {
                if (!settled) {
                    settled = true;
                    reject(failure);
                }
            });
            archive.on("end", settle);
            archive.readEntry();
            archive.on("entry", (entry) => {
                if (settled) {
                    return;
                }
                if (listed.length >= maxEntries) {
                    truncated = true;
                    settle();
                    return;
                }
                const fileName = String(entry.fileName ?? "");
                listed.push({
                    fileName,
                    uncompressedSize: Number(entry.uncompressedSize ?? 0),
                    compressedSize: Number(entry.compressedSize ?? 0),
                    // Names ending in "/" are reported as directories.
                    isDirectory: fileName.endsWith("/"),
                    crc32: typeof entry.crc32 === "number" ? entry.crc32 : undefined,
                    compressionMethod: typeof entry.compressionMethod === "number" ? entry.compressionMethod : undefined,
                });
                archive.readEntry();
            });
        });
    });
}
|
|
294
|
+
/**
 * Read a single entry of a zip archive into memory.
 *
 * The requested path is normalized (backslashes become `/`, leading slashes
 * are dropped) and compared against each entry name, case-insensitively
 * unless `opts.caseSensitive` is truthy. The size limit is enforced twice:
 * against the declared uncompressed size and against the bytes actually
 * streamed.
 *
 * @param {string} zipPath - Path of the archive to open.
 * @param {unknown} innerPath - Entry name to read.
 * @param {{caseSensitive?: boolean, maxBytes: number}} opts - Matching mode and byte limit.
 * @returns {Promise<{buffer: Buffer, entry: object}>} Entry contents plus metadata.
 * @throws {Error} When `innerPath` is empty. The promise rejects when the
 *   archive cannot be opened, the entry is missing, is a directory, or
 *   exceeds `opts.maxBytes`.
 */
async function zipReadEntryBuffer(zipPath, innerPath, opts) {
    const yauzl = await getYauzl();
    const target = String(innerPath ?? "").replace(/\\/g, "/").replace(/^\/+/, "");
    if (target === "") {
        throw new Error("innerPath is required");
    }
    return await new Promise((resolve, reject) => {
        yauzl.open(zipPath, { lazyEntries: true, autoClose: true }, (openErr, archive) => {
            if (openErr || !archive) {
                reject(openErr ?? new Error("Failed to open zip"));
                return;
            }
            let settled = false;
            const closeQuietly = () => {
                try {
                    archive.close();
                }
                catch {
                    // ignore
                }
            };
            // Reject exactly once, releasing the archive first.
            const fail = (reason) => {
                if (settled) {
                    return;
                }
                settled = true;
                closeQuietly();
                reject(reason);
            };
            archive.on("error", fail);
            const fold = (text) => (opts.caseSensitive ? text : text.toLowerCase());
            const wanted = fold(target);
            archive.readEntry();
            archive.on("entry", (entry) => {
                if (settled) {
                    return;
                }
                const rawName = String(entry.fileName ?? "");
                if (fold(rawName) !== wanted) {
                    // Not the entry we are after; move on to the next one.
                    archive.readEntry();
                    return;
                }
                if (rawName.endsWith("/")) {
                    fail(new Error(`zip entry is a directory: ${rawName}`));
                    return;
                }
                const declaredSize = Number(entry.uncompressedSize ?? 0);
                if (Number.isFinite(declaredSize) && declaredSize > opts.maxBytes) {
                    fail(new Error(`zip entry too large (${declaredSize} bytes) for maxBytes=${opts.maxBytes}: ${rawName}`));
                    return;
                }
                archive.openReadStream(entry, (streamErr, stream) => {
                    if (streamErr || !stream) {
                        fail(streamErr ?? new Error("Failed to open zip entry stream"));
                        return;
                    }
                    const pieces = [];
                    let received = 0;
                    stream.on("data", (piece) => {
                        if (settled) {
                            return;
                        }
                        received += piece.length;
                        if (received > opts.maxBytes) {
                            // The declared size is not trusted; enforce the limit on real bytes too.
                            try {
                                stream.destroy();
                            }
                            catch {
                                // ignore
                            }
                            fail(new Error(`zip entry exceeded maxBytes=${opts.maxBytes}: ${rawName}`));
                            return;
                        }
                        pieces.push(piece);
                    });
                    stream.on("error", fail);
                    stream.on("end", () => {
                        if (settled) {
                            return;
                        }
                        settled = true;
                        const buffer = Buffer.concat(pieces);
                        const info = {
                            fileName: rawName,
                            uncompressedSize: Number(entry.uncompressedSize ?? buffer.length),
                            compressedSize: Number(entry.compressedSize ?? buffer.length),
                            isDirectory: false,
                            crc32: typeof entry.crc32 === "number" ? entry.crc32 : undefined,
                            compressionMethod: typeof entry.compressionMethod === "number" ? entry.compressionMethod : undefined,
                        };
                        closeQuietly();
                        resolve({ buffer, entry: info });
                    });
                });
            });
            archive.on("end", () => {
                if (settled) {
                    return;
                }
                fail(new Error(`zip entry not found: ${target}`));
            });
        });
    });
}
|
|
392
|
+
/**
 * Produce a bounded copy of a JSON-like value for previewing.
 *
 * Strings longer than `opts.maxStringChars` are cut and suffixed with "...".
 * Arrays and objects keep at most `opts.maxItems` children. Containers at
 * `opts.maxDepth` or deeper are replaced by the marker "[Truncated:maxDepth]".
 * Whenever anything is cut, `state.truncated` is set to `true`.
 *
 * @param {unknown} value - The value to prune.
 * @param {{maxStringChars: number, maxItems: number, maxDepth: number}} opts - Limits.
 * @param {{truncated: boolean}} state - Mutable flag recording whether pruning occurred.
 * @param {number} [depth=0] - Current nesting depth (used by the recursion).
 * @returns {unknown} The pruned value; nullish input yields `null`.
 */
function pruneJsonForPreview(value, opts, state, depth = 0) {
    if (value == null) {
        return null;
    }
    const kind = typeof value;
    if (kind === "string") {
        const tooLong = value.length > opts.maxStringChars;
        if (!tooLong) {
            return value;
        }
        state.truncated = true;
        return `${value.slice(0, opts.maxStringChars)}...`;
    }
    if (kind === "number" || kind === "boolean") {
        return value;
    }
    if (value instanceof Date) {
        return value.toISOString();
    }
    // Scalars above are always kept; only containers count against the depth limit.
    if (depth >= opts.maxDepth) {
        state.truncated = true;
        return "[Truncated:maxDepth]";
    }
    const childDepth = depth + 1;
    if (Array.isArray(value)) {
        const keep = Math.min(value.length, opts.maxItems);
        if (value.length > keep) {
            state.truncated = true;
        }
        const prunedItems = [];
        for (let index = 0; index < keep; index++) {
            prunedItems.push(pruneJsonForPreview(value[index], opts, state, childDepth));
        }
        return prunedItems;
    }
    if (kind === "object") {
        const names = Object.keys(value);
        const keep = Math.min(names.length, opts.maxItems);
        if (names.length > keep) {
            state.truncated = true;
        }
        const prunedObject = {};
        for (let index = 0; index < keep; index++) {
            const name = names[index];
            prunedObject[name] = pruneJsonForPreview(value[name], opts, state, childDepth);
        }
        return prunedObject;
    }
    return String(value);
}
|
|
434
|
+
/**
 * Resolve a JSON Pointer (RFC 6901 style) against a parsed JSON value.
 *
 * The pointer is trimmed first. An empty pointer, or the single character
 * "/", refers to the root. Reference tokens are unescaped (`~1` becomes "/",
 * then `~0` becomes "~").
 *
 * Array tokens must consist of decimal digits only. Tokens such as "1abc",
 * "1.5" or "+1" are reported as not found rather than being coerced to an
 * index by `parseInt`.
 *
 * @param {unknown} root - The document to traverse.
 * @param {unknown} pointerRaw - The pointer string (nullish is treated as empty).
 * @returns {{found: boolean, value: unknown}} `found: false` with `value: null`
 *   when any step of the pointer does not exist.
 * @throws {Error} When a non-empty pointer does not start with "/".
 */
function jsonPointerGet(root, pointerRaw) {
    const pointer = String(pointerRaw ?? "").trim();
    if (pointer === "" || pointer === "/") {
        return { found: true, value: root };
    }
    if (!pointer.startsWith("/")) {
        throw new Error("pointer must start with '/' or be empty");
    }
    const parts = pointer
        .split("/")
        .slice(1)
        .map((p) => p.replace(/~1/g, "/").replace(/~0/g, "~"));
    const notFound = { found: false, value: null };
    let cur = root;
    for (const part of parts) {
        if (cur === null || cur === undefined) {
            return notFound;
        }
        if (Array.isArray(cur)) {
            // Digits only: parseInt alone would accept "1abc" or "1.5" as index 1.
            if (!/^\d+$/.test(part)) {
                return notFound;
            }
            const idx = Number(part);
            if (idx >= cur.length) {
                return notFound;
            }
            cur = cur[idx];
            continue;
        }
        if (typeof cur === "object") {
            // Own properties only, so inherited names like "toString" never resolve.
            if (!Object.prototype.hasOwnProperty.call(cur, part)) {
                return notFound;
            }
            cur = cur[part];
            continue;
        }
        return notFound;
    }
    return { found: true, value: cur };
}
|
|
466
|
+
/**
 * Extract plain text from WordprocessingML (e.g. a DOCX `word/document.xml`).
 *
 * Tab characters become "\t", line breaks and paragraph ends become "\n",
 * all remaining tags are removed and XML entities are decoded. Trailing
 * whitespace before newlines is dropped and runs of three or more newlines
 * collapse to two.
 *
 * @param {unknown} xmlRaw - The XML source (nullish is treated as empty).
 * @returns {string} The extracted, trimmed text.
 */
function docxXmlToText(xmlRaw) {
    let s = String(xmlRaw ?? "");
    // <w:tabs> holds tab-stop definitions (<w:tab w:val=... w:pos=.../>) in
    // paragraph properties. They are formatting, not content, so remove them
    // before translating real tab characters; otherwise each one adds a "\t".
    s = s.replace(/<w:tabs\b(?:[^>]*\/>|[^>]*>[\s\S]*?<\/w:tabs>)/gi, "");
    s = s.replace(/<w:tab\b[^>]*\/>/gi, "\t");
    s = s.replace(/<(w:br|w:cr)[^>]*\/>/gi, "\n");
    s = s.replace(/<\/w:p>/gi, "\n");
    s = s.replace(/<w:t\b[^>]*>/gi, "");
    s = s.replace(/<\/w:t>/gi, "");
    s = s.replace(/<[^>]+>/g, "");
    // Decode entities only after tag removal so decoded "<" is never mistaken for markup.
    s = decodeXmlEntities(s);
    s = s.replace(/\r/g, "");
    s = s.replace(/[ \t]+\n/g, "\n");
    s = s.replace(/\n{3,}/g, "\n\n");
    return s.trim();
}
|
|
480
|
+
/**
 * Extract plain text from DrawingML slide markup (e.g. a PPTX slide part).
 *
 * Line breaks and paragraph ends become "\n", all remaining tags are
 * removed and XML entities are decoded. Trailing whitespace before newlines
 * is dropped and runs of three or more newlines collapse to two.
 *
 * @param {unknown} xmlRaw - The XML source (nullish is treated as empty).
 * @returns {string} The extracted, trimmed text.
 */
function pptxSlideXmlToText(xmlRaw) {
    const withoutMarkup = String(xmlRaw ?? "")
        .replace(/<a:br[^>]*\/>/gi, "\n")
        .replace(/<\/a:p>/gi, "\n")
        .replace(/<a:t\b[^>]*>/gi, "")
        .replace(/<\/a:t>/gi, "")
        .replace(/<[^>]+>/g, "");
    // Entities are decoded after tag removal, then whitespace is tidied.
    return decodeXmlEntities(withoutMarkup)
        .replace(/\r/g, "")
        .replace(/[ \t]+\n/g, "\n")
        .replace(/\n{3,}/g, "\n\n")
        .trim();
}
|
|
493
|
+
/**
 * Load a CSV file into headers plus bounded data rows.
 *
 * Uses papaparse when `getPapaParse()` yields a parser; otherwise falls back
 * to a naive newline/delimiter split with no quote handling. Rows are cut to
 * `opts.maxCols` cells, cells are truncated via `truncateCell`, and at most
 * `opts.maxScanRows` data rows are kept.
 *
 * @param {{encoding?: string, hasHeader?: boolean, delimiter?: string}} args
 *   Tool arguments; `hasHeader` defaults to true, `encoding` to "utf8".
 * @param {{filePath: string, maxScanRows: number, maxCols: number, maxCellChars: number}} opts
 *   Resolved file path and scan limits.
 * @returns {Promise<{encoding: string, hasHeader: boolean, delimiter: string | null,
 *   parseErrors: unknown[], headers: string[], dataRows: unknown[][]}>}
 *   When there is no header row, headers are synthesized as `col_1..col_N`.
 */
async function loadCsvTable(args, opts) {
    const encoding = String(args?.encoding ?? "utf8");
    const text = await readFile(opts.filePath, { encoding });
    const hasHeader = args?.hasHeader !== false;
    const delimiter = typeof args?.delimiter === "string" ? args.delimiter : undefined;
    const papa = await getPapaParse();
    let rows = [];
    let parseErrors = [];
    if (papa?.parse) {
        const result = papa.parse(text, {
            ...(delimiter ? { delimiter } : {}),
            skipEmptyLines: true,
            dynamicTyping: false,
        });
        rows = Array.isArray(result?.data) ? result.data : [];
        parseErrors = Array.isArray(result?.errors) ? result.errors : [];
    }
    else {
        // Minimal fallback parser: split by newlines and delimiter (no quote handling).
        const lines = String(text).split(/\r?\n/).filter((l) => l.trim().length > 0);
        rows = lines.map((l) => l.split(delimiter ?? ","));
    }
    const normalizedAll = rows
        .filter((r) => Array.isArray(r))
        .map((r) => r.slice(0, opts.maxCols).map((c) => truncateCell(c, opts.maxCellChars)));
    const headerRow = hasHeader ? normalizedAll[0] : undefined;
    const dataRowsAll = hasHeader ? normalizedAll.slice(1) : normalizedAll;
    const dataRows = dataRowsAll.slice(0, opts.maxScanRows);
    // Find the widest row with a loop: spreading up to maxScanRows values into
    // Math.max() can exceed the engine's argument limit and throw RangeError.
    let colCount = headerRow ? headerRow.length : 0;
    for (const r of dataRows) {
        if (r.length > colCount) {
            colCount = r.length;
        }
    }
    const headers = headerRow
        ? headerRow.map((h) => String(h ?? "").trim())
        : Array.from({ length: colCount }, (_, i) => `col_${i + 1}`);
    return {
        encoding,
        hasHeader,
        delimiter: delimiter ?? null,
        parseErrors,
        headers,
        dataRows,
    };
}
|
|
534
|
+
/**
 * Load one worksheet of an XLSX workbook into headers plus bounded data rows.
 *
 * The sheet is `args.sheetName` when given, otherwise the first sheet.
 * `args.headerRow` is clamped via `clampInt(args?.headerRow, 1, 0, 1000)`;
 * a value of 0 means "no header row", in which case headers are synthesized
 * as `col_1..col_N`. Blank rows are skipped by the sheet-to-rows conversion,
 * so the header position counts non-blank rows only.
 *
 * @param {{sheetName?: string, headerRow?: number, rangeA1?: string}} args - Tool arguments.
 * @param {{filePath: string, maxScanRows: number, maxCols: number, maxCellChars: number}} opts
 *   Resolved file path and scan limits.
 * @returns {Promise<{sheets: string[], sheetName: string, headerRow: number,
 *   rangeA1: string | null, headers: string[], dataRows: unknown[][]}>}
 * @throws {Error} When the workbook has no sheets or the requested sheet is missing.
 */
async function loadXlsxTable(args, opts) {
    const XLSX = await getXlsx();
    const wb = XLSX.readFile(opts.filePath, { cellDates: true, dense: true });
    const sheets = Array.isArray(wb?.SheetNames) ? wb.SheetNames : [];
    if (sheets.length === 0)
        throw new Error(`No sheets found in workbook: ${opts.filePath}`);
    const requestedSheet = typeof args?.sheetName === "string" ? args.sheetName.trim() : "";
    const sheetName = requestedSheet || sheets[0];
    const sheet = wb.Sheets?.[sheetName];
    if (!sheet) {
        throw new Error(`Sheet not found: \"${sheetName}\". Available sheets: ${sheets.join(", ")}`);
    }
    const headerRow = clampInt(args?.headerRow, 1, 0, 1000);
    const rangeA1 = typeof args?.rangeA1 === "string" ? args.rangeA1.trim() : "";
    const table = XLSX.utils.sheet_to_json(sheet, {
        header: 1,
        blankrows: false,
        defval: "",
        ...(rangeA1 ? { range: rangeA1 } : {}),
    });
    const normalizedAll = table
        .filter((r) => Array.isArray(r))
        .map((r) => r.slice(0, opts.maxCols).map((c) => truncateCell(c, opts.maxCellChars)));
    // headerRow is 1-based; 0 disables the header row.
    const headerIdx = headerRow > 0 ? headerRow - 1 : -1;
    const header = headerIdx >= 0 ? normalizedAll[headerIdx] : undefined;
    const dataRowsAll = headerIdx >= 0 ? normalizedAll.slice(headerIdx + 1) : normalizedAll;
    const dataRows = dataRowsAll.slice(0, opts.maxScanRows);
    // Find the widest row with a loop: spreading up to maxScanRows values into
    // Math.max() can exceed the engine's argument limit and throw RangeError.
    let colCount = header ? header.length : 0;
    for (const r of dataRows) {
        if (r.length > colCount) {
            colCount = r.length;
        }
    }
    const headers = header
        ? header.map((h) => String(h ?? "").trim())
        : Array.from({ length: colCount }, (_, i) => `col_${i + 1}`);
    return {
        sheets,
        sheetName,
        headerRow,
        rangeA1: rangeA1 || null,
        headers,
        dataRows,
    };
}
|
|
79
574
|
export const localFileTools = [
|
|
80
575
|
{
|
|
81
576
|
name: "read_csv_file",
|
|
@@ -176,6 +671,347 @@ export const localFileTools = [
|
|
|
176
671
|
};
|
|
177
672
|
},
|
|
178
673
|
},
|
|
674
|
+
{
|
|
675
|
+
name: "csv_select_rows",
|
|
676
|
+
description: "Select rows from a local CSV using deterministic filters. Returns bounded results (selected columns + matching rows). No network.",
|
|
677
|
+
inputSchema: {
|
|
678
|
+
type: "object",
|
|
679
|
+
properties: {
|
|
680
|
+
path: {
|
|
681
|
+
type: "string",
|
|
682
|
+
description: "Path to a local .csv file (absolute or relative to current working directory).",
|
|
683
|
+
},
|
|
684
|
+
hasHeader: {
|
|
685
|
+
type: "boolean",
|
|
686
|
+
description: "If true, treats the first row as headers.",
|
|
687
|
+
default: true,
|
|
688
|
+
},
|
|
689
|
+
delimiter: {
|
|
690
|
+
type: "string",
|
|
691
|
+
description: "Optional delimiter override, e.g. ',' or '\\t'. If omitted, parser default is used.",
|
|
692
|
+
},
|
|
693
|
+
encoding: {
|
|
694
|
+
type: "string",
|
|
695
|
+
description: "File encoding (default: utf8).",
|
|
696
|
+
default: "utf8",
|
|
697
|
+
},
|
|
698
|
+
where: {
|
|
699
|
+
type: "array",
|
|
700
|
+
description: "Optional filters combined with AND. Column can be a header name or 0-based index. Ops: eq, ne, contains, gt, gte, lt, lte, is_empty, not_empty.",
|
|
701
|
+
items: {
|
|
702
|
+
type: "object",
|
|
703
|
+
properties: {
|
|
704
|
+
column: { type: ["string", "number"] },
|
|
705
|
+
op: {
|
|
706
|
+
type: "string",
|
|
707
|
+
enum: ["eq", "ne", "contains", "gt", "gte", "lt", "lte", "is_empty", "not_empty"],
|
|
708
|
+
},
|
|
709
|
+
value: { type: ["string", "number", "boolean", "null"] },
|
|
710
|
+
caseSensitive: { type: "boolean" },
|
|
711
|
+
},
|
|
712
|
+
required: ["column", "op"],
|
|
713
|
+
},
|
|
714
|
+
},
|
|
715
|
+
returnColumns: {
|
|
716
|
+
type: "array",
|
|
717
|
+
description: "Optional list of columns to return (header name or 0-based index). If omitted, returns all columns (bounded by maxCols).",
|
|
718
|
+
items: { type: ["string", "number"] },
|
|
719
|
+
},
|
|
720
|
+
offset: {
|
|
721
|
+
type: "number",
|
|
722
|
+
description: "Number of matching rows to skip before returning results.",
|
|
723
|
+
default: 0,
|
|
724
|
+
},
|
|
725
|
+
limit: {
|
|
726
|
+
type: "number",
|
|
727
|
+
description: "Maximum number of matching rows to return.",
|
|
728
|
+
default: 50,
|
|
729
|
+
},
|
|
730
|
+
maxScanRows: {
|
|
731
|
+
type: "number",
|
|
732
|
+
description: "Maximum number of data rows to scan (excluding header).",
|
|
733
|
+
default: 50000,
|
|
734
|
+
},
|
|
735
|
+
maxCols: {
|
|
736
|
+
type: "number",
|
|
737
|
+
description: "Maximum number of columns to scan/return.",
|
|
738
|
+
default: 80,
|
|
739
|
+
},
|
|
740
|
+
maxCellChars: {
|
|
741
|
+
type: "number",
|
|
742
|
+
description: "Maximum characters to return per cell (long cells are truncated).",
|
|
743
|
+
default: 2000,
|
|
744
|
+
},
|
|
745
|
+
},
|
|
746
|
+
required: ["path"],
|
|
747
|
+
},
|
|
748
|
+
handler: async (args) => {
|
|
749
|
+
const filePath = resolveLocalPath(args?.path);
|
|
750
|
+
if (!existsSync(filePath))
|
|
751
|
+
throw new Error(`File not found: ${filePath}`);
|
|
752
|
+
const maxScanRows = clampInt(args?.maxScanRows, 50000, 1, 200000);
|
|
753
|
+
const maxCols = clampInt(args?.maxCols, 80, 1, 500);
|
|
754
|
+
const maxCellChars = clampInt(args?.maxCellChars, 2000, 20, 20000);
|
|
755
|
+
const offset = clampInt(args?.offset, 0, 0, 1_000_000_000);
|
|
756
|
+
const limit = clampInt(args?.limit, 50, 1, 5000);
|
|
757
|
+
const table = await loadCsvTable(args, { filePath, maxScanRows, maxCols, maxCellChars });
|
|
758
|
+
const where = compileWhere(table.headers, args?.where);
|
|
759
|
+
const returnColumnsRaw = Array.isArray(args?.returnColumns)
|
|
760
|
+
? args.returnColumns.slice(0, 200)
|
|
761
|
+
: null;
|
|
762
|
+
const returnIndices = returnColumnsRaw && returnColumnsRaw.length > 0
|
|
763
|
+
? returnColumnsRaw
|
|
764
|
+
.map((c) => resolveColumnIndex(table.headers, c))
|
|
765
|
+
.filter((idx) => idx >= 0)
|
|
766
|
+
: Array.from({ length: Math.min(table.headers.length, maxCols) }, (_, i) => i);
|
|
767
|
+
const selectedHeaders = returnIndices.map((i) => table.headers[i] ?? `col_${i + 1}`);
|
|
768
|
+
const outRows = [];
|
|
769
|
+
let matched = 0;
|
|
770
|
+
for (let i = 0; i < table.dataRows.length; i++) {
|
|
771
|
+
const row = table.dataRows[i];
|
|
772
|
+
if (!rowMatchesWhere(row, where))
|
|
773
|
+
continue;
|
|
774
|
+
if (matched >= offset && outRows.length < limit) {
|
|
775
|
+
outRows.push({
|
|
776
|
+
rowIndex: i + 1,
|
|
777
|
+
row: returnIndices.map((idx) => row[idx]),
|
|
778
|
+
});
|
|
779
|
+
}
|
|
780
|
+
matched++;
|
|
781
|
+
if (outRows.length >= limit && matched >= offset + limit)
|
|
782
|
+
break;
|
|
783
|
+
}
|
|
784
|
+
return {
|
|
785
|
+
path: filePath,
|
|
786
|
+
encoding: table.encoding,
|
|
787
|
+
hasHeader: table.hasHeader,
|
|
788
|
+
delimiter: table.delimiter,
|
|
789
|
+
parseErrors: table.parseErrors.length > 0 ? table.parseErrors.slice(0, 5) : [],
|
|
790
|
+
scannedRows: table.dataRows.length,
|
|
791
|
+
matchedRows: matched,
|
|
792
|
+
returnedRows: outRows.length,
|
|
793
|
+
offset,
|
|
794
|
+
limit,
|
|
795
|
+
headers: selectedHeaders,
|
|
796
|
+
rows: outRows,
|
|
797
|
+
};
|
|
798
|
+
},
|
|
799
|
+
},
|
|
800
|
+
{
|
|
801
|
+
name: "csv_aggregate",
|
|
802
|
+
description: "Aggregate values from a local CSV (count/sum/avg/min/max) with optional filters. Deterministic, no network.",
|
|
803
|
+
inputSchema: {
|
|
804
|
+
type: "object",
|
|
805
|
+
properties: {
|
|
806
|
+
path: {
|
|
807
|
+
type: "string",
|
|
808
|
+
description: "Path to a local .csv file (absolute or relative to current working directory).",
|
|
809
|
+
},
|
|
810
|
+
hasHeader: {
|
|
811
|
+
type: "boolean",
|
|
812
|
+
description: "If true, treats the first row as headers.",
|
|
813
|
+
default: true,
|
|
814
|
+
},
|
|
815
|
+
delimiter: {
|
|
816
|
+
type: "string",
|
|
817
|
+
description: "Optional delimiter override, e.g. ',' or '\\t'. If omitted, parser default is used.",
|
|
818
|
+
},
|
|
819
|
+
encoding: {
|
|
820
|
+
type: "string",
|
|
821
|
+
description: "File encoding (default: utf8).",
|
|
822
|
+
default: "utf8",
|
|
823
|
+
},
|
|
824
|
+
where: {
|
|
825
|
+
type: "array",
|
|
826
|
+
description: "Optional filters combined with AND. Column can be a header name or 0-based index. Ops: eq, ne, contains, gt, gte, lt, lte, is_empty, not_empty.",
|
|
827
|
+
items: {
|
|
828
|
+
type: "object",
|
|
829
|
+
properties: {
|
|
830
|
+
column: { type: ["string", "number"] },
|
|
831
|
+
op: {
|
|
832
|
+
type: "string",
|
|
833
|
+
enum: ["eq", "ne", "contains", "gt", "gte", "lt", "lte", "is_empty", "not_empty"],
|
|
834
|
+
},
|
|
835
|
+
value: { type: ["string", "number", "boolean", "null"] },
|
|
836
|
+
caseSensitive: { type: "boolean" },
|
|
837
|
+
},
|
|
838
|
+
required: ["column", "op"],
|
|
839
|
+
},
|
|
840
|
+
},
|
|
841
|
+
operation: {
|
|
842
|
+
type: "string",
|
|
843
|
+
enum: ["count", "sum", "avg", "min", "max"],
|
|
844
|
+
description: "Aggregation to compute.",
|
|
845
|
+
},
|
|
846
|
+
value: {
|
|
847
|
+
type: "object",
|
|
848
|
+
description: "Value definition. Use {type:'column',column:'ColName'} or {type:'ratio',numeratorColumn:'A',denominatorColumn:'B'}.",
|
|
849
|
+
properties: {
|
|
850
|
+
type: { type: "string", enum: ["column", "ratio"] },
|
|
851
|
+
column: { type: ["string", "number"] },
|
|
852
|
+
numeratorColumn: { type: ["string", "number"] },
|
|
853
|
+
denominatorColumn: { type: ["string", "number"] },
|
|
854
|
+
},
|
|
855
|
+
required: ["type"],
|
|
856
|
+
},
|
|
857
|
+
ignoreNonNumeric: {
|
|
858
|
+
type: "boolean",
|
|
859
|
+
description: "If true, skips rows where the value can't be parsed as a number (default true).",
|
|
860
|
+
default: true,
|
|
861
|
+
},
|
|
862
|
+
returnRow: {
|
|
863
|
+
type: "boolean",
|
|
864
|
+
description: "If true (and operation is min/max), includes the best row.",
|
|
865
|
+
default: true,
|
|
866
|
+
},
|
|
867
|
+
returnColumns: {
|
|
868
|
+
type: "array",
|
|
869
|
+
description: "If returnRow is true, optionally choose which columns to include from the best row (header name or 0-based index).",
|
|
870
|
+
items: { type: ["string", "number"] },
|
|
871
|
+
},
|
|
872
|
+
maxScanRows: {
|
|
873
|
+
type: "number",
|
|
874
|
+
description: "Maximum number of data rows to scan (excluding header).",
|
|
875
|
+
default: 50000,
|
|
876
|
+
},
|
|
877
|
+
maxCols: {
|
|
878
|
+
type: "number",
|
|
879
|
+
description: "Maximum number of columns to scan.",
|
|
880
|
+
default: 200,
|
|
881
|
+
},
|
|
882
|
+
maxCellChars: {
|
|
883
|
+
type: "number",
|
|
884
|
+
description: "Maximum characters to return per cell in bestRow (long cells are truncated).",
|
|
885
|
+
default: 2000,
|
|
886
|
+
},
|
|
887
|
+
},
|
|
888
|
+
required: ["path", "operation"],
|
|
889
|
+
},
|
|
890
|
+
handler: async (args) => {
|
|
891
|
+
const filePath = resolveLocalPath(args?.path);
|
|
892
|
+
if (!existsSync(filePath))
|
|
893
|
+
throw new Error(`File not found: ${filePath}`);
|
|
894
|
+
const operation = String(args?.operation ?? "").trim().toLowerCase();
|
|
895
|
+
if (!["count", "sum", "avg", "min", "max"].includes(operation)) {
|
|
896
|
+
throw new Error(`Unsupported operation: ${operation}`);
|
|
897
|
+
}
|
|
898
|
+
const maxScanRows = clampInt(args?.maxScanRows, 50000, 1, 200000);
|
|
899
|
+
const maxCols = clampInt(args?.maxCols, 200, 1, 500);
|
|
900
|
+
const maxCellChars = clampInt(args?.maxCellChars, 2000, 20, 20000);
|
|
901
|
+
const ignoreNonNumeric = args?.ignoreNonNumeric !== false;
|
|
902
|
+
const returnRow = args?.returnRow !== false;
|
|
903
|
+
const table = await loadCsvTable(args, { filePath, maxScanRows, maxCols, maxCellChars });
|
|
904
|
+
const where = compileWhere(table.headers, args?.where);
|
|
905
|
+
const valueSpec = (args?.value ?? null);
|
|
906
|
+
const getValue = (row) => {
|
|
907
|
+
if (!valueSpec || typeof valueSpec !== "object")
|
|
908
|
+
return null;
|
|
909
|
+
const t = String(valueSpec.type ?? "").trim().toLowerCase();
|
|
910
|
+
if (t === "column") {
|
|
911
|
+
const idx = resolveColumnIndex(table.headers, valueSpec.column);
|
|
912
|
+
if (idx < 0)
|
|
913
|
+
return null;
|
|
914
|
+
return toNumberOrNull(row[idx]);
|
|
915
|
+
}
|
|
916
|
+
if (t === "ratio") {
|
|
917
|
+
const nIdx = resolveColumnIndex(table.headers, valueSpec.numeratorColumn);
|
|
918
|
+
const dIdx = resolveColumnIndex(table.headers, valueSpec.denominatorColumn);
|
|
919
|
+
if (nIdx < 0 || dIdx < 0)
|
|
920
|
+
return null;
|
|
921
|
+
const n = toNumberOrNull(row[nIdx]);
|
|
922
|
+
const d = toNumberOrNull(row[dIdx]);
|
|
923
|
+
if (n === null || d === null || d === 0)
|
|
924
|
+
return null;
|
|
925
|
+
return n / d;
|
|
926
|
+
}
|
|
927
|
+
return null;
|
|
928
|
+
};
|
|
929
|
+
let matchedRows = 0;
|
|
930
|
+
let usedRows = 0;
|
|
931
|
+
let skippedRows = 0;
|
|
932
|
+
let sum = 0;
|
|
933
|
+
let bestVal = null;
|
|
934
|
+
let bestRowIndex = null;
|
|
935
|
+
let bestRow = null;
|
|
936
|
+
for (let i = 0; i < table.dataRows.length; i++) {
|
|
937
|
+
const row = table.dataRows[i];
|
|
938
|
+
if (!rowMatchesWhere(row, where))
|
|
939
|
+
continue;
|
|
940
|
+
matchedRows++;
|
|
941
|
+
if (operation === "count" && !valueSpec)
|
|
942
|
+
continue;
|
|
943
|
+
const value = valueSpec ? getValue(row) : null;
|
|
944
|
+
if (value === null) {
|
|
945
|
+
skippedRows++;
|
|
946
|
+
continue;
|
|
947
|
+
}
|
|
948
|
+
usedRows++;
|
|
949
|
+
sum += value;
|
|
950
|
+
if (operation === "min") {
|
|
951
|
+
if (bestVal === null || value < bestVal) {
|
|
952
|
+
bestVal = value;
|
|
953
|
+
bestRowIndex = i + 1;
|
|
954
|
+
bestRow = row;
|
|
955
|
+
}
|
|
956
|
+
}
|
|
957
|
+
else if (operation === "max") {
|
|
958
|
+
if (bestVal === null || value > bestVal) {
|
|
959
|
+
bestVal = value;
|
|
960
|
+
bestRowIndex = i + 1;
|
|
961
|
+
bestRow = row;
|
|
962
|
+
}
|
|
963
|
+
}
|
|
964
|
+
}
|
|
965
|
+
let resultValue = null;
|
|
966
|
+
if (operation === "count") {
|
|
967
|
+
resultValue = valueSpec ? usedRows : matchedRows;
|
|
968
|
+
}
|
|
969
|
+
else if (operation === "sum") {
|
|
970
|
+
resultValue = usedRows > 0 ? sum : null;
|
|
971
|
+
}
|
|
972
|
+
else if (operation === "avg") {
|
|
973
|
+
resultValue = usedRows > 0 ? sum / usedRows : null;
|
|
974
|
+
}
|
|
975
|
+
else if (operation === "min" || operation === "max") {
|
|
976
|
+
resultValue = bestVal;
|
|
977
|
+
}
|
|
978
|
+
let best = null;
|
|
979
|
+
if (returnRow && (operation === "min" || operation === "max") && bestRowIndex !== null && bestRow) {
|
|
980
|
+
const returnColumnsRaw = Array.isArray(args?.returnColumns)
|
|
981
|
+
? args.returnColumns.slice(0, 200)
|
|
982
|
+
: null;
|
|
983
|
+
const returnIndices = returnColumnsRaw && returnColumnsRaw.length > 0
|
|
984
|
+
? returnColumnsRaw
|
|
985
|
+
.map((c) => resolveColumnIndex(table.headers, c))
|
|
986
|
+
.filter((idx) => idx >= 0)
|
|
987
|
+
: Array.from({ length: Math.min(table.headers.length, 50) }, (_, i) => i);
|
|
988
|
+
best = {
|
|
989
|
+
rowIndex: bestRowIndex,
|
|
990
|
+
headers: returnIndices.map((i) => table.headers[i] ?? `col_${i + 1}`),
|
|
991
|
+
row: returnIndices.map((i) => bestRow[i]),
|
|
992
|
+
};
|
|
993
|
+
}
|
|
994
|
+
if (!ignoreNonNumeric && (operation === "sum" || operation === "avg" || operation === "min" || operation === "max") && skippedRows > 0) {
|
|
995
|
+
// Caller can choose to treat non-numeric rows as a hard failure.
|
|
996
|
+
// We keep it non-throwing by default for robustness.
|
|
997
|
+
}
|
|
998
|
+
return {
|
|
999
|
+
path: filePath,
|
|
1000
|
+
encoding: table.encoding,
|
|
1001
|
+
hasHeader: table.hasHeader,
|
|
1002
|
+
delimiter: table.delimiter,
|
|
1003
|
+
parseErrors: table.parseErrors.length > 0 ? table.parseErrors.slice(0, 5) : [],
|
|
1004
|
+
operation,
|
|
1005
|
+
value: valueSpec ?? null,
|
|
1006
|
+
scannedRows: table.dataRows.length,
|
|
1007
|
+
matchedRows,
|
|
1008
|
+
usedRows,
|
|
1009
|
+
skippedRows,
|
|
1010
|
+
result: resultValue,
|
|
1011
|
+
bestRow: best,
|
|
1012
|
+
};
|
|
1013
|
+
},
|
|
1014
|
+
},
|
|
179
1015
|
{
|
|
180
1016
|
name: "read_xlsx_file",
|
|
181
1017
|
description: "Read a local XLSX workbook and return a bounded sheet preview (headers + rows). Deterministic, no network.",
|
|
@@ -276,34 +1112,74 @@ export const localFileTools = [
|
|
|
276
1112
|
},
|
|
277
1113
|
},
|
|
278
1114
|
{
|
|
279
|
-
name: "
|
|
280
|
-
description: "
|
|
1115
|
+
name: "xlsx_select_rows",
|
|
1116
|
+
description: "Select rows from a local XLSX using deterministic filters. Returns bounded results (selected columns + matching rows). No network.",
|
|
281
1117
|
inputSchema: {
|
|
282
1118
|
type: "object",
|
|
283
1119
|
properties: {
|
|
284
1120
|
path: {
|
|
285
1121
|
type: "string",
|
|
286
|
-
description: "Path to a local .
|
|
1122
|
+
description: "Path to a local .xlsx file (absolute or relative to current working directory).",
|
|
287
1123
|
},
|
|
288
|
-
|
|
1124
|
+
sheetName: {
|
|
1125
|
+
type: "string",
|
|
1126
|
+
description: "Sheet to read. If omitted, the first sheet is used.",
|
|
1127
|
+
},
|
|
1128
|
+
headerRow: {
|
|
289
1129
|
type: "number",
|
|
290
|
-
description: "1-based
|
|
1130
|
+
description: "1-based header row index. Use 0 for no header row.",
|
|
291
1131
|
default: 1,
|
|
292
1132
|
},
|
|
293
|
-
|
|
294
|
-
type: "
|
|
295
|
-
description: "
|
|
296
|
-
default: 3,
|
|
1133
|
+
rangeA1: {
|
|
1134
|
+
type: "string",
|
|
1135
|
+
description: "Optional A1 range (e.g. A1:D5000) to limit scanning to a specific region.",
|
|
297
1136
|
},
|
|
298
|
-
|
|
1137
|
+
where: {
|
|
299
1138
|
type: "array",
|
|
300
|
-
description: "Optional
|
|
301
|
-
items: {
|
|
1139
|
+
description: "Optional filters combined with AND. Column can be a header name or 0-based index. Ops: eq, ne, contains, gt, gte, lt, lte, is_empty, not_empty.",
|
|
1140
|
+
items: {
|
|
1141
|
+
type: "object",
|
|
1142
|
+
properties: {
|
|
1143
|
+
column: { type: ["string", "number"] },
|
|
1144
|
+
op: {
|
|
1145
|
+
type: "string",
|
|
1146
|
+
enum: ["eq", "ne", "contains", "gt", "gte", "lt", "lte", "is_empty", "not_empty"],
|
|
1147
|
+
},
|
|
1148
|
+
value: { type: ["string", "number", "boolean", "null"] },
|
|
1149
|
+
caseSensitive: { type: "boolean" },
|
|
1150
|
+
},
|
|
1151
|
+
required: ["column", "op"],
|
|
1152
|
+
},
|
|
302
1153
|
},
|
|
303
|
-
|
|
1154
|
+
returnColumns: {
|
|
1155
|
+
type: "array",
|
|
1156
|
+
description: "Optional list of columns to return (header name or 0-based index). If omitted, returns all columns (bounded by maxCols).",
|
|
1157
|
+
items: { type: ["string", "number"] },
|
|
1158
|
+
},
|
|
1159
|
+
offset: {
|
|
304
1160
|
type: "number",
|
|
305
|
-
description: "
|
|
306
|
-
default:
|
|
1161
|
+
description: "Number of matching rows to skip before returning results.",
|
|
1162
|
+
default: 0,
|
|
1163
|
+
},
|
|
1164
|
+
limit: {
|
|
1165
|
+
type: "number",
|
|
1166
|
+
description: "Maximum number of matching rows to return.",
|
|
1167
|
+
default: 50,
|
|
1168
|
+
},
|
|
1169
|
+
maxScanRows: {
|
|
1170
|
+
type: "number",
|
|
1171
|
+
description: "Maximum number of data rows to scan (excluding header).",
|
|
1172
|
+
default: 50000,
|
|
1173
|
+
},
|
|
1174
|
+
maxCols: {
|
|
1175
|
+
type: "number",
|
|
1176
|
+
description: "Maximum number of columns to scan/return.",
|
|
1177
|
+
default: 80,
|
|
1178
|
+
},
|
|
1179
|
+
maxCellChars: {
|
|
1180
|
+
type: "number",
|
|
1181
|
+
description: "Maximum characters to return per cell (long cells are truncated).",
|
|
1182
|
+
default: 2000,
|
|
307
1183
|
},
|
|
308
1184
|
},
|
|
309
1185
|
required: ["path"],
|
|
@@ -312,19 +1188,318 @@ export const localFileTools = [
|
|
|
312
1188
|
const filePath = resolveLocalPath(args?.path);
|
|
313
1189
|
if (!existsSync(filePath))
|
|
314
1190
|
throw new Error(`File not found: ${filePath}`);
|
|
315
|
-
const
|
|
316
|
-
const
|
|
317
|
-
const
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
1191
|
+
const maxScanRows = clampInt(args?.maxScanRows, 50000, 1, 200000);
|
|
1192
|
+
const maxCols = clampInt(args?.maxCols, 80, 1, 500);
|
|
1193
|
+
const maxCellChars = clampInt(args?.maxCellChars, 2000, 20, 20000);
|
|
1194
|
+
const offset = clampInt(args?.offset, 0, 0, 1_000_000_000);
|
|
1195
|
+
const limit = clampInt(args?.limit, 50, 1, 5000);
|
|
1196
|
+
const table = await loadXlsxTable(args, { filePath, maxScanRows, maxCols, maxCellChars });
|
|
1197
|
+
const where = compileWhere(table.headers, args?.where);
|
|
1198
|
+
const returnColumnsRaw = Array.isArray(args?.returnColumns)
|
|
1199
|
+
? args.returnColumns.slice(0, 200)
|
|
321
1200
|
: null;
|
|
322
|
-
const
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
1201
|
+
const returnIndices = returnColumnsRaw && returnColumnsRaw.length > 0
|
|
1202
|
+
? returnColumnsRaw
|
|
1203
|
+
.map((c) => resolveColumnIndex(table.headers, c))
|
|
1204
|
+
.filter((idx) => idx >= 0)
|
|
1205
|
+
: Array.from({ length: Math.min(table.headers.length, maxCols) }, (_, i) => i);
|
|
1206
|
+
const selectedHeaders = returnIndices.map((i) => table.headers[i] ?? `col_${i + 1}`);
|
|
1207
|
+
const outRows = [];
|
|
1208
|
+
let matched = 0;
|
|
1209
|
+
for (let i = 0; i < table.dataRows.length; i++) {
|
|
1210
|
+
const row = table.dataRows[i];
|
|
1211
|
+
if (!rowMatchesWhere(row, where))
|
|
1212
|
+
continue;
|
|
1213
|
+
if (matched >= offset && outRows.length < limit) {
|
|
1214
|
+
outRows.push({
|
|
1215
|
+
rowIndex: i + 1,
|
|
1216
|
+
row: returnIndices.map((idx) => row[idx]),
|
|
1217
|
+
});
|
|
1218
|
+
}
|
|
1219
|
+
matched++;
|
|
1220
|
+
if (outRows.length >= limit && matched >= offset + limit)
|
|
1221
|
+
break;
|
|
1222
|
+
}
|
|
1223
|
+
return {
|
|
1224
|
+
path: filePath,
|
|
1225
|
+
sheets: table.sheets,
|
|
1226
|
+
sheetName: table.sheetName,
|
|
1227
|
+
headerRow: table.headerRow,
|
|
1228
|
+
rangeA1: table.rangeA1,
|
|
1229
|
+
scannedRows: table.dataRows.length,
|
|
1230
|
+
matchedRows: matched,
|
|
1231
|
+
returnedRows: outRows.length,
|
|
1232
|
+
offset,
|
|
1233
|
+
limit,
|
|
1234
|
+
headers: selectedHeaders,
|
|
1235
|
+
rows: outRows,
|
|
1236
|
+
};
|
|
1237
|
+
},
|
|
1238
|
+
},
|
|
1239
|
+
{
|
|
1240
|
+
name: "xlsx_aggregate",
|
|
1241
|
+
description: "Aggregate values from a local XLSX (count/sum/avg/min/max) with optional filters. Deterministic, no network.",
|
|
1242
|
+
inputSchema: {
|
|
1243
|
+
type: "object",
|
|
1244
|
+
properties: {
|
|
1245
|
+
path: {
|
|
1246
|
+
type: "string",
|
|
1247
|
+
description: "Path to a local .xlsx file (absolute or relative to current working directory).",
|
|
1248
|
+
},
|
|
1249
|
+
sheetName: {
|
|
1250
|
+
type: "string",
|
|
1251
|
+
description: "Sheet to read. If omitted, the first sheet is used.",
|
|
1252
|
+
},
|
|
1253
|
+
headerRow: {
|
|
1254
|
+
type: "number",
|
|
1255
|
+
description: "1-based header row index. Use 0 for no header row.",
|
|
1256
|
+
default: 1,
|
|
1257
|
+
},
|
|
1258
|
+
rangeA1: {
|
|
1259
|
+
type: "string",
|
|
1260
|
+
description: "Optional A1 range (e.g. A1:D5000) to limit scanning to a specific region.",
|
|
1261
|
+
},
|
|
1262
|
+
where: {
|
|
1263
|
+
type: "array",
|
|
1264
|
+
description: "Optional filters combined with AND. Column can be a header name or 0-based index. Ops: eq, ne, contains, gt, gte, lt, lte, is_empty, not_empty.",
|
|
1265
|
+
items: {
|
|
1266
|
+
type: "object",
|
|
1267
|
+
properties: {
|
|
1268
|
+
column: { type: ["string", "number"] },
|
|
1269
|
+
op: {
|
|
1270
|
+
type: "string",
|
|
1271
|
+
enum: ["eq", "ne", "contains", "gt", "gte", "lt", "lte", "is_empty", "not_empty"],
|
|
1272
|
+
},
|
|
1273
|
+
value: { type: ["string", "number", "boolean", "null"] },
|
|
1274
|
+
caseSensitive: { type: "boolean" },
|
|
1275
|
+
},
|
|
1276
|
+
required: ["column", "op"],
|
|
1277
|
+
},
|
|
1278
|
+
},
|
|
1279
|
+
operation: {
|
|
1280
|
+
type: "string",
|
|
1281
|
+
enum: ["count", "sum", "avg", "min", "max"],
|
|
1282
|
+
description: "Aggregation to compute.",
|
|
1283
|
+
},
|
|
1284
|
+
value: {
|
|
1285
|
+
type: "object",
|
|
1286
|
+
description: "Value definition. Use {type:'column',column:'ColName'} or {type:'ratio',numeratorColumn:'A',denominatorColumn:'B'}.",
|
|
1287
|
+
properties: {
|
|
1288
|
+
type: { type: "string", enum: ["column", "ratio"] },
|
|
1289
|
+
column: { type: ["string", "number"] },
|
|
1290
|
+
numeratorColumn: { type: ["string", "number"] },
|
|
1291
|
+
denominatorColumn: { type: ["string", "number"] },
|
|
1292
|
+
},
|
|
1293
|
+
required: ["type"],
|
|
1294
|
+
},
|
|
1295
|
+
ignoreNonNumeric: {
|
|
1296
|
+
type: "boolean",
|
|
1297
|
+
description: "If true, skips rows where the value can't be parsed as a number (default true).",
|
|
1298
|
+
default: true,
|
|
1299
|
+
},
|
|
1300
|
+
returnRow: {
|
|
1301
|
+
type: "boolean",
|
|
1302
|
+
description: "If true (and operation is min/max), includes the best row.",
|
|
1303
|
+
default: true,
|
|
1304
|
+
},
|
|
1305
|
+
returnColumns: {
|
|
1306
|
+
type: "array",
|
|
1307
|
+
description: "If returnRow is true, optionally choose which columns to include from the best row (header name or 0-based index).",
|
|
1308
|
+
items: { type: ["string", "number"] },
|
|
1309
|
+
},
|
|
1310
|
+
maxScanRows: {
|
|
1311
|
+
type: "number",
|
|
1312
|
+
description: "Maximum number of data rows to scan (excluding header).",
|
|
1313
|
+
default: 50000,
|
|
1314
|
+
},
|
|
1315
|
+
maxCols: {
|
|
1316
|
+
type: "number",
|
|
1317
|
+
description: "Maximum number of columns to scan.",
|
|
1318
|
+
default: 200,
|
|
1319
|
+
},
|
|
1320
|
+
maxCellChars: {
|
|
1321
|
+
type: "number",
|
|
1322
|
+
description: "Maximum characters to return per cell in bestRow (long cells are truncated).",
|
|
1323
|
+
default: 2000,
|
|
1324
|
+
},
|
|
1325
|
+
},
|
|
1326
|
+
required: ["path", "operation"],
|
|
1327
|
+
},
|
|
1328
|
+
handler: async (args) => {
|
|
1329
|
+
const filePath = resolveLocalPath(args?.path);
|
|
1330
|
+
if (!existsSync(filePath))
|
|
1331
|
+
throw new Error(`File not found: ${filePath}`);
|
|
1332
|
+
const operation = String(args?.operation ?? "").trim().toLowerCase();
|
|
1333
|
+
if (!["count", "sum", "avg", "min", "max"].includes(operation)) {
|
|
1334
|
+
throw new Error(`Unsupported operation: ${operation}`);
|
|
1335
|
+
}
|
|
1336
|
+
const maxScanRows = clampInt(args?.maxScanRows, 50000, 1, 200000);
|
|
1337
|
+
const maxCols = clampInt(args?.maxCols, 200, 1, 500);
|
|
1338
|
+
const maxCellChars = clampInt(args?.maxCellChars, 2000, 20, 20000);
|
|
1339
|
+
const ignoreNonNumeric = args?.ignoreNonNumeric !== false;
|
|
1340
|
+
const returnRow = args?.returnRow !== false;
|
|
1341
|
+
const table = await loadXlsxTable(args, { filePath, maxScanRows, maxCols, maxCellChars });
|
|
1342
|
+
const where = compileWhere(table.headers, args?.where);
|
|
1343
|
+
const valueSpec = (args?.value ?? null);
|
|
1344
|
+
const getValue = (row) => {
|
|
1345
|
+
if (!valueSpec || typeof valueSpec !== "object")
|
|
1346
|
+
return null;
|
|
1347
|
+
const t = String(valueSpec.type ?? "").trim().toLowerCase();
|
|
1348
|
+
if (t === "column") {
|
|
1349
|
+
const idx = resolveColumnIndex(table.headers, valueSpec.column);
|
|
1350
|
+
if (idx < 0)
|
|
1351
|
+
return null;
|
|
1352
|
+
return toNumberOrNull(row[idx]);
|
|
1353
|
+
}
|
|
1354
|
+
if (t === "ratio") {
|
|
1355
|
+
const nIdx = resolveColumnIndex(table.headers, valueSpec.numeratorColumn);
|
|
1356
|
+
const dIdx = resolveColumnIndex(table.headers, valueSpec.denominatorColumn);
|
|
1357
|
+
if (nIdx < 0 || dIdx < 0)
|
|
1358
|
+
return null;
|
|
1359
|
+
const n = toNumberOrNull(row[nIdx]);
|
|
1360
|
+
const d = toNumberOrNull(row[dIdx]);
|
|
1361
|
+
if (n === null || d === null || d === 0)
|
|
1362
|
+
return null;
|
|
1363
|
+
return n / d;
|
|
1364
|
+
}
|
|
1365
|
+
return null;
|
|
1366
|
+
};
|
|
1367
|
+
let matchedRows = 0;
|
|
1368
|
+
let usedRows = 0;
|
|
1369
|
+
let skippedRows = 0;
|
|
1370
|
+
let sum = 0;
|
|
1371
|
+
let bestVal = null;
|
|
1372
|
+
let bestRowIndex = null;
|
|
1373
|
+
let bestRow = null;
|
|
1374
|
+
for (let i = 0; i < table.dataRows.length; i++) {
|
|
1375
|
+
const row = table.dataRows[i];
|
|
1376
|
+
if (!rowMatchesWhere(row, where))
|
|
1377
|
+
continue;
|
|
1378
|
+
matchedRows++;
|
|
1379
|
+
if (operation === "count" && !valueSpec)
|
|
1380
|
+
continue;
|
|
1381
|
+
const value = valueSpec ? getValue(row) : null;
|
|
1382
|
+
if (value === null) {
|
|
1383
|
+
skippedRows++;
|
|
1384
|
+
continue;
|
|
1385
|
+
}
|
|
1386
|
+
usedRows++;
|
|
1387
|
+
sum += value;
|
|
1388
|
+
if (operation === "min") {
|
|
1389
|
+
if (bestVal === null || value < bestVal) {
|
|
1390
|
+
bestVal = value;
|
|
1391
|
+
bestRowIndex = i + 1;
|
|
1392
|
+
bestRow = row;
|
|
1393
|
+
}
|
|
1394
|
+
}
|
|
1395
|
+
else if (operation === "max") {
|
|
1396
|
+
if (bestVal === null || value > bestVal) {
|
|
1397
|
+
bestVal = value;
|
|
1398
|
+
bestRowIndex = i + 1;
|
|
1399
|
+
bestRow = row;
|
|
1400
|
+
}
|
|
1401
|
+
}
|
|
1402
|
+
}
|
|
1403
|
+
let resultValue = null;
|
|
1404
|
+
if (operation === "count") {
|
|
1405
|
+
resultValue = valueSpec ? usedRows : matchedRows;
|
|
1406
|
+
}
|
|
1407
|
+
else if (operation === "sum") {
|
|
1408
|
+
resultValue = usedRows > 0 ? sum : null;
|
|
1409
|
+
}
|
|
1410
|
+
else if (operation === "avg") {
|
|
1411
|
+
resultValue = usedRows > 0 ? sum / usedRows : null;
|
|
1412
|
+
}
|
|
1413
|
+
else if (operation === "min" || operation === "max") {
|
|
1414
|
+
resultValue = bestVal;
|
|
1415
|
+
}
|
|
1416
|
+
let best = null;
|
|
1417
|
+
if (returnRow && (operation === "min" || operation === "max") && bestRowIndex !== null && bestRow) {
|
|
1418
|
+
const returnColumnsRaw = Array.isArray(args?.returnColumns)
|
|
1419
|
+
? args.returnColumns.slice(0, 200)
|
|
1420
|
+
: null;
|
|
1421
|
+
const returnIndices = returnColumnsRaw && returnColumnsRaw.length > 0
|
|
1422
|
+
? returnColumnsRaw
|
|
1423
|
+
.map((c) => resolveColumnIndex(table.headers, c))
|
|
1424
|
+
.filter((idx) => idx >= 0)
|
|
1425
|
+
: Array.from({ length: Math.min(table.headers.length, 50) }, (_, i) => i);
|
|
1426
|
+
best = {
|
|
1427
|
+
rowIndex: bestRowIndex,
|
|
1428
|
+
headers: returnIndices.map((i) => table.headers[i] ?? `col_${i + 1}`),
|
|
1429
|
+
row: returnIndices.map((i) => bestRow[i]),
|
|
1430
|
+
};
|
|
1431
|
+
}
|
|
1432
|
+
if (!ignoreNonNumeric && (operation === "sum" || operation === "avg" || operation === "min" || operation === "max") && skippedRows > 0) {
|
|
1433
|
+
// Caller can choose to treat non-numeric rows as a hard failure.
|
|
1434
|
+
// We keep it non-throwing by default for robustness.
|
|
1435
|
+
}
|
|
1436
|
+
return {
|
|
1437
|
+
path: filePath,
|
|
1438
|
+
sheets: table.sheets,
|
|
1439
|
+
sheetName: table.sheetName,
|
|
1440
|
+
headerRow: table.headerRow,
|
|
1441
|
+
rangeA1: table.rangeA1,
|
|
1442
|
+
operation,
|
|
1443
|
+
value: valueSpec ?? null,
|
|
1444
|
+
scannedRows: table.dataRows.length,
|
|
1445
|
+
matchedRows,
|
|
1446
|
+
usedRows,
|
|
1447
|
+
skippedRows,
|
|
1448
|
+
result: resultValue,
|
|
1449
|
+
bestRow: best,
|
|
1450
|
+
};
|
|
1451
|
+
},
|
|
1452
|
+
},
|
|
1453
|
+
{
|
|
1454
|
+
name: "read_pdf_text",
|
|
1455
|
+
description: "Extract text from a local PDF file for selected pages. Returns bounded text with page markers. Deterministic, no network.",
|
|
1456
|
+
inputSchema: {
|
|
1457
|
+
type: "object",
|
|
1458
|
+
properties: {
|
|
1459
|
+
path: {
|
|
1460
|
+
type: "string",
|
|
1461
|
+
description: "Path to a local .pdf file (absolute or relative to current working directory).",
|
|
1462
|
+
},
|
|
1463
|
+
pageStart: {
|
|
1464
|
+
type: "number",
|
|
1465
|
+
description: "1-based start page (inclusive). Defaults to 1.",
|
|
1466
|
+
default: 1,
|
|
1467
|
+
},
|
|
1468
|
+
pageEnd: {
|
|
1469
|
+
type: "number",
|
|
1470
|
+
description: "1-based end page (inclusive). Defaults to 3.",
|
|
1471
|
+
default: 3,
|
|
1472
|
+
},
|
|
1473
|
+
pageNumbers: {
|
|
1474
|
+
type: "array",
|
|
1475
|
+
description: "Optional explicit list of 1-based pages to extract (overrides pageStart/pageEnd).",
|
|
1476
|
+
items: { type: "number" },
|
|
1477
|
+
},
|
|
1478
|
+
maxChars: {
|
|
1479
|
+
type: "number",
|
|
1480
|
+
description: "Maximum characters to return across all extracted pages (text is truncated).",
|
|
1481
|
+
default: 12000,
|
|
1482
|
+
},
|
|
1483
|
+
},
|
|
1484
|
+
required: ["path"],
|
|
1485
|
+
},
|
|
1486
|
+
handler: async (args) => {
|
|
1487
|
+
const filePath = resolveLocalPath(args?.path);
|
|
1488
|
+
if (!existsSync(filePath))
|
|
1489
|
+
throw new Error(`File not found: ${filePath}`);
|
|
1490
|
+
const maxChars = clampInt(args?.maxChars, 12000, 1000, 200000);
|
|
1491
|
+
const pageNumbersRaw = Array.isArray(args?.pageNumbers) ? args.pageNumbers : null;
|
|
1492
|
+
const explicitPages = pageNumbersRaw
|
|
1493
|
+
? pageNumbersRaw
|
|
1494
|
+
.map((n) => clampInt(n, 0, 0, 100000))
|
|
1495
|
+
.filter((n) => n > 0)
|
|
1496
|
+
: null;
|
|
1497
|
+
const pageStart = clampInt(args?.pageStart, 1, 1, 100000);
|
|
1498
|
+
const pageEnd = clampInt(args?.pageEnd, 3, 1, 100000);
|
|
1499
|
+
const mod = await getPdfParseModule();
|
|
1500
|
+
const PDFParse = mod?.PDFParse;
|
|
1501
|
+
if (typeof PDFParse !== "function") {
|
|
1502
|
+
throw new Error("pdf-parse module missing PDFParse export (unsupported version)");
|
|
328
1503
|
}
|
|
329
1504
|
const buffer = await readFile(filePath);
|
|
330
1505
|
const parser = new PDFParse({ data: buffer });
|
|
@@ -382,5 +1557,729 @@ export const localFileTools = [
|
|
|
382
1557
|
};
|
|
383
1558
|
},
|
|
384
1559
|
},
|
|
1560
|
+
{
|
|
1561
|
+
name: "pdf_search_text",
|
|
1562
|
+
description: "Search text inside a local PDF over selected pages. Returns page numbers and bounded snippets around matches. Deterministic, no network.",
|
|
1563
|
+
inputSchema: {
|
|
1564
|
+
type: "object",
|
|
1565
|
+
properties: {
|
|
1566
|
+
path: {
|
|
1567
|
+
type: "string",
|
|
1568
|
+
description: "Path to a local .pdf file (absolute or relative to current working directory).",
|
|
1569
|
+
},
|
|
1570
|
+
query: {
|
|
1571
|
+
type: "string",
|
|
1572
|
+
description: "Text to search for.",
|
|
1573
|
+
},
|
|
1574
|
+
caseSensitive: {
|
|
1575
|
+
type: "boolean",
|
|
1576
|
+
description: "If true, match case-sensitively (default false).",
|
|
1577
|
+
default: false,
|
|
1578
|
+
},
|
|
1579
|
+
pageStart: {
|
|
1580
|
+
type: "number",
|
|
1581
|
+
description: "1-based start page (inclusive). Defaults to 1.",
|
|
1582
|
+
default: 1,
|
|
1583
|
+
},
|
|
1584
|
+
pageEnd: {
|
|
1585
|
+
type: "number",
|
|
1586
|
+
description: "1-based end page (inclusive). Defaults to 25.",
|
|
1587
|
+
default: 25,
|
|
1588
|
+
},
|
|
1589
|
+
pageNumbers: {
|
|
1590
|
+
type: "array",
|
|
1591
|
+
description: "Optional explicit list of 1-based pages to search (overrides pageStart/pageEnd).",
|
|
1592
|
+
items: { type: "number" },
|
|
1593
|
+
},
|
|
1594
|
+
maxMatches: {
|
|
1595
|
+
type: "number",
|
|
1596
|
+
description: "Maximum matches to return across all pages.",
|
|
1597
|
+
default: 25,
|
|
1598
|
+
},
|
|
1599
|
+
snippetChars: {
|
|
1600
|
+
type: "number",
|
|
1601
|
+
description: "Snippet size (characters) around each match.",
|
|
1602
|
+
default: 180,
|
|
1603
|
+
},
|
|
1604
|
+
},
|
|
1605
|
+
required: ["path", "query"],
|
|
1606
|
+
},
|
|
1607
|
+
handler: async (args) => {
|
|
1608
|
+
const filePath = resolveLocalPath(args?.path);
|
|
1609
|
+
if (!existsSync(filePath))
|
|
1610
|
+
throw new Error(`File not found: ${filePath}`);
|
|
1611
|
+
const queryRaw = String(args?.query ?? "");
|
|
1612
|
+
const query = queryRaw.trim();
|
|
1613
|
+
if (!query)
|
|
1614
|
+
throw new Error("query is required");
|
|
1615
|
+
const caseSensitive = args?.caseSensitive === true;
|
|
1616
|
+
const maxMatches = clampInt(args?.maxMatches, 25, 1, 200);
|
|
1617
|
+
const snippetChars = clampInt(args?.snippetChars, 180, 40, 1000);
|
|
1618
|
+
const pageNumbersRaw = Array.isArray(args?.pageNumbers) ? args.pageNumbers : null;
|
|
1619
|
+
const explicitPages = pageNumbersRaw
|
|
1620
|
+
? pageNumbersRaw
|
|
1621
|
+
.map((n) => clampInt(n, 0, 0, 100000))
|
|
1622
|
+
.filter((n) => n > 0)
|
|
1623
|
+
: null;
|
|
1624
|
+
const pageStart = clampInt(args?.pageStart, 1, 1, 100000);
|
|
1625
|
+
const pageEnd = clampInt(args?.pageEnd, 25, 1, 100000);
|
|
1626
|
+
const mod = await getPdfParseModule();
|
|
1627
|
+
const PDFParse = mod?.PDFParse;
|
|
1628
|
+
if (typeof PDFParse !== "function") {
|
|
1629
|
+
throw new Error("pdf-parse module missing PDFParse export (unsupported version)");
|
|
1630
|
+
}
|
|
1631
|
+
const buffer = await readFile(filePath);
|
|
1632
|
+
const parser = new PDFParse({ data: buffer });
|
|
1633
|
+
let numPages = 0;
|
|
1634
|
+
let extractedPages = [];
|
|
1635
|
+
let pages = [];
|
|
1636
|
+
try {
|
|
1637
|
+
const parseParams = {
|
|
1638
|
+
lineEnforce: true,
|
|
1639
|
+
pageJoiner: "",
|
|
1640
|
+
parseHyperlinks: false,
|
|
1641
|
+
};
|
|
1642
|
+
if (explicitPages && explicitPages.length > 0) {
|
|
1643
|
+
parseParams.partial = explicitPages.slice(0, 200);
|
|
1644
|
+
}
|
|
1645
|
+
else {
|
|
1646
|
+
const start = Math.min(pageStart, pageEnd);
|
|
1647
|
+
const end = Math.max(pageStart, pageEnd);
|
|
1648
|
+
parseParams.first = start;
|
|
1649
|
+
parseParams.last = end;
|
|
1650
|
+
}
|
|
1651
|
+
const result = await parser.getText(parseParams);
|
|
1652
|
+
numPages = Number(result?.total ?? 0);
|
|
1653
|
+
const parsedPages = Array.isArray(result?.pages) ? result.pages : [];
|
|
1654
|
+
extractedPages = parsedPages
|
|
1655
|
+
.map((p) => Number(p?.num ?? 0))
|
|
1656
|
+
.filter((n) => Number.isFinite(n) && n > 0);
|
|
1657
|
+
pages = parsedPages.map((p) => ({
|
|
1658
|
+
num: Number(p?.num ?? 0),
|
|
1659
|
+
text: String(p?.text ?? ""),
|
|
1660
|
+
}));
|
|
1661
|
+
}
|
|
1662
|
+
finally {
|
|
1663
|
+
try {
|
|
1664
|
+
await parser.destroy();
|
|
1665
|
+
}
|
|
1666
|
+
catch {
|
|
1667
|
+
// ignore
|
|
1668
|
+
}
|
|
1669
|
+
}
|
|
1670
|
+
const needle = caseSensitive ? query : query.toLowerCase();
|
|
1671
|
+
const matches = [];
|
|
1672
|
+
for (const p of pages) {
|
|
1673
|
+
const haystackRaw = String(p.text ?? "");
|
|
1674
|
+
const haystack = caseSensitive ? haystackRaw : haystackRaw.toLowerCase();
|
|
1675
|
+
let from = 0;
|
|
1676
|
+
while (matches.length < maxMatches) {
|
|
1677
|
+
const idx = haystack.indexOf(needle, from);
|
|
1678
|
+
if (idx === -1)
|
|
1679
|
+
break;
|
|
1680
|
+
const start = Math.max(0, idx - Math.floor(snippetChars / 2));
|
|
1681
|
+
const end = Math.min(haystackRaw.length, start + snippetChars);
|
|
1682
|
+
const snippet = haystackRaw.slice(start, end).replace(/\s+/g, " ").trim();
|
|
1683
|
+
matches.push({ page: p.num, index: idx, snippet });
|
|
1684
|
+
from = idx + Math.max(1, needle.length);
|
|
1685
|
+
}
|
|
1686
|
+
if (matches.length >= maxMatches)
|
|
1687
|
+
break;
|
|
1688
|
+
}
|
|
1689
|
+
return {
|
|
1690
|
+
path: filePath,
|
|
1691
|
+
query,
|
|
1692
|
+
caseSensitive,
|
|
1693
|
+
numPages,
|
|
1694
|
+
pagesIncluded: extractedPages,
|
|
1695
|
+
maxMatches,
|
|
1696
|
+
matchCount: matches.length,
|
|
1697
|
+
matches,
|
|
1698
|
+
};
|
|
1699
|
+
},
|
|
1700
|
+
},
|
|
1701
|
+
{
|
|
1702
|
+
name: "read_text_file",
|
|
1703
|
+
description: "Read a local text file (txt/md/xml/json/etc) and return a bounded text slice. Deterministic, no network.",
|
|
1704
|
+
inputSchema: {
|
|
1705
|
+
type: "object",
|
|
1706
|
+
properties: {
|
|
1707
|
+
path: {
|
|
1708
|
+
type: "string",
|
|
1709
|
+
description: "Path to a local text file (absolute or relative to current working directory).",
|
|
1710
|
+
},
|
|
1711
|
+
encoding: {
|
|
1712
|
+
type: "string",
|
|
1713
|
+
description: "File encoding (default: utf8).",
|
|
1714
|
+
default: "utf8",
|
|
1715
|
+
},
|
|
1716
|
+
startChar: {
|
|
1717
|
+
type: "number",
|
|
1718
|
+
description: "0-based character offset to start reading from (default: 0).",
|
|
1719
|
+
default: 0,
|
|
1720
|
+
},
|
|
1721
|
+
maxChars: {
|
|
1722
|
+
type: "number",
|
|
1723
|
+
description: "Maximum characters to return (text is truncated).",
|
|
1724
|
+
default: 12000,
|
|
1725
|
+
},
|
|
1726
|
+
},
|
|
1727
|
+
required: ["path"],
|
|
1728
|
+
},
|
|
1729
|
+
handler: async (args) => {
|
|
1730
|
+
const filePath = resolveLocalPath(args?.path);
|
|
1731
|
+
if (!existsSync(filePath))
|
|
1732
|
+
throw new Error(`File not found: ${filePath}`);
|
|
1733
|
+
const encoding = String(args?.encoding ?? "utf8");
|
|
1734
|
+
const startChar = clampInt(args?.startChar, 0, 0, 50_000_000);
|
|
1735
|
+
const maxChars = clampInt(args?.maxChars, 12000, 1, 200000);
|
|
1736
|
+
const all = await readFile(filePath, { encoding });
|
|
1737
|
+
const sliced = all.slice(startChar);
|
|
1738
|
+
const truncated = sliced.length > maxChars;
|
|
1739
|
+
const text = truncated ? sliced.slice(0, maxChars) : sliced;
|
|
1740
|
+
return {
|
|
1741
|
+
path: filePath,
|
|
1742
|
+
encoding,
|
|
1743
|
+
startChar,
|
|
1744
|
+
maxChars,
|
|
1745
|
+
truncated,
|
|
1746
|
+
text,
|
|
1747
|
+
};
|
|
1748
|
+
},
|
|
1749
|
+
},
|
|
1750
|
+
{
|
|
1751
|
+
name: "read_json_file",
|
|
1752
|
+
description: "Read a local JSON file and return a bounded JSON preview (depth/item/string truncation). Deterministic, no network.",
|
|
1753
|
+
inputSchema: {
|
|
1754
|
+
type: "object",
|
|
1755
|
+
properties: {
|
|
1756
|
+
path: {
|
|
1757
|
+
type: "string",
|
|
1758
|
+
description: "Path to a local .json file (absolute or relative to current working directory).",
|
|
1759
|
+
},
|
|
1760
|
+
maxDepth: {
|
|
1761
|
+
type: "number",
|
|
1762
|
+
description: "Maximum depth to include (default: 8).",
|
|
1763
|
+
default: 8,
|
|
1764
|
+
},
|
|
1765
|
+
maxItems: {
|
|
1766
|
+
type: "number",
|
|
1767
|
+
description: "Maximum items (array elements or object keys) per container (default: 200).",
|
|
1768
|
+
default: 200,
|
|
1769
|
+
},
|
|
1770
|
+
maxStringChars: {
|
|
1771
|
+
type: "number",
|
|
1772
|
+
description: "Maximum characters per string value (default: 2000).",
|
|
1773
|
+
default: 2000,
|
|
1774
|
+
},
|
|
1775
|
+
},
|
|
1776
|
+
required: ["path"],
|
|
1777
|
+
},
|
|
1778
|
+
handler: async (args) => {
|
|
1779
|
+
const filePath = resolveLocalPath(args?.path);
|
|
1780
|
+
if (!existsSync(filePath))
|
|
1781
|
+
throw new Error(`File not found: ${filePath}`);
|
|
1782
|
+
const maxDepth = clampInt(args?.maxDepth, 8, 1, 30);
|
|
1783
|
+
const maxItems = clampInt(args?.maxItems, 200, 1, 2000);
|
|
1784
|
+
const maxStringChars = clampInt(args?.maxStringChars, 2000, 20, 20000);
|
|
1785
|
+
const raw = await readFile(filePath, { encoding: "utf8" });
|
|
1786
|
+
let parsed;
|
|
1787
|
+
try {
|
|
1788
|
+
parsed = JSON.parse(raw);
|
|
1789
|
+
}
|
|
1790
|
+
catch (err) {
|
|
1791
|
+
throw new Error(`Invalid JSON: ${err?.message ?? String(err)}`);
|
|
1792
|
+
}
|
|
1793
|
+
const state = { truncated: false };
|
|
1794
|
+
const value = pruneJsonForPreview(parsed, { maxDepth, maxItems, maxStringChars }, state);
|
|
1795
|
+
return {
|
|
1796
|
+
path: filePath,
|
|
1797
|
+
maxDepth,
|
|
1798
|
+
maxItems,
|
|
1799
|
+
maxStringChars,
|
|
1800
|
+
truncated: state.truncated,
|
|
1801
|
+
rootType: Array.isArray(parsed) ? "array" : typeof parsed,
|
|
1802
|
+
value,
|
|
1803
|
+
};
|
|
1804
|
+
},
|
|
1805
|
+
},
|
|
1806
|
+
{
|
|
1807
|
+
name: "json_select",
|
|
1808
|
+
description: "Select a sub-value from a local JSON file using a JSON Pointer (RFC 6901) and return a bounded preview. Deterministic, no network.",
|
|
1809
|
+
inputSchema: {
|
|
1810
|
+
type: "object",
|
|
1811
|
+
properties: {
|
|
1812
|
+
path: {
|
|
1813
|
+
type: "string",
|
|
1814
|
+
description: "Path to a local .json file (absolute or relative to current working directory).",
|
|
1815
|
+
},
|
|
1816
|
+
pointer: {
|
|
1817
|
+
type: "string",
|
|
1818
|
+
description: "JSON Pointer (RFC 6901). Example: '/a/b/0/name'. Use '' or '/' for the root value.",
|
|
1819
|
+
default: "",
|
|
1820
|
+
},
|
|
1821
|
+
maxDepth: {
|
|
1822
|
+
type: "number",
|
|
1823
|
+
description: "Maximum depth to include (default: 8).",
|
|
1824
|
+
default: 8,
|
|
1825
|
+
},
|
|
1826
|
+
maxItems: {
|
|
1827
|
+
type: "number",
|
|
1828
|
+
description: "Maximum items (array elements or object keys) per container (default: 200).",
|
|
1829
|
+
default: 200,
|
|
1830
|
+
},
|
|
1831
|
+
maxStringChars: {
|
|
1832
|
+
type: "number",
|
|
1833
|
+
description: "Maximum characters per string value (default: 2000).",
|
|
1834
|
+
default: 2000,
|
|
1835
|
+
},
|
|
1836
|
+
},
|
|
1837
|
+
required: ["path"],
|
|
1838
|
+
},
|
|
1839
|
+
handler: async (args) => {
|
|
1840
|
+
const filePath = resolveLocalPath(args?.path);
|
|
1841
|
+
if (!existsSync(filePath))
|
|
1842
|
+
throw new Error(`File not found: ${filePath}`);
|
|
1843
|
+
const pointer = String(args?.pointer ?? "");
|
|
1844
|
+
const maxDepth = clampInt(args?.maxDepth, 8, 1, 30);
|
|
1845
|
+
const maxItems = clampInt(args?.maxItems, 200, 1, 2000);
|
|
1846
|
+
const maxStringChars = clampInt(args?.maxStringChars, 2000, 20, 20000);
|
|
1847
|
+
const raw = await readFile(filePath, { encoding: "utf8" });
|
|
1848
|
+
let parsed;
|
|
1849
|
+
try {
|
|
1850
|
+
parsed = JSON.parse(raw);
|
|
1851
|
+
}
|
|
1852
|
+
catch (err) {
|
|
1853
|
+
throw new Error(`Invalid JSON: ${err?.message ?? String(err)}`);
|
|
1854
|
+
}
|
|
1855
|
+
const selected = jsonPointerGet(parsed, pointer);
|
|
1856
|
+
if (!selected.found) {
|
|
1857
|
+
return {
|
|
1858
|
+
path: filePath,
|
|
1859
|
+
pointer,
|
|
1860
|
+
found: false,
|
|
1861
|
+
truncated: false,
|
|
1862
|
+
value: null,
|
|
1863
|
+
};
|
|
1864
|
+
}
|
|
1865
|
+
const state = { truncated: false };
|
|
1866
|
+
const value = pruneJsonForPreview(selected.value, { maxDepth, maxItems, maxStringChars }, state);
|
|
1867
|
+
return {
|
|
1868
|
+
path: filePath,
|
|
1869
|
+
pointer,
|
|
1870
|
+
found: true,
|
|
1871
|
+
maxDepth,
|
|
1872
|
+
maxItems,
|
|
1873
|
+
maxStringChars,
|
|
1874
|
+
truncated: state.truncated,
|
|
1875
|
+
value,
|
|
1876
|
+
};
|
|
1877
|
+
},
|
|
1878
|
+
},
|
|
1879
|
+
{
|
|
1880
|
+
name: "read_jsonl_file",
|
|
1881
|
+
description: "Read a local JSONL file and return bounded parsed rows. Deterministic, no network.",
|
|
1882
|
+
inputSchema: {
|
|
1883
|
+
type: "object",
|
|
1884
|
+
properties: {
|
|
1885
|
+
path: {
|
|
1886
|
+
type: "string",
|
|
1887
|
+
description: "Path to a local .jsonl file (absolute or relative to current working directory).",
|
|
1888
|
+
},
|
|
1889
|
+
encoding: {
|
|
1890
|
+
type: "string",
|
|
1891
|
+
description: "File encoding (default: utf8).",
|
|
1892
|
+
default: "utf8",
|
|
1893
|
+
},
|
|
1894
|
+
offsetLines: {
|
|
1895
|
+
type: "number",
|
|
1896
|
+
description: "Number of lines to skip before returning results.",
|
|
1897
|
+
default: 0,
|
|
1898
|
+
},
|
|
1899
|
+
limitLines: {
|
|
1900
|
+
type: "number",
|
|
1901
|
+
description: "Maximum number of non-empty lines to return.",
|
|
1902
|
+
default: 200,
|
|
1903
|
+
},
|
|
1904
|
+
parseJson: {
|
|
1905
|
+
type: "boolean",
|
|
1906
|
+
description: "If true, parses each line as JSON (default true). If false, returns raw text lines.",
|
|
1907
|
+
default: true,
|
|
1908
|
+
},
|
|
1909
|
+
maxLineChars: {
|
|
1910
|
+
type: "number",
|
|
1911
|
+
description: "Maximum characters per returned raw line (default 4000).",
|
|
1912
|
+
default: 4000,
|
|
1913
|
+
},
|
|
1914
|
+
maxDepth: {
|
|
1915
|
+
type: "number",
|
|
1916
|
+
description: "Maximum depth to include for parsed JSON lines (default: 6).",
|
|
1917
|
+
default: 6,
|
|
1918
|
+
},
|
|
1919
|
+
maxItems: {
|
|
1920
|
+
type: "number",
|
|
1921
|
+
description: "Maximum items per container for parsed JSON lines (default: 100).",
|
|
1922
|
+
default: 100,
|
|
1923
|
+
},
|
|
1924
|
+
maxStringChars: {
|
|
1925
|
+
type: "number",
|
|
1926
|
+
description: "Maximum characters per string for parsed JSON lines (default: 1000).",
|
|
1927
|
+
default: 1000,
|
|
1928
|
+
},
|
|
1929
|
+
},
|
|
1930
|
+
required: ["path"],
|
|
1931
|
+
},
|
|
1932
|
+
handler: async (args) => {
|
|
1933
|
+
const filePath = resolveLocalPath(args?.path);
|
|
1934
|
+
if (!existsSync(filePath))
|
|
1935
|
+
throw new Error(`File not found: ${filePath}`);
|
|
1936
|
+
const encoding = String(args?.encoding ?? "utf8");
|
|
1937
|
+
const offsetLines = clampInt(args?.offsetLines, 0, 0, 5_000_000);
|
|
1938
|
+
const limitLines = clampInt(args?.limitLines, 200, 1, 5000);
|
|
1939
|
+
const parseJson = args?.parseJson !== false;
|
|
1940
|
+
const maxLineChars = clampInt(args?.maxLineChars, 4000, 200, 50000);
|
|
1941
|
+
const maxDepth = clampInt(args?.maxDepth, 6, 1, 30);
|
|
1942
|
+
const maxItems = clampInt(args?.maxItems, 100, 1, 2000);
|
|
1943
|
+
const maxStringChars = clampInt(args?.maxStringChars, 1000, 20, 20000);
|
|
1944
|
+
const text = await readFile(filePath, { encoding });
|
|
1945
|
+
const linesAll = String(text).split(/\r?\n/);
|
|
1946
|
+
const out = [];
|
|
1947
|
+
const errors = [];
|
|
1948
|
+
let seenNonEmpty = 0;
|
|
1949
|
+
for (let i = 0; i < linesAll.length; i++) {
|
|
1950
|
+
const raw = String(linesAll[i] ?? "");
|
|
1951
|
+
if (raw.trim().length === 0)
|
|
1952
|
+
continue;
|
|
1953
|
+
if (seenNonEmpty < offsetLines) {
|
|
1954
|
+
seenNonEmpty++;
|
|
1955
|
+
continue;
|
|
1956
|
+
}
|
|
1957
|
+
if (out.length >= limitLines)
|
|
1958
|
+
break;
|
|
1959
|
+
const lineNumber = i + 1;
|
|
1960
|
+
if (!parseJson) {
|
|
1961
|
+
const truncated = raw.length > maxLineChars ? raw.slice(0, maxLineChars) + "..." : raw;
|
|
1962
|
+
out.push({ lineNumber, value: truncated, raw: undefined });
|
|
1963
|
+
continue;
|
|
1964
|
+
}
|
|
1965
|
+
try {
|
|
1966
|
+
const parsed = JSON.parse(raw);
|
|
1967
|
+
const state = { truncated: false };
|
|
1968
|
+
const pruned = pruneJsonForPreview(parsed, { maxDepth, maxItems, maxStringChars }, state);
|
|
1969
|
+
out.push({ lineNumber, value: pruned });
|
|
1970
|
+
}
|
|
1971
|
+
catch (err) {
|
|
1972
|
+
errors.push({ lineNumber, error: err?.message ?? String(err) });
|
|
1973
|
+
}
|
|
1974
|
+
}
|
|
1975
|
+
return {
|
|
1976
|
+
path: filePath,
|
|
1977
|
+
encoding,
|
|
1978
|
+
offsetLines,
|
|
1979
|
+
limitLines,
|
|
1980
|
+
parseJson,
|
|
1981
|
+
returnedLines: out.length,
|
|
1982
|
+
errorCount: errors.length,
|
|
1983
|
+
errors: errors.slice(0, 10),
|
|
1984
|
+
lines: out,
|
|
1985
|
+
};
|
|
1986
|
+
},
|
|
1987
|
+
},
|
|
1988
|
+
{
|
|
1989
|
+
name: "zip_list_files",
|
|
1990
|
+
description: "List entries in a local ZIP file. Deterministic, no network.",
|
|
1991
|
+
inputSchema: {
|
|
1992
|
+
type: "object",
|
|
1993
|
+
properties: {
|
|
1994
|
+
path: {
|
|
1995
|
+
type: "string",
|
|
1996
|
+
description: "Path to a local .zip file (absolute or relative to current working directory).",
|
|
1997
|
+
},
|
|
1998
|
+
maxEntries: {
|
|
1999
|
+
type: "number",
|
|
2000
|
+
description: "Maximum entries to return.",
|
|
2001
|
+
default: 200,
|
|
2002
|
+
},
|
|
2003
|
+
},
|
|
2004
|
+
required: ["path"],
|
|
2005
|
+
},
|
|
2006
|
+
handler: async (args) => {
|
|
2007
|
+
const filePath = resolveLocalPath(args?.path);
|
|
2008
|
+
if (!existsSync(filePath))
|
|
2009
|
+
throw new Error(`File not found: ${filePath}`);
|
|
2010
|
+
const maxEntries = clampInt(args?.maxEntries, 200, 1, 5000);
|
|
2011
|
+
const result = await zipListEntries(filePath, maxEntries);
|
|
2012
|
+
return {
|
|
2013
|
+
path: filePath,
|
|
2014
|
+
maxEntries,
|
|
2015
|
+
returnedEntries: result.entries.length,
|
|
2016
|
+
truncated: result.truncated,
|
|
2017
|
+
entries: result.entries,
|
|
2018
|
+
};
|
|
2019
|
+
},
|
|
2020
|
+
},
|
|
2021
|
+
{
|
|
2022
|
+
name: "zip_read_text_file",
|
|
2023
|
+
description: "Read a text file inside a local ZIP archive and return bounded text. Deterministic, no network.",
|
|
2024
|
+
inputSchema: {
|
|
2025
|
+
type: "object",
|
|
2026
|
+
properties: {
|
|
2027
|
+
path: {
|
|
2028
|
+
type: "string",
|
|
2029
|
+
description: "Path to a local .zip file (absolute or relative to current working directory).",
|
|
2030
|
+
},
|
|
2031
|
+
innerPath: {
|
|
2032
|
+
type: "string",
|
|
2033
|
+
description: "Path of the entry inside the ZIP (use zip_list_files to discover names).",
|
|
2034
|
+
},
|
|
2035
|
+
caseSensitive: {
|
|
2036
|
+
type: "boolean",
|
|
2037
|
+
description: "If true, entry match is case-sensitive (default true).",
|
|
2038
|
+
default: true,
|
|
2039
|
+
},
|
|
2040
|
+
encoding: {
|
|
2041
|
+
type: "string",
|
|
2042
|
+
description: "Text encoding for the entry (default: utf8).",
|
|
2043
|
+
default: "utf8",
|
|
2044
|
+
},
|
|
2045
|
+
maxChars: {
|
|
2046
|
+
type: "number",
|
|
2047
|
+
description: "Maximum characters to return.",
|
|
2048
|
+
default: 12000,
|
|
2049
|
+
},
|
|
2050
|
+
maxBytes: {
|
|
2051
|
+
type: "number",
|
|
2052
|
+
description: "Maximum uncompressed bytes to read (safety cap).",
|
|
2053
|
+
default: 5000000,
|
|
2054
|
+
},
|
|
2055
|
+
},
|
|
2056
|
+
required: ["path", "innerPath"],
|
|
2057
|
+
},
|
|
2058
|
+
handler: async (args) => {
|
|
2059
|
+
const zipPath = resolveLocalPath(args?.path);
|
|
2060
|
+
if (!existsSync(zipPath))
|
|
2061
|
+
throw new Error(`File not found: ${zipPath}`);
|
|
2062
|
+
const innerPath = String(args?.innerPath ?? "").trim();
|
|
2063
|
+
if (!innerPath)
|
|
2064
|
+
throw new Error("innerPath is required");
|
|
2065
|
+
const caseSensitive = args?.caseSensitive !== false;
|
|
2066
|
+
const encoding = String(args?.encoding ?? "utf8");
|
|
2067
|
+
const maxChars = clampInt(args?.maxChars, 12000, 200, 200000);
|
|
2068
|
+
const maxBytes = clampInt(args?.maxBytes, 5000000, 1000, 50_000_000);
|
|
2069
|
+
const { buffer, entry } = await zipReadEntryBuffer(zipPath, innerPath, {
|
|
2070
|
+
maxBytes,
|
|
2071
|
+
caseSensitive,
|
|
2072
|
+
});
|
|
2073
|
+
const all = buffer.toString(encoding);
|
|
2074
|
+
const truncated = all.length > maxChars;
|
|
2075
|
+
const text = truncated ? all.slice(0, maxChars) : all;
|
|
2076
|
+
return {
|
|
2077
|
+
path: zipPath,
|
|
2078
|
+
innerPath: entry.fileName,
|
|
2079
|
+
encoding,
|
|
2080
|
+
sizeBytes: buffer.length,
|
|
2081
|
+
maxChars,
|
|
2082
|
+
truncated,
|
|
2083
|
+
text,
|
|
2084
|
+
};
|
|
2085
|
+
},
|
|
2086
|
+
},
|
|
2087
|
+
{
|
|
2088
|
+
name: "zip_extract_file",
|
|
2089
|
+
description: "Extract a single file from a local ZIP archive to a local output directory (zip-slip safe). Deterministic, no network.",
|
|
2090
|
+
inputSchema: {
|
|
2091
|
+
type: "object",
|
|
2092
|
+
properties: {
|
|
2093
|
+
path: {
|
|
2094
|
+
type: "string",
|
|
2095
|
+
description: "Path to a local .zip file (absolute or relative to current working directory).",
|
|
2096
|
+
},
|
|
2097
|
+
innerPath: {
|
|
2098
|
+
type: "string",
|
|
2099
|
+
description: "Path of the entry inside the ZIP (use zip_list_files to discover names).",
|
|
2100
|
+
},
|
|
2101
|
+
caseSensitive: {
|
|
2102
|
+
type: "boolean",
|
|
2103
|
+
description: "If true, entry match is case-sensitive (default true).",
|
|
2104
|
+
default: true,
|
|
2105
|
+
},
|
|
2106
|
+
outputDir: {
|
|
2107
|
+
type: "string",
|
|
2108
|
+
description: "Directory to extract into (absolute or relative). Default: .tmp/nodebench_zip_extract",
|
|
2109
|
+
default: ".tmp/nodebench_zip_extract",
|
|
2110
|
+
},
|
|
2111
|
+
overwrite: {
|
|
2112
|
+
type: "boolean",
|
|
2113
|
+
description: "If true, overwrites an existing output file (default false).",
|
|
2114
|
+
default: false,
|
|
2115
|
+
},
|
|
2116
|
+
maxBytes: {
|
|
2117
|
+
type: "number",
|
|
2118
|
+
description: "Maximum uncompressed bytes to extract (safety cap).",
|
|
2119
|
+
default: 25000000,
|
|
2120
|
+
},
|
|
2121
|
+
},
|
|
2122
|
+
required: ["path", "innerPath"],
|
|
2123
|
+
},
|
|
2124
|
+
handler: async (args) => {
|
|
2125
|
+
const zipPath = resolveLocalPath(args?.path);
|
|
2126
|
+
if (!existsSync(zipPath))
|
|
2127
|
+
throw new Error(`File not found: ${zipPath}`);
|
|
2128
|
+
const innerPath = String(args?.innerPath ?? "").trim();
|
|
2129
|
+
if (!innerPath)
|
|
2130
|
+
throw new Error("innerPath is required");
|
|
2131
|
+
const outputDir = resolveLocalPath(args?.outputDir ?? ".tmp/nodebench_zip_extract");
|
|
2132
|
+
const overwrite = args?.overwrite === true;
|
|
2133
|
+
const caseSensitive = args?.caseSensitive !== false;
|
|
2134
|
+
const maxBytes = clampInt(args?.maxBytes, 25000000, 1000, 200_000_000);
|
|
2135
|
+
const { buffer, entry } = await zipReadEntryBuffer(zipPath, innerPath, {
|
|
2136
|
+
maxBytes,
|
|
2137
|
+
caseSensitive,
|
|
2138
|
+
});
|
|
2139
|
+
const extractedPath = safeJoinInsideDir(outputDir, entry.fileName);
|
|
2140
|
+
await (await import("node:fs/promises")).mkdir(path.dirname(extractedPath), { recursive: true });
|
|
2141
|
+
const alreadyExists = existsSync(extractedPath);
|
|
2142
|
+
if (alreadyExists && !overwrite) {
|
|
2143
|
+
return {
|
|
2144
|
+
path: zipPath,
|
|
2145
|
+
innerPath: entry.fileName,
|
|
2146
|
+
outputDir,
|
|
2147
|
+
extractedPath,
|
|
2148
|
+
sizeBytes: buffer.length,
|
|
2149
|
+
existed: true,
|
|
2150
|
+
overwritten: false,
|
|
2151
|
+
};
|
|
2152
|
+
}
|
|
2153
|
+
await (await import("node:fs/promises")).writeFile(extractedPath, buffer);
|
|
2154
|
+
return {
|
|
2155
|
+
path: zipPath,
|
|
2156
|
+
innerPath: entry.fileName,
|
|
2157
|
+
outputDir,
|
|
2158
|
+
extractedPath,
|
|
2159
|
+
sizeBytes: buffer.length,
|
|
2160
|
+
existed: alreadyExists,
|
|
2161
|
+
overwritten: alreadyExists ? overwrite : false,
|
|
2162
|
+
};
|
|
2163
|
+
},
|
|
2164
|
+
},
|
|
2165
|
+
{
|
|
2166
|
+
name: "read_docx_text",
|
|
2167
|
+
description: "Extract text from a local DOCX (Office OpenXML) file. Deterministic, no network.",
|
|
2168
|
+
inputSchema: {
|
|
2169
|
+
type: "object",
|
|
2170
|
+
properties: {
|
|
2171
|
+
path: {
|
|
2172
|
+
type: "string",
|
|
2173
|
+
description: "Path to a local .docx file (absolute or relative to current working directory).",
|
|
2174
|
+
},
|
|
2175
|
+
maxChars: {
|
|
2176
|
+
type: "number",
|
|
2177
|
+
description: "Maximum characters to return (text is truncated).",
|
|
2178
|
+
default: 12000,
|
|
2179
|
+
},
|
|
2180
|
+
maxBytes: {
|
|
2181
|
+
type: "number",
|
|
2182
|
+
description: "Maximum uncompressed bytes to read from word/document.xml (safety cap).",
|
|
2183
|
+
default: 20000000,
|
|
2184
|
+
},
|
|
2185
|
+
},
|
|
2186
|
+
required: ["path"],
|
|
2187
|
+
},
|
|
2188
|
+
handler: async (args) => {
|
|
2189
|
+
const filePath = resolveLocalPath(args?.path);
|
|
2190
|
+
if (!existsSync(filePath))
|
|
2191
|
+
throw new Error(`File not found: ${filePath}`);
|
|
2192
|
+
const maxChars = clampInt(args?.maxChars, 12000, 1000, 200000);
|
|
2193
|
+
const maxBytes = clampInt(args?.maxBytes, 20000000, 1000, 200_000_000);
|
|
2194
|
+
const { buffer } = await zipReadEntryBuffer(filePath, "word/document.xml", {
|
|
2195
|
+
maxBytes,
|
|
2196
|
+
caseSensitive: true,
|
|
2197
|
+
});
|
|
2198
|
+
const xml = buffer.toString("utf8");
|
|
2199
|
+
let text = docxXmlToText(xml);
|
|
2200
|
+
const truncated = text.length > maxChars;
|
|
2201
|
+
if (truncated)
|
|
2202
|
+
text = text.slice(0, maxChars);
|
|
2203
|
+
return {
|
|
2204
|
+
path: filePath,
|
|
2205
|
+
source: "word/document.xml",
|
|
2206
|
+
maxChars,
|
|
2207
|
+
truncated,
|
|
2208
|
+
text,
|
|
2209
|
+
};
|
|
2210
|
+
},
|
|
2211
|
+
},
|
|
2212
|
+
{
|
|
2213
|
+
name: "read_pptx_text",
|
|
2214
|
+
description: "Extract text from a local PPTX (Office OpenXML) file. Deterministic, no network.",
|
|
2215
|
+
inputSchema: {
|
|
2216
|
+
type: "object",
|
|
2217
|
+
properties: {
|
|
2218
|
+
path: {
|
|
2219
|
+
type: "string",
|
|
2220
|
+
description: "Path to a local .pptx file (absolute or relative to current working directory).",
|
|
2221
|
+
},
|
|
2222
|
+
maxChars: {
|
|
2223
|
+
type: "number",
|
|
2224
|
+
description: "Maximum characters to return (text is truncated).",
|
|
2225
|
+
default: 12000,
|
|
2226
|
+
},
|
|
2227
|
+
maxSlides: {
|
|
2228
|
+
type: "number",
|
|
2229
|
+
description: "Maximum slides to process (default: 60).",
|
|
2230
|
+
default: 60,
|
|
2231
|
+
},
|
|
2232
|
+
maxBytesPerSlide: {
|
|
2233
|
+
type: "number",
|
|
2234
|
+
description: "Maximum uncompressed bytes to read per slide XML (safety cap).",
|
|
2235
|
+
default: 10000000,
|
|
2236
|
+
},
|
|
2237
|
+
},
|
|
2238
|
+
required: ["path"],
|
|
2239
|
+
},
|
|
2240
|
+
handler: async (args) => {
|
|
2241
|
+
const filePath = resolveLocalPath(args?.path);
|
|
2242
|
+
if (!existsSync(filePath))
|
|
2243
|
+
throw new Error(`File not found: ${filePath}`);
|
|
2244
|
+
const maxChars = clampInt(args?.maxChars, 12000, 1000, 200000);
|
|
2245
|
+
const maxSlides = clampInt(args?.maxSlides, 60, 1, 500);
|
|
2246
|
+
const maxBytesPerSlide = clampInt(args?.maxBytesPerSlide, 10000000, 1000, 200_000_000);
|
|
2247
|
+
const listing = await zipListEntries(filePath, 5000);
|
|
2248
|
+
const slides = listing.entries
|
|
2249
|
+
.map((e) => e.fileName)
|
|
2250
|
+
.filter((n) => /^ppt\/slides\/slide\d+\.xml$/i.test(n))
|
|
2251
|
+
.map((n) => {
|
|
2252
|
+
const m = n.match(/slide(\d+)\.xml$/i);
|
|
2253
|
+
return { name: n, index: m ? Number.parseInt(m[1], 10) : 0 };
|
|
2254
|
+
})
|
|
2255
|
+
.filter((s) => Number.isFinite(s.index) && s.index > 0)
|
|
2256
|
+
.sort((a, b) => a.index - b.index)
|
|
2257
|
+
.slice(0, maxSlides);
|
|
2258
|
+
let text = "";
|
|
2259
|
+
for (const slide of slides) {
|
|
2260
|
+
const { buffer } = await zipReadEntryBuffer(filePath, slide.name, {
|
|
2261
|
+
maxBytes: maxBytesPerSlide,
|
|
2262
|
+
caseSensitive: true,
|
|
2263
|
+
});
|
|
2264
|
+
const xml = buffer.toString("utf8");
|
|
2265
|
+
const slideText = pptxSlideXmlToText(xml);
|
|
2266
|
+
text += `\n\n[SLIDE ${slide.index}]\n${slideText}\n`;
|
|
2267
|
+
if (text.length > maxChars)
|
|
2268
|
+
break;
|
|
2269
|
+
}
|
|
2270
|
+
text = text.trim();
|
|
2271
|
+
const truncated = text.length > maxChars;
|
|
2272
|
+
if (truncated)
|
|
2273
|
+
text = text.slice(0, maxChars);
|
|
2274
|
+
return {
|
|
2275
|
+
path: filePath,
|
|
2276
|
+
slideCount: slides.length,
|
|
2277
|
+
slidesIncluded: slides.map((s) => s.index),
|
|
2278
|
+
maxChars,
|
|
2279
|
+
truncated,
|
|
2280
|
+
text,
|
|
2281
|
+
};
|
|
2282
|
+
},
|
|
2283
|
+
},
|
|
385
2284
|
];
|
|
386
2285
|
//# sourceMappingURL=localFileTools.js.map
|