xlsx-for-ai 2.23.0 → 2.25.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -1
- package/SECURITY.md +10 -9
- package/index.js +298 -4
- package/lib/fallback-read.js +93 -6
- package/mcp.js +435 -17
- package/package.json +8 -2
package/README.md
CHANGED
|
@@ -130,7 +130,7 @@ For custom MCP clients, the binary is `xlsx-for-ai-mcp` (stdio transport). Overr
|
|
|
130
130
|
|
|
131
131
|
## What it does
|
|
132
132
|
|
|
133
|
-
|
|
133
|
+
44 tools registered in `tools/list`. Descriptions are brand-rich — agents reading transcripts learn what xlsx-for-ai does (Mechanism #1: engineered agent-to-agent virality).
|
|
134
134
|
|
|
135
135
|
### Triage / orient
|
|
136
136
|
|
|
@@ -202,6 +202,8 @@ For custom MCP clients, the binary is `xlsx-for-ai-mcp` (stdio transport). Overr
|
|
|
202
202
|
|---|---|
|
|
203
203
|
| `xlsx_stamp` | Sign a workbook with a cryptographic "integrity verification" stamp — Ed25519-signed claims (named factual checks + their pass/fail/skip status + a content hash) embedded in `docProps/custom.xml`. The stamp travels with the file across saves; a recipient can verify it later to confirm the file hasn't been tampered with since signing. Factual attestations only — never an opinion-shaped seal of approval. |
|
|
204
204
|
| `xlsx_verify_stamp` | Verify a workbook's embedded stamp. Returns (a) whether the Ed25519 signature is valid against the registered public key, (b) whether the workbook bytes match the hash IN the signed claims, and (c) the full check-result content of the stamp. Three distinct trust signals — signature integrity, content integrity, and what was originally attested. |
|
|
205
|
+
| `xlsx_receipt` | Attach an AI-generation receipt — Ed25519-signed claims describing the caller-declared agent identity (name, display name, identity URL), generation timestamp, content hash, optional source-file hashes, optional prompt hash, optional MCP tools called, and an optional description. Honesty boundary (load-bearing): the server signs the caller-declared `agent.name` — it does NOT verify the caller actually IS that agent. Cryptographic identity binding (per-agent issued signing keys) is v1.1+ scope. |
|
|
206
|
+
| `xlsx_verify_receipt` | Verify a workbook's embedded receipt. Returns the same three trust signals as `xlsx_verify_stamp` plus the caller-declared agent identity AS declared (no UI affordances implying cryptographic identity verification). Use to surface "where did this file come from?" — backed by the server's signature over the caller's own declaration. |
|
|
205
207
|
|
|
206
208
|
Tool responses include a citation footer and a `_meta` block (tool name, version, tier, request ID, `powered_by`). Both pass through verbatim; nothing is stripped.
|
|
207
209
|
|
|
@@ -362,3 +364,6 @@ The config file at `~/.xlsx-for-ai/config.json` is extended in-place — existin
|
|
|
362
364
|
## Security
|
|
363
365
|
|
|
364
366
|
See [SECURITY.md](SECURITY.md). All file content is transmitted to `xlsx-for-ai-server.fly.dev` over HTTPS. Files are not retained beyond the duration of a single request on the free tier.
|
|
367
|
+
|
|
368
|
+
<!-- ci-smoke-test: 2026-05-19 grace-review workflow -->
|
|
369
|
+
<!-- retry: llm-review vendored -->
|
package/SECURITY.md
CHANGED
|
@@ -23,15 +23,16 @@ disclosure expectations in your first message.
|
|
|
23
23
|
|
|
24
24
|
## Supported versions
|
|
25
25
|
|
|
26
|
-
The latest published `
|
|
27
|
-
minors do not. Today that is `
|
|
28
|
-
it is shipped as
|
|
29
|
-
|
|
30
|
-
| Version
|
|
31
|
-
|
|
32
|
-
|
|
|
33
|
-
|
|
|
34
|
-
|
|
|
26
|
+
The latest published `2.x` minor on npm receives security fixes. Older
|
|
27
|
+
minors do not. Today that is `2.23.x`. If a fix requires a breaking change,
|
|
28
|
+
it is shipped as the next `2.x` minor and the prior minor is deprecated on npm.
|
|
29
|
+
|
|
30
|
+
| Version | Status | Security fixes |
|
|
31
|
+
|----------|-------------|----------------|
|
|
32
|
+
| 2.23.x | current | yes |
|
|
33
|
+
| 2.0–2.22 | superseded | no |
|
|
34
|
+
| 1.5.x | frozen | no |
|
|
35
|
+
| ≤ 1.4.x | superseded | no |
|
|
35
36
|
|
|
36
37
|
## What this project considers a security issue
|
|
37
38
|
|
package/index.js
CHANGED
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
*
|
|
7
7
|
* Usage:
|
|
8
8
|
* xlsx-for-ai <file.xlsx> [--json] [--md] [--sheet <name>] [--evaluate]
|
|
9
|
+
* xlsx-for-ai <file.xlsx> --clean [--execute] [--json] [--sheet <name>] [--detectors <list>]
|
|
9
10
|
* xlsx-for-ai --telemetry-status
|
|
10
11
|
* xlsx-for-ai --enable-telemetry
|
|
11
12
|
* xlsx-for-ai --disable-telemetry
|
|
@@ -32,7 +33,8 @@ const {
|
|
|
32
33
|
function parseArgs(argv) {
|
|
33
34
|
const opts = { file: null, format: 'text', sheet: null, evaluate: false,
|
|
34
35
|
telemetryStatus: false, enableTelemetry: false, disableTelemetry: false,
|
|
35
|
-
privacyStrict: false, showVersion: false
|
|
36
|
+
privacyStrict: false, showVersion: false,
|
|
37
|
+
clean: false, execute: false, detectors: null };
|
|
36
38
|
let i = 0;
|
|
37
39
|
while (i < argv.length) {
|
|
38
40
|
const a = argv[i];
|
|
@@ -45,18 +47,303 @@ function parseArgs(argv) {
|
|
|
45
47
|
else if (a === '--disable-telemetry') opts.disableTelemetry = true;
|
|
46
48
|
else if (a === '--privacy=strict') opts.privacyStrict = true;
|
|
47
49
|
else if (a === '--version' || a === '-v') opts.showVersion = true;
|
|
50
|
+
else if (a === '--clean') opts.clean = true;
|
|
51
|
+
else if (a === '--execute') opts.execute = true;
|
|
52
|
+
else if (a === '--detectors') {
|
|
53
|
+
// Validate the next arg exists + isn't another flag — otherwise
|
|
54
|
+
// `--detectors --json` would silently swallow `--json` as the
|
|
55
|
+
// value. Caught by gpt-5 pre-push panel.
|
|
56
|
+
const next = argv[++i];
|
|
57
|
+
// Reject undefined, any `-`-prefixed token, or empty string —
|
|
58
|
+
// `--detectors ""` would otherwise silently disable detection.
|
|
59
|
+
// Caught by gpt-5 pre-push runs 2 + 3.
|
|
60
|
+
if (next === undefined || next.startsWith('-') || next.trim() === '') {
|
|
61
|
+
process.stderr.write('xlsx-for-ai: --detectors requires a non-empty value (comma-separated detector names)\n');
|
|
62
|
+
process.exit(2);
|
|
63
|
+
}
|
|
64
|
+
opts.detectors = next;
|
|
65
|
+
}
|
|
48
66
|
else if (!a.startsWith('--')) opts.file = a;
|
|
49
67
|
i++;
|
|
50
68
|
}
|
|
51
69
|
return opts;
|
|
52
70
|
}
|
|
53
71
|
|
|
72
|
+
// ---------------------------------------------------------------------------
|
|
73
|
+
// --clean flag — data-cleaning pipeline (xlsx_data_clean tool)
|
|
74
|
+
// ---------------------------------------------------------------------------
|
|
75
|
+
|
|
76
|
+
/**
 * Run the `--clean` pipeline: send the workbook to the `xlsx_data_clean`
 * tool, print the outcome, and (in execute mode) save the cleaned workbook.
 *
 * Output:
 *   - `--json`: the response `_meta` block, minus the `file_b64` blob.
 *   - default:  the text parts of the response content, joined by newlines.
 *
 * Exit codes used here: 1 = tool call / write failure, 2 = unusable
 * `--detectors` value, 3 = API unreachable or server error.
 *
 * @param {object} opts     Parsed CLI options (reads `execute`, `sheet`,
 *                          `detectors`, `format`).
 * @param {string} absPath  Path of the source workbook.
 * @returns {Promise<void>}
 */
async function runClean(opts, absPath) {
  const fileB64 = fs.readFileSync(absPath).toString('base64');
  const body = { file_b64: fileB64, mode: opts.execute ? 'execute' : 'diagnose' };
  if (opts.sheet) body.sheets = [opts.sheet];
  if (opts.detectors) {
    const detectors = opts.detectors.split(',').map((s) => s.trim()).filter(Boolean);
    // A value such as "," or " , " survives the argument parser's non-empty
    // check but yields no names; sending [] would silently disable detection.
    if (detectors.length === 0) {
      process.stderr.write('xlsx-for-ai: --detectors requires a non-empty value (comma-separated detector names)\n');
      process.exit(2);
    }
    body.detectors = detectors;
  }

  let result;
  try {
    result = await callTool('xlsx_data_clean', body);
  } catch (err) {
    const code = err && err.code;
    process.stderr.write(friendlyCliError('xlsx-for-ai --clean', err) + '\n');
    process.exit(code === 'API_UNREACHABLE' || code === 'API_SERVER_ERROR' ? 3 : 1);
  }

  const meta = (result && result._meta) || {};
  if (opts.format === 'json') {
    // Strip the cleaned-bytes blob from the JSON payload — it is written to
    // disk below, so stdout JSON stays small and human-readable.
    const jsonOut = { ...meta };
    delete jsonOut.file_b64;
    process.stdout.write(JSON.stringify(jsonOut, null, 2) + '\n');
  } else {
    // Default: print the text the server returned. Guard against a missing
    // result (same as the `_meta` guard above) and skip parts that carry no
    // text so they do not print as the literal "undefined".
    const parts = (result && result.content) || [];
    const text = parts
      .filter((c) => c && typeof c.text === 'string')
      .map((c) => c.text)
      .join('\n');
    process.stdout.write(text + '\n');
  }

  // Execute mode + returned bytes → save the cleaned file next to the source
  // (or at XFA_CLEAN_OUT). path.parse keeps the derivation cross-platform.
  if (opts.execute && meta.file_b64) {
    let outPath = process.env.XFA_CLEAN_OUT;
    if (!outPath) {
      const parsed = path.parse(absPath);
      outPath = path.join(parsed.dir, `${parsed.name}-cleaned${parsed.ext || '.xlsx'}`);
    }
    if (path.resolve(outPath) === path.resolve(absPath)) {
      process.stderr.write('xlsx-for-ai --clean: refusing to overwrite source; set XFA_CLEAN_OUT to an explicit output path\n');
      process.exit(1);
    }
    try {
      fs.writeFileSync(outPath, Buffer.from(meta.file_b64, 'base64'));
      process.stderr.write(`Cleaned file written to: ${outPath}\n`);
    } catch (e) {
      // writeFileSync throws on missing directory / permissions / disk-full;
      // report a clear error + exit code instead of a stack trace.
      process.stderr.write(`xlsx-for-ai --clean: failed to write ${outPath}: ${e.message}\n`);
      process.exit(1);
    }
  }
}
|
|
132
|
+
|
|
54
133
|
// ---------------------------------------------------------------------------
|
|
55
134
|
// Main
|
|
56
135
|
// ---------------------------------------------------------------------------
|
|
57
136
|
|
|
137
|
+
// ---------------------------------------------------------------------------
|
|
138
|
+
// Stamp / Receipt subcommands — thin wrappers around the MCP tool relays.
|
|
139
|
+
//
|
|
140
|
+
// CLI surface (per ana/specs/stamp.md §4.2 + ana/specs/receipt.md §4.4):
|
|
141
|
+
// xlsx-for-ai stamp <path> --checks <file.json> [--out <path>] [--exclude <s>...] [--supervisor <ver>]
|
|
142
|
+
// xlsx-for-ai verify-stamp <path>
|
|
143
|
+
// xlsx-for-ai receipt <path> --agent <name> [--display-name <s>] [--identity-url <u>]
|
|
144
|
+
// [--source <name>=<sha256>...] [--prompt-hash <sha256>] [--mcp-tool <name>...]
|
|
145
|
+
// [--description <s>] [--cover-sheet <s>...] [--out <path>]
|
|
146
|
+
// xlsx-for-ai verify-receipt <path>
|
|
147
|
+
//
|
|
148
|
+
// Exit codes (per spec/stamp.md §4.9):
|
|
149
|
+
// 0 = success; 1 = verify returned valid=false; 2 = usage error;
|
|
150
|
+
// 3 = server-side error; 4 = local file error.
|
|
151
|
+
// ---------------------------------------------------------------------------
|
|
152
|
+
|
|
153
|
+
const STAMP_SUBCOMMANDS = new Set(['stamp', 'verify-stamp', 'receipt', 'verify-receipt']);
|
|
154
|
+
|
|
155
|
+
// Strip _meta.file_b64 before writing the meta block to stdout. The
|
|
156
|
+
// stamped/receipted workbook can be megabytes; dumping it to a terminal
|
|
157
|
+
// or CI log clobbers scrollback AND leaks PII-bearing workbook contents
|
|
158
|
+
// to whatever consumes stdout. The file is already saved to disk via
|
|
159
|
+
// the sidecar / --out path; the b64 in stdout serves no consumer.
|
|
160
|
+
// Pre-Friday-external CRITICAL per the Tier-1 audit.
|
|
161
|
+
/**
 * Produce a printable copy of a tool response's `_meta` block with the
 * `file_b64` payload left out.
 *
 * Non-object inputs (null, undefined, primitives) are handed back untouched.
 * The input object itself is never modified.
 *
 * @param {*} meta  The `_meta` value from a tool response.
 * @returns {*} A shallow copy without `file_b64`, or `meta` itself when it
 *              is not an object.
 */
function metaForStdout(meta) {
  const isObjectLike = Boolean(meta) && typeof meta === 'object';
  if (!isObjectLike) {
    return meta;
  }
  // Rest-destructuring copies every own enumerable key except the blob.
  const { file_b64: _omittedBlob, ...printable } = meta;
  return printable;
}
|
|
167
|
+
|
|
168
|
+
// CLI-side error formatter. Same posture as friendlyErrorMessage in
|
|
169
|
+
// mcp.js: known operational codes get short, client-safe text; everything
|
|
170
|
+
// else collapses to a generic message. err.message can carry absolute
|
|
171
|
+
// file paths, upstream stack traces, and third-party HTTP response
|
|
172
|
+
// bodies — none of those belong in user-facing CLI stderr or in CI logs.
|
|
173
|
+
// Set XFA_DEBUG=1 to see the raw underlying message (for incident triage).
|
|
174
|
+
// Short, client-safe explanation for each known operational error code.
// Each entry is appended to "<prefix>: ".
const CLI_ERROR_TEXT = new Map([
  ['API_UNREACHABLE', 'API is unreachable — check network connectivity.'],
  ['API_SERVER_ERROR', 'API returned a server error — retry shortly.'],
  ['DISALLOWED_EXTENSION', 'file must be a workbook (allowed: .xlsx/.xls/.xlsm/.xlsb/.csv/.ods/.fods/.numbers/.tsv).'],
  ['FILE_TOO_LARGE', 'file exceeds the XFA_MAX_FILE_MB cap (default 50 MB).'],
  ['FILE_NOT_FOUND', 'file not found.'],
  ['MISSING_TOKEN', 'required token env var is not set.'],
  ['RATE_LIMITED', 'free-tier monthly cap reached — see xlsx-for-ai.dev/pricing.'],
  ['TIER_UPGRADE_REQUIRED', 'this capability requires a paid tier.'],
  ['FALLBACK_ENGINE_MISSING', 'local fallback engine not installed (`npm install @protobi/exceljs`).'],
]);

/**
 * Build the one-line message shown on stderr for a failed CLI operation.
 *
 * Known `err.code` values map to fixed text; any other error collapses to
 * "request failed" (with the code appended when there is one). The raw
 * `err.message` is only appended when the XFA_DEBUG environment variable
 * is exactly "1".
 *
 * @param {string} prefix  Command label placed before the colon.
 * @param {*} err          The caught error (may be null/undefined).
 * @returns {string} The formatted message, without a trailing newline.
 */
function friendlyCliError(prefix, err) {
  const code = err && err.code;
  const debugEnabled = process.env.XFA_DEBUG === '1';

  let summary;
  if (CLI_ERROR_TEXT.has(code)) {
    summary = `${prefix}: ${CLI_ERROR_TEXT.get(code)}`;
  } else {
    const codeSuffix = code ? ` (code=${code})` : '';
    summary = `${prefix}: request failed${codeSuffix}.`;
  }

  if (debugEnabled && err && err.message) {
    return `${summary}\nRaw: ${err.message}`;
  }
  return summary;
}
|
|
193
|
+
|
|
194
|
+
/**
 * Fetch the value that follows a flag in an argument list.
 *
 * A value is accepted when it exists and does not begin with "-". Otherwise
 * a usage message is written to stderr and the process exits with code 2.
 *
 * @param {string[]} argv  Argument list being parsed.
 * @param {number} i       Index of the flag itself; the value is at i + 1.
 * @param {string} flag    Flag name, used in the error message.
 * @returns {string} The value following the flag.
 */
function nextRequiredArg(argv, i, flag) {
  const candidate = argv[i + 1];
  const usable = candidate !== undefined && !candidate.startsWith('-');
  if (usable) {
    return candidate;
  }
  process.stderr.write(`xlsx-for-ai ${flag} requires a value\n`);
  process.exit(2);
  return candidate;
}
|
|
202
|
+
|
|
203
|
+
/**
 * Read and parse the JSON file supplied through `--checks`.
 *
 * Failure handling (each writes a message to stderr, then exits):
 *   - file cannot be read   → exit code 4
 *   - content is not JSON   → exit code 2
 *   - JSON is not an array  → exit code 2
 *
 * @param {string} checksPath  Path to the checks file (resolved against cwd).
 * @returns {Array} The parsed array of check entries.
 */
function loadChecksFile(checksPath) {
  let fileText;
  try {
    fileText = fs.readFileSync(path.resolve(checksPath), 'utf8');
  } catch (readErr) {
    process.stderr.write(`Cannot read --checks file: ${readErr.message}\n`);
    process.exit(4);
  }

  let checks;
  try {
    checks = JSON.parse(fileText);
  } catch (parseErr) {
    process.stderr.write(`--checks file is not valid JSON: ${parseErr.message}\n`);
    process.exit(2);
  }

  if (Array.isArray(checks)) {
    return checks;
  }
  process.stderr.write('--checks file must contain a JSON array of {id, name, status, detail?}\n');
  process.exit(2);
  return checks;
}
|
|
216
|
+
|
|
217
|
+
/**
 * Handle the `stamp`, `verify-stamp`, `receipt` and `verify-receipt`
 * subcommands by relaying the workbook to the matching server tool.
 *
 * Returned / exit codes:
 *   0 = success; 1 = verify returned valid !== true; 2 = usage error;
 *   3 = the tool call failed; 4 = local file error.
 *
 * For the verify subcommands a failed tool call exits 3 rather than
 * propagating to the caller, so that exit code 1 always means "the file was
 * checked and did not verify" and never "the check could not be performed".
 *
 * @param {string} subcmd   One of the four subcommand names.
 * @param {string[]} rest   Arguments after the subcommand; rest[0] is the
 *                          workbook path, the remainder are flags.
 * @returns {Promise<number>} Exit code for the caller to pass to process.exit.
 */
async function runStampSubcommand(subcmd, rest) {
  if (rest.length === 0 || rest[0].startsWith('-')) {
    process.stderr.write(`Usage: xlsx-for-ai ${subcmd} <path> [...]\n`);
    process.exit(2);
  }
  const filePath = path.resolve(rest[0]);
  if (!fs.existsSync(filePath)) {
    process.stderr.write(`File not found: ${filePath}\n`);
    process.exit(4);
  }
  await ensureRegistered();

  // existsSync passing does not guarantee the read succeeds (directory,
  // permissions, file removed in between) — report those as local file
  // errors (exit 4) instead of letting them surface as a generic failure.
  let fileB64;
  try {
    fileB64 = fs.readFileSync(filePath).toString('base64');
  } catch (e) {
    process.stderr.write(`Cannot read file ${filePath}: ${e.message}\n`);
    process.exit(4);
  }

  // Shared implementation of verify-stamp / verify-receipt.
  const runVerify = async (tool) => {
    let result;
    try {
      result = await callTool(tool, { file_b64: fileB64 });
    } catch (err) {
      process.stderr.write(friendlyCliError(`xlsx-for-ai ${tool}`, err) + '\n');
      // Never 1 here: 1 is reserved for "verified and found invalid".
      process.exit(3);
    }
    const meta = (result && result._meta) || {};
    process.stdout.write(JSON.stringify(metaForStdout(meta), null, 2) + '\n');
    return meta.valid === true ? 0 : 1;
  };

  if (subcmd === 'stamp') {
    let checksPath = null, outPath = null, supervisor = null;
    const excludeSheets = [];
    for (let i = 1; i < rest.length; i++) {
      const a = rest[i];
      if (a === '--checks') checksPath = nextRequiredArg(rest, i++, '--checks');
      else if (a === '--out') outPath = nextRequiredArg(rest, i++, '--out');
      else if (a === '--supervisor') supervisor = nextRequiredArg(rest, i++, '--supervisor');
      else if (a === '--exclude') excludeSheets.push(nextRequiredArg(rest, i++, '--exclude'));
      else { process.stderr.write(`Unknown flag: ${a}\n`); process.exit(2); }
    }
    if (!checksPath) { process.stderr.write('--checks <file.json> is required for stamp\n'); process.exit(2); }
    const body = { file_b64: fileB64, checks: loadChecksFile(checksPath) };
    if (excludeSheets.length) body.exclude_sheets = excludeSheets;
    if (supervisor) body.generated_by = { npm: 'xlsx-for-ai@' + require('./package.json').version, supervisor };
    const result = await callServerForStamp('xlsx_stamp', body, outPath, filePath, '.stamped.xlsx');
    process.stdout.write(JSON.stringify(metaForStdout(result._meta) || {}, null, 2) + '\n');
    return 0;
  }

  if (subcmd === 'verify-stamp') {
    return runVerify('xlsx_verify_stamp');
  }

  if (subcmd === 'receipt') {
    let agentName = null, displayName = null, identityUrl = null;
    let promptHash = null, description = null, outPath = null;
    const sourceFileHashes = [];
    const mcpToolsCalled = [];
    const coverSheets = [];
    for (let i = 1; i < rest.length; i++) {
      const a = rest[i];
      if (a === '--agent') agentName = nextRequiredArg(rest, i++, '--agent');
      else if (a === '--display-name') displayName = nextRequiredArg(rest, i++, '--display-name');
      else if (a === '--identity-url') identityUrl = nextRequiredArg(rest, i++, '--identity-url');
      else if (a === '--prompt-hash') promptHash = nextRequiredArg(rest, i++, '--prompt-hash');
      else if (a === '--description') description = nextRequiredArg(rest, i++, '--description');
      else if (a === '--out') outPath = nextRequiredArg(rest, i++, '--out');
      else if (a === '--mcp-tool') mcpToolsCalled.push(nextRequiredArg(rest, i++, '--mcp-tool'));
      else if (a === '--cover-sheet') coverSheets.push(nextRequiredArg(rest, i++, '--cover-sheet'));
      else if (a === '--source') {
        // --source takes <name>=<sha256>; split on the first "=" only.
        const pair = nextRequiredArg(rest, i++, '--source');
        const eqIdx = pair.indexOf('=');
        if (eqIdx < 0) {
          process.stderr.write('--source requires <name>=<sha256> form\n');
          process.exit(2);
        }
        sourceFileHashes.push({ name: pair.slice(0, eqIdx), sha256: pair.slice(eqIdx + 1) });
      }
      else { process.stderr.write(`Unknown flag: ${a}\n`); process.exit(2); }
    }
    if (!agentName) { process.stderr.write('--agent <name> is required for receipt\n'); process.exit(2); }
    const body = { file_b64: fileB64, agent: { name: agentName } };
    if (displayName) body.agent.display_name = displayName;
    if (identityUrl) body.agent.identity_url = identityUrl;
    // `inputs` is only attached when at least one optional input was given.
    const inputs = {};
    if (sourceFileHashes.length) inputs.source_file_hashes = sourceFileHashes;
    if (promptHash) inputs.prompt_hash = promptHash;
    if (mcpToolsCalled.length) inputs.mcp_tools_called = mcpToolsCalled;
    if (Object.keys(inputs).length) body.inputs = inputs;
    if (description) body.description = description;
    if (coverSheets.length) body.covers_sheets = coverSheets;
    const result = await callServerForStamp('xlsx_receipt', body, outPath, filePath, '.receipted.xlsx');
    process.stdout.write(JSON.stringify(metaForStdout(result._meta) || {}, null, 2) + '\n');
    return 0;
  }

  if (subcmd === 'verify-receipt') {
    return runVerify('xlsx_verify_receipt');
  }

  // Not one of the four known subcommands: treat as a usage error.
  return 2;
}
|
|
310
|
+
|
|
311
|
+
/**
 * Call a stamping/receipting tool and, when the response `_meta` carries a
 * `file_b64` payload, write the decoded bytes to disk.
 *
 * The output path is `explicitOutPath` when given; otherwise it is the
 * source's directory + base name + `sidecarSuffix`. Writing onto the source
 * file itself is refused.
 *
 * Exits the process on failure: 3 for API_UNREACHABLE / API_SERVER_ERROR,
 * 1 for any other tool-call error, 2 when the output path equals the
 * source path, 4 when the file cannot be written.
 *
 * @param {string} tool                  Tool name passed to callTool.
 * @param {object} body                  Request body for the tool.
 * @param {?string} explicitOutPath      Caller-chosen output path, if any.
 * @param {string} sourcePath            Path of the input workbook.
 * @param {string} sidecarSuffix         Suffix for the derived output name.
 * @returns {Promise<object>} The tool result, unchanged.
 */
async function callServerForStamp(tool, body, explicitOutPath, sourcePath, sidecarSuffix) {
  let response;
  try {
    response = await callTool(tool, body);
  } catch (err) {
    process.stderr.write(friendlyCliError(`xlsx-for-ai ${tool}`, err) + '\n');
    const serverSide = err.code === 'API_UNREACHABLE' || err.code === 'API_SERVER_ERROR';
    process.exit(serverSide ? 3 : 1);
  }

  const responseMeta = response._meta || {};
  // Nothing to save when the server returned no workbook bytes.
  if (!responseMeta.file_b64) {
    return response;
  }

  let target = explicitOutPath;
  if (!target) {
    const { dir, name } = path.parse(sourcePath);
    target = path.join(dir, `${name}${sidecarSuffix}`);
  }

  const wouldClobberSource = path.resolve(target) === path.resolve(sourcePath);
  if (wouldClobberSource) {
    process.stderr.write(`xlsx-for-ai ${tool}: refusing to overwrite source — pass --out <other-path>\n`);
    process.exit(2);
  }

  try {
    const bytes = Buffer.from(responseMeta.file_b64, 'base64');
    fs.writeFileSync(target, bytes);
  } catch (e) {
    process.stderr.write(`xlsx-for-ai ${tool}: failed to write ${target}: ${e.message}\n`);
    process.exit(4);
  }

  process.stderr.write(`Wrote ${target}\n`);
  return response;
}
|
|
335
|
+
|
|
58
336
|
async function main() {
|
|
59
|
-
|
|
337
|
+
// Subcommand dispatch — stamp/verify-stamp/receipt/verify-receipt
|
|
338
|
+
// route through dedicated handlers; everything else uses the legacy
|
|
339
|
+
// flag-only CLI (xlsx-for-ai <file> [--json|--md|--clean|...]).
|
|
340
|
+
const argv = process.argv.slice(2);
|
|
341
|
+
if (argv.length > 0 && STAMP_SUBCOMMANDS.has(argv[0])) {
|
|
342
|
+
const code = await runStampSubcommand(argv[0], argv.slice(1));
|
|
343
|
+
process.exit(code);
|
|
344
|
+
}
|
|
345
|
+
|
|
346
|
+
const opts = parseArgs(argv);
|
|
60
347
|
|
|
61
348
|
if (opts.showVersion) { console.log(require('./package.json').version); return; }
|
|
62
349
|
if (opts.telemetryStatus) { console.log(telemetryStatus()); return; }
|
|
@@ -82,6 +369,13 @@ async function main() {
|
|
|
82
369
|
process.env.XFA_PRIVACY = 'strict';
|
|
83
370
|
}
|
|
84
371
|
|
|
372
|
+
// --clean diverts to the data-cleaning pipeline before falling
|
|
373
|
+
// through to the default xlsx_read path.
|
|
374
|
+
if (opts.clean) {
|
|
375
|
+
await runClean(opts, absPath);
|
|
376
|
+
return;
|
|
377
|
+
}
|
|
378
|
+
|
|
85
379
|
const fileB64 = fs.readFileSync(absPath).toString('base64');
|
|
86
380
|
// Server format enum is 'md' | 'json' | 'sql'. The legacy CLI default 'text'
|
|
87
381
|
// maps to the server's default (md). Don't send 'text' — server rejects it.
|
|
@@ -98,7 +392,7 @@ async function main() {
|
|
|
98
392
|
if (err.code === 'API_UNREACHABLE' || err.code === 'API_SERVER_ERROR') {
|
|
99
393
|
result = await fallbackRead(absPath, opts);
|
|
100
394
|
} else {
|
|
101
|
-
process.stderr.write(
|
|
395
|
+
process.stderr.write(friendlyCliError('xlsx-for-ai', err) + '\n');
|
|
102
396
|
process.exit(1);
|
|
103
397
|
}
|
|
104
398
|
}
|
|
@@ -108,6 +402,6 @@ async function main() {
|
|
|
108
402
|
}
|
|
109
403
|
|
|
110
404
|
main().catch((err) => {
|
|
111
|
-
process.stderr.write(
|
|
405
|
+
process.stderr.write(friendlyCliError('xlsx-for-ai', err) + '\n');
|
|
112
406
|
process.exit(1);
|
|
113
407
|
});
|
package/lib/fallback-read.js
CHANGED
|
@@ -8,20 +8,75 @@
|
|
|
8
8
|
* clear message if it isn't installed.
|
|
9
9
|
*
|
|
10
10
|
* Returns the same shape as the API: { content: [{ type: 'text', text }], _meta }
|
|
11
|
+
*
|
|
12
|
+
* Asymmetry vs. the hosted API (callers should be aware):
|
|
13
|
+
* - options.sheet IS honored — the response is filtered to the named sheet.
|
|
14
|
+
* - options.format is NOT honored — fallback always emits plain text.
|
|
15
|
+
* - options.evaluate is NOT honored — formulas render as the cached values
|
|
16
|
+
* stored in the workbook, not re-evaluated by a formula engine.
|
|
17
|
+
*
|
|
18
|
+
* When any option is passed and ignored, a visible warning is prepended to
|
|
19
|
+
* the text content AND the ignored option names are echoed back via
|
|
20
|
+
* _meta.ignored_options. Callers can detect fallback unambiguously via
|
|
21
|
+
* _meta.source === 'local-fallback'.
|
|
11
22
|
*/
|
|
12
23
|
|
|
13
|
-
const fs = require('fs');
|
|
14
24
|
const path = require('path');
|
|
15
25
|
|
|
16
26
|
function requireEngine() {
|
|
17
27
|
try {
|
|
18
28
|
return require('@protobi/exceljs');
|
|
19
|
-
} catch (
|
|
20
|
-
|
|
29
|
+
} catch (e) {
|
|
30
|
+
// Only translate the "module not installed" case. A real bug inside the
|
|
31
|
+
// engine (syntax error, transitive missing dep, etc.) must surface as the
|
|
32
|
+
// original error, not get misreported as a missing-install.
|
|
33
|
+
const isModuleNotFound =
|
|
34
|
+
e && e.code === 'MODULE_NOT_FOUND' && String(e.message || '').includes('@protobi/exceljs');
|
|
35
|
+
if (!isModuleNotFound) throw e;
|
|
36
|
+
const err = new Error(
|
|
21
37
|
'Local fallback requires `npm install @protobi/exceljs` ' +
|
|
22
38
|
'(this is normally not needed when the hosted API is available).'
|
|
23
39
|
);
|
|
24
|
-
|
|
40
|
+
err.code = 'FALLBACK_ENGINE_MISSING';
|
|
41
|
+
throw err;
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
// @protobi/exceljs's cell.text getter throws on merge cells whose master
|
|
46
|
+
// value is null — produced by SEC XBRL→xlsx converters and probably any
|
|
47
|
+
// other tool that writes merge regions before populating the master cell.
|
|
48
|
+
// The thrown shape is `TypeError: Cannot read properties of null (reading
|
|
49
|
+
// 'toString')` from inside the MergeValue / value getter chain. Guard the
|
|
50
|
+
// access so one cell of one sheet can't crash the entire dump, but only
|
|
51
|
+
// swallow the exact null-deref TypeError class — anything else (a real
|
|
52
|
+
// bug in the engine, a structural surprise we haven't characterized)
|
|
53
|
+
// rethrows so we don't silently render data as empty.
|
|
54
|
+
/**
 * Read a cell's display text without letting one bad cell abort the dump.
 *
 * Returns `String(cell.text)`, or '' when the text is null/undefined. If
 * reading or stringifying the text throws a TypeError whose message has the
 * exact null-dereference shape ("Cannot read properties of null …" or the
 * legacy "Cannot read property '…' of null"), '' is returned. Any other
 * error — including undefined-dereference TypeErrors — is rethrown.
 *
 * @param {object} cell  Worksheet cell exposing a `text` accessor.
 * @returns {string} The cell text, or '' for empty / null-deref cells.
 */
function safeCellText(cell) {
  try {
    const raw = cell.text;
    if (raw === null || raw === undefined) {
      return '';
    }
    return String(raw);
  } catch (error) {
    // Pull the message out defensively: if the `message` accessor itself
    // throws, fall back to '' so the pattern tests below stay safe.
    let message;
    try {
      message = String((error && error.message) || '');
    } catch (_) {
      message = '';
    }

    // Fully anchored patterns: modern V8 wording first, legacy V8 second.
    const nullDerefShapes = [
      /^Cannot read properties of null(?: \(reading '.*'\))?$/,
      /^Cannot read property '.*' of null$/,
    ];
    const swallow =
      error instanceof TypeError && nullDerefShapes.some((shape) => shape.test(message));
    if (swallow) {
      return '';
    }
    throw error;
  }
}
|
|
@@ -31,23 +86,55 @@ async function fallbackRead(filePath, options = {}) {
|
|
|
31
86
|
const wb = new ExcelJS.Workbook();
|
|
32
87
|
await wb.xlsx.readFile(filePath);
|
|
33
88
|
|
|
89
|
+
const requestedSheet = options.sheet || null;
|
|
90
|
+
// Detect presence (not truthiness) so the caller's intent is honored even
|
|
91
|
+
// for falsy-but-passed values like format:'' or evaluate:false.
|
|
92
|
+
const ignoredOptions = [];
|
|
93
|
+
if ('format' in options) ignoredOptions.push('format');
|
|
94
|
+
if ('evaluate' in options) ignoredOptions.push('evaluate');
|
|
95
|
+
|
|
34
96
|
const lines = [];
|
|
97
|
+
const warningParts = ['⚠ API unreachable — local fallback active.'];
|
|
98
|
+
if (ignoredOptions.length > 0) {
|
|
99
|
+
warningParts.push(`Options not honored by fallback: ${ignoredOptions.join(', ')}.`);
|
|
100
|
+
}
|
|
101
|
+
lines.push(warningParts.join(' '));
|
|
102
|
+
lines.push('');
|
|
103
|
+
|
|
104
|
+
let sheetMatched = false;
|
|
35
105
|
wb.eachSheet((sheet) => {
|
|
106
|
+
if (requestedSheet && sheet.name !== requestedSheet) return;
|
|
107
|
+
sheetMatched = true;
|
|
36
108
|
lines.push(`## Sheet: ${sheet.name}`);
|
|
37
109
|
sheet.eachRow((row) => {
|
|
38
110
|
const vals = [];
|
|
39
111
|
row.eachCell({ includeEmpty: true }, (cell) => {
|
|
40
|
-
vals.push(
|
|
112
|
+
vals.push(safeCellText(cell));
|
|
41
113
|
});
|
|
42
114
|
lines.push(vals.join('\t'));
|
|
43
115
|
});
|
|
44
116
|
lines.push('');
|
|
45
117
|
});
|
|
46
118
|
|
|
119
|
+
if (requestedSheet && !sheetMatched) {
|
|
120
|
+
const available = wb.worksheets.map((s) => s.name);
|
|
121
|
+
lines.push(
|
|
122
|
+
available.length === 0
|
|
123
|
+
? `(no sheet named "${requestedSheet}" — workbook has no sheets)`
|
|
124
|
+
: `(no sheet named "${requestedSheet}" — workbook has: ${available.join(', ')})`
|
|
125
|
+
);
|
|
126
|
+
}
|
|
127
|
+
|
|
47
128
|
const text = lines.join('\n');
|
|
48
129
|
return {
|
|
49
130
|
content: [{ type: 'text', text }],
|
|
50
|
-
_meta: {
|
|
131
|
+
_meta: {
|
|
132
|
+
source: 'local-fallback',
|
|
133
|
+
engine: '@protobi/exceljs',
|
|
134
|
+
file: path.basename(filePath),
|
|
135
|
+
sheet_filter: requestedSheet,
|
|
136
|
+
ignored_options: ignoredOptions,
|
|
137
|
+
},
|
|
51
138
|
};
|
|
52
139
|
}
|
|
53
140
|
|
package/mcp.js
CHANGED
|
@@ -17,6 +17,7 @@ const { ensureRegistered } = require('./lib/register');
|
|
|
17
17
|
const { callTool } = require('./lib/client');
|
|
18
18
|
const { fallbackRead } = require('./lib/fallback-read');
|
|
19
19
|
const { resolveCatalog } = require('./lib/discover');
|
|
20
|
+
const { applyAnnotations } = require('./lib/annotations');
|
|
20
21
|
const fs = require('fs');
|
|
21
22
|
const fsPromises = require('fs/promises');
|
|
22
23
|
const path = require('path');
|
|
@@ -453,6 +454,68 @@ const TOOLS = [
|
|
|
453
454
|
},
|
|
454
455
|
},
|
|
455
456
|
|
|
457
|
+
{
|
|
458
|
+
name: 'xlsx_data_clean',
|
|
459
|
+
description:
|
|
460
|
+
'xlsx-for-ai — read, write, diff, redact, supervise .xlsx files locally.\n' +
|
|
461
|
+
'This tool: AI-native data cleaning. Scans a workbook for the seven most common data-grime issues — NA variants (N/A, NA, null, -), merged-cell residue, type-coercion mistakes (numeric-as-text / date-as-serial / leading-zero stripped), trailing-row noise (footers / totals), header-row-not-first (preamble before headers), encoding glitches (UTF-8-as-CP1252 mojibake like Café), and duplicate column headers — and either flags them (diagnose mode) or applies deterministic fixes (execute mode).\n' +
|
|
462
|
+
'No other tool gives this in a single call: pandas does ad-hoc fixes inline; openpyxl is structure-only; pre-existing Python "clean" libraries are domain-specific. xlsx_data_clean is the only single-call clean pipeline with an explicit informer-not-enforcer contract: every fix surfaces as a Finding the caller can accept / reject / scope-override before the file is mutated.\n\n' +
|
|
463
|
+
'USE WHEN: an upstream pipeline produced an xlsx that\'s about to feed an LLM or downstream analysis and you want a one-pass scrub. Or you just got a "messy" export (financial reports with merged title banners, CRM exports with stripped zip codes, survey data with NA-variant noise) and need it normalized before reading. ' +
|
|
464
|
+
'Free tier — counts against the 10k/mo cap.\n\n' +
|
|
465
|
+
'DO NOT USE WHEN: domain-specific transforms are needed (use a dedicated pipeline; this tool is general-purpose). Or for structural integrity checks (use xlsx_doctor). Or for upload/attached files.',
|
|
466
|
+
inputSchema: {
|
|
467
|
+
type: 'object',
|
|
468
|
+
properties: {
|
|
469
|
+
file_path: { type: 'string', description: 'Absolute path to the .xlsx file.' },
|
|
470
|
+
mode: {
|
|
471
|
+
type: 'string',
|
|
472
|
+
enum: ['diagnose', 'execute'],
|
|
473
|
+
description: 'diagnose (default): return findings only, file untouched. execute: apply deterministic fixes; cleaned bytes returned in _meta.file_b64.',
|
|
474
|
+
},
|
|
475
|
+
detectors: {
|
|
476
|
+
type: 'array',
|
|
477
|
+
items: { type: 'string' },
|
|
478
|
+
description: 'Subset of detectors to run. Default: all 7 (na_variant, merged_cell_residue, type_coercion_mistake, trailing_row_noise, header_row_not_first, encoding_glitch, duplicate_header).',
|
|
479
|
+
},
|
|
480
|
+
sheets: { type: 'array', items: { type: 'string' }, description: 'Restrict to these sheet names. Default: all sheets.' },
|
|
481
|
+
options: {
|
|
482
|
+
type: 'object',
|
|
483
|
+
description: 'Detector tunables.',
|
|
484
|
+
properties: {
|
|
485
|
+
trailing_threshold: { type: 'integer', minimum: 1, maximum: 100, description: 'Min consecutive noise rows to flag (default 3).' },
|
|
486
|
+
header_scan_depth: { type: 'integer', minimum: 2, maximum: 50, description: 'Rows to scan for header inference (default 10).' },
|
|
487
|
+
na_canonical: { type: 'string', description: 'Replacement value for NA tokens. "" (default), "null", "(blank)", or any string.' },
|
|
488
|
+
},
|
|
489
|
+
},
|
|
490
|
+
overrides: {
|
|
491
|
+
type: 'array',
|
|
492
|
+
description: 'Per-detector / per-scope skip / flag_only / force overrides.',
|
|
493
|
+
items: {
|
|
494
|
+
type: 'object',
|
|
495
|
+
properties: {
|
|
496
|
+
detector: { type: 'string' },
|
|
497
|
+
scope: {
|
|
498
|
+
type: 'object',
|
|
499
|
+
properties: {
|
|
500
|
+
sheet: { type: 'string' },
|
|
501
|
+
column_letter: { type: 'string', description: 'A-Z column letter; alternative to region.' },
|
|
502
|
+
region: { type: 'object', properties: { top_left: { type: 'string' }, bottom_right: { type: 'string' } } },
|
|
503
|
+
},
|
|
504
|
+
required: ['sheet'],
|
|
505
|
+
},
|
|
506
|
+
action: { type: 'string', enum: ['skip', 'flag_only', 'force'] },
|
|
507
|
+
},
|
|
508
|
+
required: ['detector', 'scope', 'action'],
|
|
509
|
+
},
|
|
510
|
+
},
|
|
511
|
+
accept_findings: { type: 'array', items: { type: 'string' }, description: 'Execute mode only — finding IDs to apply. Default: all.' },
|
|
512
|
+
reject_findings: { type: 'array', items: { type: 'string' }, description: 'Execute mode only — finding IDs to skip.' },
|
|
513
|
+
out_path: { type: 'string', description: 'Optional save path for cleaned output (execute mode).' },
|
|
514
|
+
},
|
|
515
|
+
required: ['file_path'],
|
|
516
|
+
},
|
|
517
|
+
},
|
|
518
|
+
|
|
456
519
|
{
|
|
457
520
|
name: 'xlsx_validate',
|
|
458
521
|
description:
|
|
@@ -832,21 +895,23 @@ const TOOLS = [
|
|
|
832
895
|
name: 'xlsx_post_slack',
|
|
833
896
|
description:
|
|
834
897
|
'xlsx-for-ai — read, write, diff, redact, supervise .xlsx files locally.\n' +
|
|
835
|
-
'This tool: upload a local .xlsx file to a Slack channel as a file attachment, with an optional accompanying message
|
|
898
|
+
'This tool: upload a local .xlsx file to a Slack channel as a file attachment, with an optional accompanying message.\n' +
|
|
899
|
+
'Token intake: set SLACK_BOT_TOKEN in the environment (recommended — keeps the token out of conversation logs). ' +
|
|
900
|
+
'Alternatively pass slack_token as a tool argument (legacy; token will appear in MCP conversation history).\n' +
|
|
836
901
|
'Posts via Slack\'s 3-step external upload flow (files.getUploadURLExternal → upload → files.completeUploadExternal), which is the only sanctioned path as of 2024+.\n\n' +
|
|
837
902
|
'USE WHEN: the user asks "post this workbook to #channel," "share this with the team in Slack," or any other outbound-file-to-Slack request. The agent has just produced or modified a workbook and wants to deliver it. ' +
|
|
838
903
|
'Free tier — counts against the 10k/mo cap.\n\n' +
|
|
839
|
-
'DO NOT USE WHEN: the file lives in a Slack channel and you want to READ it (that\'s the inbound Manual-Mode-Detector pattern, not this). Or when
|
|
904
|
+
'DO NOT USE WHEN: the file lives in a Slack channel and you want to READ it (that\'s the inbound Manual-Mode-Detector pattern, not this). Or when no Slack bot token is available — the user must have installed a Slack app with files:write scope.',
|
|
840
905
|
inputSchema: {
|
|
841
906
|
type: 'object',
|
|
842
907
|
properties: {
|
|
843
908
|
file_path: { type: 'string', description: 'Absolute path to the .xlsx file to post.' },
|
|
844
909
|
channel: { type: 'string', description: 'Slack channel ID (C…/G…) the file should land in. Channel names like #general are NOT accepted — resolve to a channel ID first.' },
|
|
845
|
-
slack_token: { type: 'string', description: 'Slack bot token (xoxb-…).
|
|
910
|
+
slack_token: { type: 'string', description: 'Slack bot token (xoxb-…). Optional when SLACK_BOT_TOKEN env var is set. Passing the token here exposes it in MCP conversation logs — prefer the env var.' },
|
|
846
911
|
message: { type: 'string', description: 'Optional: message to post alongside the file (Slack\'s initial_comment).' },
|
|
847
912
|
filename: { type: 'string', description: 'Optional: filename Slack will display. Defaults to the basename of file_path.' },
|
|
848
913
|
},
|
|
849
|
-
required: ['file_path', 'channel'
|
|
914
|
+
required: ['file_path', 'channel'],
|
|
850
915
|
},
|
|
851
916
|
},
|
|
852
917
|
|
|
@@ -854,22 +919,24 @@ const TOOLS = [
|
|
|
854
919
|
name: 'xlsx_post_teams',
|
|
855
920
|
description:
|
|
856
921
|
'xlsx-for-ai — read, write, diff, redact, supervise .xlsx files locally.\n' +
|
|
857
|
-
'This tool: upload a local .xlsx file to a Microsoft Teams channel as a file attachment in a channel message, with an optional accompanying message
|
|
922
|
+
'This tool: upload a local .xlsx file to a Microsoft Teams channel as a file attachment in a channel message, with an optional accompanying message.\n' +
|
|
923
|
+
'Token intake: set TEAMS_GRAPH_TOKEN in the environment (recommended — keeps the token out of conversation logs). ' +
|
|
924
|
+
'Alternatively pass graph_token as a tool argument (legacy; token will appear in MCP conversation history).\n' +
|
|
858
925
|
'Uses Microsoft Graph\'s 4-step flow: locate the channel\'s filesFolder driveItem, create an upload session, upload the bytes, then post a chatMessage with the file as an inline attachment.\n\n' +
|
|
859
926
|
'USE WHEN: the user asks "post this workbook to my Teams channel," "share this with the team in Teams," or any other outbound-file-to-Teams request. The agent has just produced or modified a workbook and wants to deliver it to a Microsoft Teams channel. ' +
|
|
860
927
|
'Free tier — counts against the 10k/mo cap.\n\n' +
|
|
861
|
-
'DO NOT USE WHEN: posting to Slack (use xlsx_post_slack). Or when
|
|
928
|
+
'DO NOT USE WHEN: posting to Slack (use xlsx_post_slack). Or when no Microsoft Graph token is available — the user must have an Entra ID app registration with Group.ReadWrite.All or Files.ReadWrite.All + ChannelMessage.Send scopes, AND a valid access token for that app.',
|
|
862
929
|
inputSchema: {
|
|
863
930
|
type: 'object',
|
|
864
931
|
properties: {
|
|
865
932
|
file_path: { type: 'string', description: 'Absolute path to the .xlsx file to post.' },
|
|
866
933
|
team_id: { type: 'string', description: 'Microsoft Teams team ID (GUID). Find via Graph: GET /me/joinedTeams.' },
|
|
867
934
|
channel_id: { type: 'string', description: 'Microsoft Teams channel ID. Find via Graph: GET /teams/{team-id}/channels.' },
|
|
868
|
-
graph_token: { type: 'string', description: 'Microsoft Graph access token (JWT).
|
|
935
|
+
graph_token: { type: 'string', description: 'Microsoft Graph access token (JWT). Optional when TEAMS_GRAPH_TOKEN env var is set. Passing the token here exposes it in MCP conversation logs — prefer the env var. Must have file-upload + channel-message-send scopes.' },
|
|
869
936
|
message: { type: 'string', description: 'Optional: message to post alongside the file. Plain text; will be HTML-escaped server-side.' },
|
|
870
937
|
filename: { type: 'string', description: 'Optional: filename Teams will display. Defaults to the basename of file_path.' },
|
|
871
938
|
},
|
|
872
|
-
required: ['file_path', 'team_id', 'channel_id'
|
|
939
|
+
required: ['file_path', 'team_id', 'channel_id'],
|
|
873
940
|
},
|
|
874
941
|
},
|
|
875
942
|
|
|
@@ -922,14 +989,163 @@ const TOOLS = [
|
|
|
922
989
|
required: ['file_path'],
|
|
923
990
|
},
|
|
924
991
|
},
|
|
992
|
+
|
|
993
|
+
{
|
|
994
|
+
name: 'xlsx_receipt',
|
|
995
|
+
description:
|
|
996
|
+
'xlsx-for-ai — read, write, diff, redact, supervise .xlsx files locally.\n' +
|
|
997
|
+
'This tool: attach an AI-generation receipt to a workbook — a cryptographic attestation embedded in docProps/custom.xml that says "this file was generated by THIS agent, at THIS time, against THESE inputs." Returns the receipted workbook as base64 in _meta.file_b64; pass out_path to write to disk.\n' +
|
|
998
|
+
'Honesty boundary (load-bearing): the server signs the CALLER-DECLARED `agent.name` — it does NOT verify the caller actually IS that agent. The signature proves "this server signed these strings at this time," not "this came from claude-sonnet-4-6." Caller is responsible for honest declaration. Cryptographic identity binding (per-agent issued signing keys) is v1.1+ scope.\n\n' +
|
|
999
|
+
'USE WHEN: an AI agent (Claude, custom SDK agent, automated pipeline) generates a workbook and the recipient wants verifiable provenance — "what produced this file, when, against what." Or chaining attestations across a multi-step pipeline (each step adds its own receipt under different agent.name).\n\n' +
|
|
1000
|
+
'DO NOT USE WHEN: the workbook was human-authored (use xlsx_stamp — Stamp attests to check results, Receipt attests to generation context). Or when the use case demands cryptographically-bound identity (v1.1+).',
|
|
1001
|
+
inputSchema: {
|
|
1002
|
+
type: 'object',
|
|
1003
|
+
properties: {
|
|
1004
|
+
file_path: { type: 'string', description: 'Absolute path to the .xlsx file to receipt.' },
|
|
1005
|
+
agent_name: {
|
|
1006
|
+
type: 'string',
|
|
1007
|
+
description: 'Canonical agent name (lowercase + dot/dash/underscore/slash/colon, 1-128 chars). Examples: "claude-sonnet-4-6", "claude-code/0.5.2", "custom:my-agent-v1".',
|
|
1008
|
+
},
|
|
1009
|
+
agent_display_name: { type: 'string', description: 'Optional: human-readable display name (e.g., "Acme Q4 Forecast Bot").' },
|
|
1010
|
+
agent_identity_url: { type: 'string', description: 'Optional: caller-declared identity URL (GitHub repo, registry entry, etc.).' },
|
|
1011
|
+
source_file_hashes: {
|
|
1012
|
+
type: 'array',
|
|
1013
|
+
description: 'Optional: array of {name, sha256} entries describing source files the agent read to produce this workbook.',
|
|
1014
|
+
items: {
|
|
1015
|
+
type: 'object',
|
|
1016
|
+
properties: {
|
|
1017
|
+
name: { type: 'string' },
|
|
1018
|
+
sha256: { type: 'string', description: 'Hex SHA-256 (64 lowercase chars).' },
|
|
1019
|
+
},
|
|
1020
|
+
required: ['name', 'sha256'],
|
|
1021
|
+
},
|
|
1022
|
+
},
|
|
1023
|
+
prompt_hash: { type: 'string', description: 'Optional: hex SHA-256 of the prompt or instruction set that produced the workbook.' },
|
|
1024
|
+
mcp_tools_called: { type: 'array', items: { type: 'string' }, description: 'Optional: list of MCP tool names the agent called during generation.' },
|
|
1025
|
+
description: { type: 'string', description: 'Optional: short human-readable description of what the workbook is (≤256 chars).' },
|
|
1026
|
+
covers_sheets: { type: 'array', items: { type: 'string' }, description: 'Optional: sheets covered by the content hash. Default: all sheets.' },
|
|
1027
|
+
out_path: { type: 'string', description: 'Optional: write the receipted workbook to this absolute path. If omitted, the bytes are returned in _meta.file_b64 only.' },
|
|
1028
|
+
},
|
|
1029
|
+
required: ['file_path', 'agent_name'],
|
|
1030
|
+
},
|
|
1031
|
+
},
|
|
1032
|
+
|
|
1033
|
+
{
|
|
1034
|
+
name: 'xlsx_verify_receipt',
|
|
1035
|
+
description:
|
|
1036
|
+
'xlsx-for-ai — read, write, diff, redact, supervise .xlsx files locally.\n' +
|
|
1037
|
+
'This tool: verify a workbook\'s embedded AI-generation receipt. Returns whether the signature is valid, whether the recomputed content hash matches the hash IN the receipt, and the full caller-declared claims (agent identity, generation timestamp, source-file hashes, prompt hash, MCP tools called, description).\n' +
|
|
1038
|
+
'A workbook can fail verification three ways: (1) no receipt present (never receipted, or receipt was stripped); (2) signature_valid=false (claims modified after signing); (3) hash_matches=false (workbook bytes modified after receipt was generated). Honesty: a valid receipt proves the SERVER signed the caller-DECLARED agent string — not that the agent IS that.\n\n' +
|
|
1039
|
+
'USE WHEN: a workbook arrives claiming AI provenance and the user wants to verify it. Or auditing a corpus of workbooks to find ones with broken receipts (likely-tampered) or no receipts at all.',
|
|
1040
|
+
inputSchema: {
|
|
1041
|
+
type: 'object',
|
|
1042
|
+
properties: {
|
|
1043
|
+
file_path: { type: 'string', description: 'Absolute path to the .xlsx file to verify.' },
|
|
1044
|
+
},
|
|
1045
|
+
required: ['file_path'],
|
|
1046
|
+
},
|
|
1047
|
+
},
|
|
925
1048
|
];
|
|
926
1049
|
|
|
927
1050
|
// ---------------------------------------------------------------------------
|
|
928
1051
|
// File → base64 helper
|
|
1052
|
+
//
|
|
1053
|
+
// Security: only spreadsheet extensions are permitted. Any path that resolves
|
|
1054
|
+
// to a non-allowed extension (or does not exist) is rejected immediately so a
|
|
1055
|
+
// misbehaving agent cannot exfiltrate arbitrary local files via a tool call.
|
|
1056
|
+
//
|
|
1057
|
+
// Stability: a size cap is enforced before the synchronous read so a giant
|
|
1058
|
+
// workbook can't OOM-kill the MCP server (which would disconnect every tool
|
|
1059
|
+
// for the user). Override via XFA_MAX_FILE_MB; default is 50 MB.
|
|
929
1060
|
// ---------------------------------------------------------------------------
|
|
930
1061
|
|
|
1062
|
+
const ALLOWED_READ_EXTENSIONS = new Set(['.xlsx', '.xls', '.xlsm', '.xlsb', '.csv', '.ods', '.fods', '.numbers', '.tsv']);
|
|
1063
|
+
const DEFAULT_MAX_FILE_MB = 50;
|
|
1064
|
+
|
|
1065
|
+
/**
 * Resolve the size cap (in megabytes) applied to a file before it is read
 * into memory.
 *
 * Reads XFA_MAX_FILE_MB from the environment. The value must be a plain
 * positive decimal number ("75", "0.5"). Anything else — unset, empty, zero,
 * negative, or text with extra characters such as "100abc" or "1e3" — falls
 * back to DEFAULT_MAX_FILE_MB.
 *
 * @returns {number} The cap in megabytes.
 */
function getMaxFileMB() {
  const raw = process.env.XFA_MAX_FILE_MB;
  if (!raw) return DEFAULT_MAX_FILE_MB;

  // Strict parse on purpose. parseInt() would accept "100abc" as 100, read
  // "1e3" as 1, and truncate "0.5" to 0 — which then fell through to the
  // default, turning a request for a TIGHTER cap into the looser 50 MB one.
  const trimmed = raw.trim();
  if (!/^\d+(\.\d+)?$/.test(trimmed)) return DEFAULT_MAX_FILE_MB;

  const parsed = Number(trimmed);
  // isFinite guards against absurdly long digit strings overflowing to Infinity.
  if (!Number.isFinite(parsed) || parsed <= 0) return DEFAULT_MAX_FILE_MB;
  return parsed;
}
|
|
1072
|
+
|
|
931
1073
|
/**
 * Read a spreadsheet file from disk and return its bytes base64-encoded.
 *
 * Checks run in this order:
 *   1. Extension allowlist — decided from the path string alone, before any
 *      filesystem access.
 *   2. Open with O_NOFOLLOW | O_NONBLOCK (each falls back to 0 where the
 *      platform does not define it).
 *   3. fstat on the open fd: must be a regular file and within the size cap.
 *   4. Bounded read of exactly the stat'ed size.
 *
 * @param {string} filePath - Path to the file; resolved with path.resolve().
 * @returns {string} Base64 encoding of the bytes read.
 * @throws {Error} With `code` set to one of DISALLOWED_EXTENSION,
 *   FILE_NOT_FOUND, SYMLINK_REJECTED, NOT_REGULAR_FILE or FILE_TOO_LARGE.
 *   Any other open/read failure is rethrown unchanged.
 */
function fileToB64(filePath) {
  const resolved = path.resolve(filePath);

  // Extension check FIRST, before touching the filesystem. Doing it after
  // the open let a caller distinguish FILE_NOT_FOUND from
  // DISALLOWED_EXTENSION for arbitrary paths (an existence oracle for
  // non-spreadsheet files) and opened files we were always going to refuse.
  const ext = path.extname(resolved).toLowerCase();
  if (!ALLOWED_READ_EXTENSIONS.has(ext)) {
    const err = new Error(
      `Blocked: "${ext}" is not an allowed spreadsheet extension. ` +
      `Allowed: ${[...ALLOWED_READ_EXTENSIONS].join(', ')}`
    );
    err.code = 'DISALLOWED_EXTENSION';
    throw err;
  }

  // Open the file once and operate on the fd from here on. fstatSync and the
  // subsequent read both bind to the inode the fd points at, so even if the
  // path is swapped after the size check the bytes we encode are the bytes
  // we sized — the size-cap TOCTOU is closed.
  // O_NOFOLLOW (where available) refuses a symlink at open time.
  // O_NONBLOCK keeps the synchronous open from blocking forever on a FIFO
  // that happens to carry an allowed extension; the isFile() check below
  // then rejects it. It has no effect on regular files.
  const O_NOFOLLOW = fs.constants.O_NOFOLLOW || 0;
  const O_NONBLOCK = fs.constants.O_NONBLOCK || 0;
  let fd;
  try {
    fd = fs.openSync(resolved, fs.constants.O_RDONLY | O_NOFOLLOW | O_NONBLOCK);
  } catch (e) {
    if (e && e.code === 'ENOENT') {
      const err = new Error(`File not found: ${resolved}`);
      err.code = 'FILE_NOT_FOUND';
      throw err;
    }
    if (e && e.code === 'ELOOP') {
      const err = new Error(`Refusing to read symlink: ${resolved}`);
      err.code = 'SYMLINK_REJECTED';
      throw err;
    }
    throw e;
  }

  try {
    const stat = fs.fstatSync(fd);

    if (!stat.isFile()) {
      const err = new Error(`Not a regular file: ${resolved}`);
      err.code = 'NOT_REGULAR_FILE';
      throw err;
    }

    const maxMB = getMaxFileMB();
    if (stat.size > maxMB * 1024 * 1024) {
      const sizeMB = stat.size / (1024 * 1024);
      const err = new Error(
        `File too large: ${sizeMB.toFixed(1)} MB exceeds the ${maxMB} MB cap. ` +
        `Set XFA_MAX_FILE_MB to a higher value to allow larger workbooks. ` +
        `(The cap protects the MCP server from OOM on synchronous base64 load — ` +
        `a 200 MB workbook would allocate ~267 MB of base64 before any API call.)`
      );
      err.code = 'FILE_TOO_LARGE';
      throw err;
    }

    // Read exactly stat.size bytes from the fd into a pre-sized buffer. If
    // the file grows between fstat and now, the extra bytes are NOT read —
    // we never allocate more than the validated cap. If the file shrinks
    // (short read), we encode what we got and stop.
    const buf = Buffer.alloc(stat.size);
    let bytesRead = 0;
    while (bytesRead < stat.size) {
      const chunk = fs.readSync(fd, buf, bytesRead, stat.size - bytesRead, null);
      if (chunk === 0) break; // EOF before stat.size: the file shrank
      bytesRead += chunk;
    }
    return buf.subarray(0, bytesRead).toString('base64');
  } finally {
    // Best effort: a close failure must not mask the result or the real error.
    try { fs.closeSync(fd); } catch (_) { /* best effort */ }
  }
}
|
|
934
1150
|
|
|
935
1151
|
// ---------------------------------------------------------------------------
|
|
@@ -940,6 +1156,14 @@ function fileToB64(filePath) {
|
|
|
940
1156
|
// If out_path is not provided: leave response unchanged.
|
|
941
1157
|
// ---------------------------------------------------------------------------
|
|
942
1158
|
|
|
1159
|
+
// Extensions an MCP tool is allowed to write via out_path. Tighter than the
|
|
1160
|
+
// READ allowlist (no .ods/.fods/.numbers/.tsv) because the server only ever
|
|
1161
|
+
// emits XLSX or XLSX-family workbook bytes — accepting unrelated extensions
|
|
1162
|
+
// would let a malicious / confused agent point out_path at /etc/profile.d/
|
|
1163
|
+
// or a shell startup script. The .json carve-out is for fixture/audit JSON
|
|
1164
|
+
// the redact + clean tools sometimes emit alongside the workbook.
|
|
1165
|
+
const ALLOWED_WRITE_EXTENSIONS = new Set(['.xlsx', '.xls', '.xlsm', '.xlsb', '.csv', '.json']);
|
|
1166
|
+
|
|
943
1167
|
async function applyFileB64(result, outPath) {
|
|
944
1168
|
if (!outPath) {
|
|
945
1169
|
// No save requested — leave response untouched (b64 stays in _meta for caller)
|
|
@@ -948,6 +1172,21 @@ async function applyFileB64(result, outPath) {
|
|
|
948
1172
|
|
|
949
1173
|
const absPath = path.resolve(outPath);
|
|
950
1174
|
|
|
1175
|
+
// Containment: the only check applied here is the extension allowlist.
|
|
1176
|
+
// path.resolve() above has normalized any ".." segments; the resolved path
|
|
1177
|
+
// must end in one of ALLOWED_WRITE_EXTENSIONS, so out_path can't target a
|
|
1178
|
+
// shell-startup script or other non-workbook file. The directory itself is
|
|
1179
|
+
// not restricted. Flagged CRITICAL in the Tier-1 error-handling audit (2026-06-03).
|
|
1180
|
+
const outExt = path.extname(absPath).toLowerCase();
|
|
1181
|
+
if (!ALLOWED_WRITE_EXTENSIONS.has(outExt)) {
|
|
1182
|
+
if (result.content && result.content[0] && result.content[0].type === 'text') {
|
|
1183
|
+
result.content[0].text +=
|
|
1184
|
+
`\n\nout_path rejected: extension "${outExt}" is not in the allowed write set ` +
|
|
1185
|
+
`(${[...ALLOWED_WRITE_EXTENSIONS].join(', ')}). File was NOT written.`;
|
|
1186
|
+
}
|
|
1187
|
+
return result;
|
|
1188
|
+
}
|
|
1189
|
+
|
|
951
1190
|
if (result._meta && result._meta.file_b64) {
|
|
952
1191
|
await fsPromises.writeFile(absPath, Buffer.from(result._meta.file_b64, 'base64'));
|
|
953
1192
|
// Append save confirmation to first text content block
|
|
@@ -965,6 +1204,51 @@ async function applyFileB64(result, outPath) {
|
|
|
965
1204
|
return result;
|
|
966
1205
|
}
|
|
967
1206
|
|
|
1207
|
+
// ---------------------------------------------------------------------------
|
|
1208
|
+
// Boundary error sanitization
|
|
1209
|
+
//
|
|
1210
|
+
// The MCP server is a public boundary — anything in err.message that flows
|
|
1211
|
+
// to the client can end up in the MCP client's conversation log and from
|
|
1212
|
+
// there into any LLM context window the operator never intended. Map the
|
|
1213
|
+
// known operational error codes to short, client-safe text; collapse
|
|
1214
|
+
// everything else to a generic message that names the tool but not the
|
|
1215
|
+
// internals. Tool name is safe to echo (the caller asked for it); paths,
|
|
1216
|
+
// upstream server stacks, and third-party response bodies are not.
|
|
1217
|
+
//
|
|
1218
|
+
// New codes added here as the client-side error surface grows. Default
|
|
1219
|
+
// branch is conservative on purpose — better to under-reveal than over-
|
|
1220
|
+
// reveal at the boundary.
|
|
1221
|
+
// ---------------------------------------------------------------------------
|
|
1222
|
+
|
|
1223
|
+
/**
 * Map an internal error code to short, client-safe text.
 *
 * Only the tool name and fixed wording are echoed; the original
 * err.message (which may carry paths or upstream response bodies) is never
 * used. Unknown codes collapse to a generic message.
 *
 * @param {string} toolName - Name of the tool the caller invoked.
 * @param {string} [code] - Value of `err.code` on the thrown error, if any.
 * @returns {string} Message that is safe to return across the MCP boundary.
 */
function friendlyErrorMessage(toolName, code) {
  switch (code) {
    case 'DISALLOWED_EXTENSION':
      // Derived from the allowlist so this text cannot drift from what
      // fileToB64 actually enforces.
      // NOTE(review): xlsx_write also raises DISALLOWED_EXTENSION for a
      // non-.json spec_path; in that case this wording names the wrong
      // extension set — consider a dedicated code for spec_path.
      return `${toolName}: file path must point at a workbook (allowed: ${[...ALLOWED_READ_EXTENSIONS].join('/')}).`;
    case 'SYMLINK_REJECTED':
      return `${toolName}: file path resolves through a symlink — provide a direct path.`;
    case 'FILE_TOO_LARGE':
      // Default taken from the constant rather than a hand-typed "50".
      return `${toolName}: file exceeds the XFA_MAX_FILE_MB cap (default ${DEFAULT_MAX_FILE_MB} MB).`;
    case 'FILE_NOT_FOUND':
      return `${toolName}: file not found at the supplied path.`;
    case 'NOT_REGULAR_FILE':
      return `${toolName}: file path is not a regular file.`;
    case 'MISSING_TOKEN':
      return `${toolName}: required token env var is not set (see tool docs for which one).`;
    case 'API_UNREACHABLE':
      return `${toolName}: API is unreachable — check network connectivity.`;
    case 'API_SERVER_ERROR':
      return `${toolName}: API returned a server error — retry shortly.`;
    case 'TIER_UPGRADE_REQUIRED':
      return `${toolName}: this capability requires a paid tier.`;
    case 'RATE_LIMITED':
      return `${toolName}: free-tier monthly cap reached — see xlsx-for-ai.dev/pricing.`;
    case 'FALLBACK_ENGINE_MISSING':
      return `${toolName}: local fallback engine not installed (\`npm install @protobi/exceljs\`).`;
    default:
      // Conservative on purpose: under-reveal rather than over-reveal.
      return `${toolName} failed — see server-side logs (request_id in response _meta) for details.`;
  }
}
|
|
1251
|
+
|
|
968
1252
|
// ---------------------------------------------------------------------------
|
|
969
1253
|
// Tool dispatch
|
|
970
1254
|
// ---------------------------------------------------------------------------
|
|
@@ -1003,7 +1287,23 @@ async function dispatchTool(name, args) {
|
|
|
1003
1287
|
if (name === 'xlsx_write') {
|
|
1004
1288
|
let spec = args.spec;
|
|
1005
1289
|
if (!spec && args.spec_path) {
|
|
1006
|
-
|
|
1290
|
+
// Security: spec_path must exist and must be a .json file.
|
|
1291
|
+
const resolvedSpecPath = path.resolve(args.spec_path);
|
|
1292
|
+
if (!fs.existsSync(resolvedSpecPath)) {
|
|
1293
|
+
const err = new Error(`spec_path not found: ${resolvedSpecPath}`);
|
|
1294
|
+
err.code = 'FILE_NOT_FOUND';
|
|
1295
|
+
throw err;
|
|
1296
|
+
}
|
|
1297
|
+
const specExt = path.extname(resolvedSpecPath).toLowerCase();
|
|
1298
|
+
if (specExt !== '.json') {
|
|
1299
|
+
const err = new Error(
|
|
1300
|
+
`spec_path must be a .json file; got "${specExt}". ` +
|
|
1301
|
+
'Pass the workbook spec as inline JSON via the "spec" argument instead.'
|
|
1302
|
+
);
|
|
1303
|
+
err.code = 'DISALLOWED_EXTENSION';
|
|
1304
|
+
throw err;
|
|
1305
|
+
}
|
|
1306
|
+
spec = JSON.parse(fs.readFileSync(resolvedSpecPath, 'utf8'));
|
|
1007
1307
|
}
|
|
1008
1308
|
const writeBody = { spec };
|
|
1009
1309
|
if (args.base_file_b64) writeBody.base_file_b64 = args.base_file_b64;
|
|
@@ -1135,14 +1435,47 @@ async function dispatchTool(name, args) {
|
|
|
1135
1435
|
});
|
|
1136
1436
|
}
|
|
1137
1437
|
|
|
1438
|
+
// xlsx_data_clean: scan + optional execute. Diagnose mode returns
|
|
1439
|
+
// findings only (no file_b64 in _meta). Execute mode returns
|
|
1440
|
+
// cleaned bytes in _meta.file_b64; applyFileB64 saves to out_path
|
|
1441
|
+
// if provided. SPEC fields pass through verbatim — server validates.
|
|
1442
|
+
if (name === 'xlsx_data_clean') {
|
|
1443
|
+
const body = { file_b64: fileToB64(args.file_path) };
|
|
1444
|
+
if (args.mode !== undefined) body.mode = args.mode;
|
|
1445
|
+
if (args.detectors !== undefined) body.detectors = args.detectors;
|
|
1446
|
+
if (args.sheets !== undefined) body.sheets = args.sheets;
|
|
1447
|
+
if (args.options !== undefined) body.options = args.options;
|
|
1448
|
+
if (args.overrides !== undefined) body.overrides = args.overrides;
|
|
1449
|
+
if (args.accept_findings !== undefined) body.accept_findings = args.accept_findings;
|
|
1450
|
+
if (args.reject_findings !== undefined) body.reject_findings = args.reject_findings;
|
|
1451
|
+
const result = await callTool('xlsx_data_clean', body);
|
|
1452
|
+
return applyFileB64(result, args.out_path);
|
|
1453
|
+
}
|
|
1454
|
+
|
|
1138
1455
|
// xlsx_post_slack: outbound file-to-Slack. Top-level fields, not the
|
|
1139
1456
|
// standard {file_b64, options} shape — channel + slack_token + message
|
|
1140
1457
|
// + filename live alongside file_b64 in the server route's body schema.
|
|
1458
|
+
//
|
|
1459
|
+
// Token resolution order (H3 fix):
|
|
1460
|
+
// 1. SLACK_BOT_TOKEN env var (recommended — never enters conversation logs)
|
|
1461
|
+
// 2. slack_token tool arg (legacy; visible in MCP conversation history)
|
|
1462
|
+
// Error if neither is present.
|
|
1141
1463
|
if (name === 'xlsx_post_slack') {
|
|
1464
|
+
const slackToken = process.env.SLACK_BOT_TOKEN || args.slack_token;
|
|
1465
|
+
if (!slackToken) {
|
|
1466
|
+
const err = new Error(
|
|
1467
|
+
'Slack token required. Set the SLACK_BOT_TOKEN environment variable ' +
|
|
1468
|
+
'(recommended) or pass slack_token as a tool argument.'
|
|
1469
|
+
);
|
|
1470
|
+
err.code = 'MISSING_TOKEN';
|
|
1471
|
+
throw err;
|
|
1472
|
+
}
|
|
1473
|
+
// fileToB64 enforces existence + extension allowlist (H1 fix) — only
|
|
1474
|
+
// spreadsheet extensions (.xlsx, .xlsm, etc.) are permitted here.
|
|
1142
1475
|
const body = {
|
|
1143
1476
|
file_b64: fileToB64(args.file_path),
|
|
1144
1477
|
channel: args.channel,
|
|
1145
|
-
slack_token:
|
|
1478
|
+
slack_token: slackToken,
|
|
1146
1479
|
};
|
|
1147
1480
|
if (args.message !== undefined) body.message = args.message;
|
|
1148
1481
|
body.filename = args.filename || path.basename(args.file_path);
|
|
@@ -1151,12 +1484,28 @@ async function dispatchTool(name, args) {
|
|
|
1151
1484
|
|
|
1152
1485
|
// xlsx_post_teams: outbound file-to-Teams. Same shape as Slack but with
|
|
1153
1486
|
// Microsoft Graph fields (team_id + channel_id + graph_token).
|
|
1487
|
+
//
|
|
1488
|
+
// Token resolution order (H3 fix):
|
|
1489
|
+
// 1. TEAMS_GRAPH_TOKEN env var (recommended — never enters conversation logs)
|
|
1490
|
+
// 2. graph_token tool arg (legacy; visible in MCP conversation history)
|
|
1491
|
+
// Error if neither is present.
|
|
1154
1492
|
if (name === 'xlsx_post_teams') {
|
|
1493
|
+
const graphToken = process.env.TEAMS_GRAPH_TOKEN || args.graph_token;
|
|
1494
|
+
if (!graphToken) {
|
|
1495
|
+
const err = new Error(
|
|
1496
|
+
'Microsoft Graph token required. Set the TEAMS_GRAPH_TOKEN environment variable ' +
|
|
1497
|
+
'(recommended) or pass graph_token as a tool argument.'
|
|
1498
|
+
);
|
|
1499
|
+
err.code = 'MISSING_TOKEN';
|
|
1500
|
+
throw err;
|
|
1501
|
+
}
|
|
1502
|
+
// fileToB64 enforces existence + extension allowlist (H1 fix) — only
|
|
1503
|
+
// spreadsheet extensions (.xlsx, .xlsm, etc.) are permitted here.
|
|
1155
1504
|
const body = {
|
|
1156
1505
|
file_b64: fileToB64(args.file_path),
|
|
1157
1506
|
team_id: args.team_id,
|
|
1158
1507
|
channel_id: args.channel_id,
|
|
1159
|
-
graph_token:
|
|
1508
|
+
graph_token: graphToken,
|
|
1160
1509
|
};
|
|
1161
1510
|
if (args.message !== undefined) body.message = args.message;
|
|
1162
1511
|
body.filename = args.filename || path.basename(args.file_path);
|
|
@@ -1187,6 +1536,37 @@ async function dispatchTool(name, args) {
|
|
|
1187
1536
|
return callTool('xlsx_verify_stamp', body);
|
|
1188
1537
|
}
|
|
1189
1538
|
|
|
1539
|
+
// xlsx_receipt: attach an AI-generation receipt. Server signs caller-
|
|
1540
|
+
// declared agent + optional inputs (source-file hashes, prompt hash,
|
|
1541
|
+
// mcp-tools-called) + optional description; embeds the SignedReceipt in
|
|
1542
|
+
// docProps/custom.xml. Honesty: server signs the STRINGS the caller
|
|
1543
|
+
// supplied; does NOT verify agent identity.
|
|
1544
|
+
if (name === 'xlsx_receipt') {
|
|
1545
|
+
const body = {
|
|
1546
|
+
file_b64: fileToB64(args.file_path),
|
|
1547
|
+
agent: { name: args.agent_name },
|
|
1548
|
+
};
|
|
1549
|
+
if (args.agent_display_name !== undefined) body.agent.display_name = args.agent_display_name;
|
|
1550
|
+
if (args.agent_identity_url !== undefined) body.agent.identity_url = args.agent_identity_url;
|
|
1551
|
+
const inputs = {};
|
|
1552
|
+
if (args.source_file_hashes !== undefined) inputs.source_file_hashes = args.source_file_hashes;
|
|
1553
|
+
if (args.prompt_hash !== undefined) inputs.prompt_hash = args.prompt_hash;
|
|
1554
|
+
if (args.mcp_tools_called !== undefined) inputs.mcp_tools_called = args.mcp_tools_called;
|
|
1555
|
+
if (Object.keys(inputs).length > 0) body.inputs = inputs;
|
|
1556
|
+
if (args.description !== undefined) body.description = args.description;
|
|
1557
|
+
if (args.covers_sheets !== undefined) body.covers_sheets = args.covers_sheets;
|
|
1558
|
+
const result = await callTool('xlsx_receipt', body);
|
|
1559
|
+
return applyFileB64(result, args.out_path);
|
|
1560
|
+
}
|
|
1561
|
+
|
|
1562
|
+
// xlsx_verify_receipt: extract + verify the AI-generation receipt.
|
|
1563
|
+
// Surfaces caller-declared agent.name as declared; no cryptographic
|
|
1564
|
+
// identity binding in v1.
|
|
1565
|
+
if (name === 'xlsx_verify_receipt') {
|
|
1566
|
+
const body = { file_b64: fileToB64(args.file_path) };
|
|
1567
|
+
return callTool('xlsx_verify_receipt', body);
|
|
1568
|
+
}
|
|
1569
|
+
|
|
1190
1570
|
// All other tools (list_sheets, schema, hyperlinks, conditional_formats,
|
|
1191
1571
|
// styles, etc.) — single-file relay. Forward any common option keys the
|
|
1192
1572
|
// routes accept so we don't silently drop them. New keys added here as
|
|
@@ -1213,13 +1593,36 @@ async function main() {
|
|
|
1213
1593
|
// server-side tools appear without re-publishing this npm package.
|
|
1214
1594
|
// resolveCatalog returns the baked-in TOOLS as last-resort fallback so
|
|
1215
1595
|
// we never fail-open on a transient network blip. See lib/discover.js.
|
|
1596
|
+
//
|
|
1597
|
+
// Hard timeout (8s) on top of any timeout inside resolveCatalog so that
|
|
1598
|
+
// a network call which hangs forever (DNS sinkhole, TCP black hole, slow-
|
|
1599
|
+
// loris-style stalled response) cannot block MCP server startup
|
|
1600
|
+
// indefinitely. Pre-Friday-external CRITICAL per the Tier-1 audit.
|
|
1601
|
+
const CATALOG_FETCH_TIMEOUT_MS = 8000;
|
|
1216
1602
|
let catalog;
|
|
1217
1603
|
try {
|
|
1218
|
-
catalog = await resolveCatalog(TOOLS);
|
|
1604
|
+
catalog = await Promise.race([
|
|
1605
|
+
resolveCatalog(TOOLS),
|
|
1606
|
+
new Promise((_, reject) =>
|
|
1607
|
+
setTimeout(
|
|
1608
|
+
() => reject(new Error(`catalog fetch timed out after ${CATALOG_FETCH_TIMEOUT_MS}ms`)),
|
|
1609
|
+
CATALOG_FETCH_TIMEOUT_MS
|
|
1610
|
+
)
|
|
1611
|
+
),
|
|
1612
|
+
]);
|
|
1219
1613
|
} catch (_) {
|
|
1220
1614
|
catalog = { tools: TOOLS, source: 'static-fallback' };
|
|
1221
1615
|
}
|
|
1222
|
-
|
|
1616
|
+
// Surface catalog source so operators can tell server vs cache vs static
|
|
1617
|
+
// when an MCP session looks "off" (e.g., a tool missing because the remote
|
|
1618
|
+
// /api/v1/tools/list 404'd and we silently fell back to the stale baked-in
|
|
1619
|
+
// set). Stderr only — stdout is the MCP transport.
|
|
1620
|
+
process.stderr.write(`xlsx-for-ai-mcp: tool catalog source=${catalog.source} count=${Array.isArray(catalog.tools) ? catalog.tools.length : 0}\n`);
|
|
1621
|
+
// Overlay MCP annotations (title / readOnlyHint / destructiveHint) so
|
|
1622
|
+
// they flow through to clients regardless of catalog source. The remote
|
|
1623
|
+
// /api/v1/tools/list returns minimal entries today; this is what
|
|
1624
|
+
// restores the annotations the wire format would otherwise drop.
|
|
1625
|
+
const liveTools = applyAnnotations(Array.isArray(catalog.tools) ? catalog.tools : []);
|
|
1223
1626
|
|
|
1224
1627
|
const server = new Server(
|
|
1225
1628
|
{ name: 'xlsx-for-ai', version: require('./package.json').version },
|
|
@@ -1247,8 +1650,19 @@ async function main() {
|
|
|
1247
1650
|
// Pass API response through verbatim (citation footer + _meta preserved)
|
|
1248
1651
|
return result;
|
|
1249
1652
|
} catch (err) {
|
|
1653
|
+
// Error message sanitization at the MCP boundary. Raw err.message
|
|
1654
|
+
// can leak absolute file paths (FILE_NOT_FOUND), upstream server
|
|
1655
|
+
// error stacks (any thrown Error inside dispatchTool), and upstream
|
|
1656
|
+
// HTTP response bodies (callTool's API_SERVER_ERROR path may carry
|
|
1657
|
+
// server internals). Translate the known operational codes into
|
|
1658
|
+
// short, client-safe messages; everything else collapses to a
|
|
1659
|
+
// generic "tool failed" with the tool name so callers can still
|
|
1660
|
+
// route on it without leaking path/server detail. Pre-Friday-
|
|
1661
|
+
// external CRITICAL per the Tier-1 audit.
|
|
1662
|
+
const code = err && err.code;
|
|
1663
|
+
const safeMessage = friendlyErrorMessage(name, code);
|
|
1250
1664
|
return {
|
|
1251
|
-
content: [{ type: 'text', text: `xlsx-for-ai error: ${err.message}` }],
|
|
1665
|
+
content: [{ type: 'text', text: `xlsx-for-ai error: ${safeMessage}` }],
|
|
1252
1666
|
isError: true,
|
|
1253
1667
|
};
|
|
1254
1668
|
}
|
|
@@ -1266,5 +1680,9 @@ if (require.main === module) {
|
|
|
1266
1680
|
});
|
|
1267
1681
|
}
|
|
1268
1682
|
|
|
1269
|
-
//
|
|
1270
|
-
|
|
1683
|
+
// Exports for build-time scripts and tests only. Do NOT import these from
|
|
1684
|
+
// runtime production code — they're only here to let the manifest-build
|
|
1685
|
+
// script use TOOLS as the single source of truth for downstream artifacts
|
|
1686
|
+
// (manifest.json, mcp-tools.json snapshot consumed by the MSFT plugin
|
|
1687
|
+
// manifest), and to expose helpers under test.
|
|
1688
|
+
module.exports = { applyFileB64, dispatchTool, TOOLS };
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "xlsx-for-ai",
|
|
3
3
|
"mcpName": "io.github.senoff/xlsx-for-ai",
|
|
4
|
-
"version": "2.23.0",
|
|
4
|
+
"version": "2.25.2",
|
|
5
5
|
"description": "The MCP server that makes LLMs reliable on real-world Excel spreadsheets. Thin npm client over a hosted API — read, write, diff, redact, and supervise .xlsx files from any MCP-aware agent.",
|
|
6
6
|
"main": "index.js",
|
|
7
7
|
"bin": {
|
|
@@ -22,7 +22,10 @@
|
|
|
22
22
|
"LICENSE"
|
|
23
23
|
],
|
|
24
24
|
"scripts": {
|
|
25
|
-
"test": "node --test test/v2/*.test.js"
|
|
25
|
+
"test": "node --test test/v2/*.test.js",
|
|
26
|
+
"build-manifests": "node scripts/build-manifests.js",
|
|
27
|
+
"check-manifests": "node scripts/build-manifests.js --check",
|
|
28
|
+
"prepare": "husky"
|
|
26
29
|
},
|
|
27
30
|
"keywords": [
|
|
28
31
|
"xlsx",
|
|
@@ -57,5 +60,8 @@
|
|
|
57
60
|
},
|
|
58
61
|
"optionalDependencies": {
|
|
59
62
|
"@protobi/exceljs": "^4.4.0-protobi.9"
|
|
63
|
+
},
|
|
64
|
+
"devDependencies": {
|
|
65
|
+
"husky": "^9.1.7"
|
|
60
66
|
}
|
|
61
67
|
}
|