xlsx-for-ai 3.0.6 → 3.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/lib/client.js +93 -17
  2. package/mcp.js +207 -22
  3. package/package.json +1 -1
package/lib/client.js CHANGED
@@ -8,19 +8,53 @@
8
8
  * post(path, body, opts) — POST JSON, returns parsed response body
9
9
  * callTool(toolName, body) — POST /api/v1/tools/<toolName> with auth
10
10
  *
11
- * Retries once on network errors. Maps HTTP errors to structured Error objects.
11
+ * 15s per-attempt timeout, up to 3 attempts (45s ceiling). On retry the
12
+ * AbortController for the prior attempt has already fired, so any
13
+ * keep-alive socket undici held on the failing attempt is released
14
+ * before the retry opens a fresh one. Surfaces phase timing to stderr
15
+ * for production-incident diagnosis (SPM P1 2026-06-06: hosted tool
16
+ * calls timing out in Claude Desktop — server saw ~200ms responses
17
+ * but client saw 2-4 minute round-trips; the gap is in the connection
18
+ * dial / IPC layer, observability captures which next time).
12
19
  */
13
20
 
14
21
  const { readConfig } = require('./config');
15
22
  const { version: CLIENT_VERSION } = require('../package.json');
16
23
 
17
24
  const DEFAULT_API = 'https://api.xlsx-for-ai.dev';
18
- const TIMEOUT_MS = 30_000;
25
+ // Per-attempt timeout. Was 30s pre-3.0.7. Tighter so a stuck dial
26
+ // (IPv6 black hole, stale keep-alive) fails fast and the retry path
27
+ // reopens a fresh socket. 3 attempts × 15s = 45s ceiling, well under
28
+ // Claude Desktop's 60s client-side initialize timeout AND under the
29
+ // MCP tools/call timeout class.
30
+ const TIMEOUT_MS = 15_000;
31
+ const MAX_ATTEMPTS = 3;
19
32
 
20
33
  function apiBase() {
21
34
  return (process.env.XLSX_FOR_AI_API || DEFAULT_API).replace(/\/$/, '');
22
35
  }
23
36
 
37
+ // Stderr structured timing log. stdout is the MCP transport in the
38
+ // mcp-server context; never write timing data there.
39
+ function emitTiming(toolPath, attempt, phase, elapsedMs, extra) {
40
+ // One-line JSON so future log-shipper can grep / parse. Kept compact
41
+ // to stay inside Claude Desktop's MCP log buffer.
42
+ const obs = {
43
+ t: 'xlsx-for-ai-mcp.timing',
44
+ path: toolPath,
45
+ attempt,
46
+ phase,
47
+ elapsed_ms: Math.round(elapsedMs),
48
+ };
49
+ if (extra) Object.assign(obs, extra);
50
+ try {
51
+ process.stderr.write(JSON.stringify(obs) + '\n');
52
+ } catch (_) {
53
+ // EPIPE on stderr is swallowed by the mcp.js top-level guard;
54
+ // here we just no-op so a missing log sink doesn't break the call.
55
+ }
56
+ }
57
+
24
58
  async function fetchWithTimeout(url, init) {
25
59
  const controller = new AbortController();
26
60
  const timer = setTimeout(() => controller.abort(), TIMEOUT_MS);
@@ -49,32 +83,71 @@ async function post(path, body, opts = {}) {
49
83
  headers['X-XFA-Privacy'] = 'strict';
50
84
  }
51
85
 
86
+ const requestStart = Date.now();
87
+ const jsonBody = JSON.stringify(body);
88
+ // Byte length (UTF-8), not code-unit length — multi-byte chars in the
89
+ // body would otherwise be undercounted in observability.
90
+ const bodyBytes = Buffer.byteLength(jsonBody, 'utf8');
91
+ // `attempt: -1` is the convention for non-attempt-scoped events
92
+ // (per-request setup / teardown); keeps the 1..MAX_ATTEMPTS scope
93
+ // unambiguous in log analysis.
94
+ emitTiming(path, -1, 'send', 0, { body_bytes: bodyBytes });
95
+
52
96
  let res;
53
- try {
54
- res = await fetchWithTimeout(url, {
55
- method: 'POST',
56
- headers,
57
- body: JSON.stringify(body),
58
- });
59
- } catch (err) {
60
- // One retry on network error
97
+ let lastErr;
98
+ let winningAttempt = -1;
99
+ for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt += 1) {
100
+ const attemptStart = Date.now();
61
101
  try {
62
102
  res = await fetchWithTimeout(url, {
63
103
  method: 'POST',
64
104
  headers,
65
- body: JSON.stringify(body),
105
+ body: jsonBody,
106
+ });
107
+ const attemptElapsed = Date.now() - attemptStart;
108
+ emitTiming(path, attempt, 'response-headers', attemptElapsed, {
109
+ status: res.status,
110
+ });
111
+ lastErr = null;
112
+ winningAttempt = attempt;
113
+ break;
114
+ } catch (err) {
115
+ lastErr = err;
116
+ const attemptElapsed = Date.now() - attemptStart;
117
+ const errName = err && err.name ? err.name : 'Unknown';
118
+ const errCode = err && err.code ? err.code : null;
119
+ emitTiming(path, attempt, 'attempt-failed', attemptElapsed, {
120
+ error_name: errName,
121
+ error_code: errCode,
66
122
  });
67
- } catch (err2) {
68
- const e = new Error(`xlsx-for-ai API unreachable: ${err2.message}`);
69
- e.code = 'API_UNREACHABLE';
70
- throw e;
123
+ // No sleep between retries — let the underlying socket pool refresh
124
+ // on the next fetch call. A sleep would just lengthen the total
125
+ // wait and the SPM-evidenced symptom is already a dial stall that
126
+ // a fresh socket fixes.
71
127
  }
72
128
  }
73
129
 
130
+ if (!res) {
131
+ const totalElapsed = Date.now() - requestStart;
132
+ emitTiming(path, MAX_ATTEMPTS, 'all-attempts-failed', totalElapsed);
133
+ const e = new Error(`xlsx-for-ai API unreachable: ${lastErr ? lastErr.message : 'unknown'}`);
134
+ e.code = 'API_UNREACHABLE';
135
+ throw e;
136
+ }
137
+
74
138
  if (!res.ok) {
75
139
  let payload;
76
140
  try { payload = await res.json(); } catch (_) { payload = null; }
77
- const msg = payload?.error || payload?.message || res.statusText;
141
+ // Prefer the structured `{code, message}` shape our server emits; the
142
+ // top-level `error` / `message` fall-back keeps the older surfaces
143
+ // working. `payload.error` could be an OBJECT — coerce to .message
144
+ // first to avoid stringifying [object Object].
145
+ const errField = payload?.error;
146
+ const msg = (errField && typeof errField === 'object' ? errField.message : errField)
147
+ || payload?.message
148
+ || res.statusText;
149
+ const totalElapsed = Date.now() - requestStart;
150
+ emitTiming(path, winningAttempt, 'http-error', totalElapsed, { status: res.status });
78
151
  const e = new Error(`xlsx-for-ai API error ${res.status}: ${msg}`);
79
152
  e.status = res.status;
80
153
  e.payload = payload;
@@ -82,7 +155,10 @@ async function post(path, body, opts = {}) {
82
155
  throw e;
83
156
  }
84
157
 
85
- return res.json();
158
+ const json = await res.json();
159
+ const totalElapsed = Date.now() - requestStart;
160
+ emitTiming(path, winningAttempt, 'body-complete', totalElapsed);
161
+ return json;
86
162
  }
87
163
 
88
164
  async function callTool(toolName, body) {
package/mcp.js CHANGED
@@ -20,6 +20,7 @@ const { resolveCatalog } = require('./lib/discover');
20
20
  const { applyAnnotations, sanitizeForMcp } = require('./lib/annotations');
21
21
  const fs = require('fs');
22
22
  const fsPromises = require('fs/promises');
23
+ const os = require('os');
23
24
  const path = require('path');
24
25
 
25
26
  // ---------------------------------------------------------------------------
@@ -109,24 +110,95 @@ const TOOLS = [
109
110
  {
110
111
  name: 'xlsx_write',
111
112
  description:
112
- 'create or update a LOCAL .xlsx file from a structured spec.\n' +
113
- 'DEFAULT creates a new workbook from spec. Pass base_file_b64 to edit-in-place instead. Workbook bytes return in _meta.file_b64 (base64) NOT in content[0].text.\n\n' +
114
- 'ALWAYS pass out_path when the user wants the written file saved to disk.\n' +
115
- 'WITHOUT out_path: workbook bytes return in _meta.file_b64 (base64) — caller must save them.\n' +
116
- 'The response text confirms whether a save happenedtrust the response, do not infer.\n\n' +
117
- 'USE WHEN: the user wants to write or edit a spreadsheet at a LOCAL file path. ' +
118
- 'Supports multi-sheet workbooks, formulas, named ranges, and table definitions. ' +
119
- 'Server-validated before writing — safer than generating xlsx bytes directly.\n\n' +
120
- 'DO NOT USE WHEN: working in a sandbox without local filesystem write access. ' +
121
- 'Or when the user wants to edit an uploaded file in place (there is no local path to write to).',
113
+ 'create or update a LOCAL .xlsx file from a structured spec.\n\n' +
114
+ 'Spec shape: `{sheets: [{name, cells: [{address, value | formula}]}]}`. Each cell has an A1 address ("A1", "B2") and EITHER `value` (string|number|boolean|null) OR `formula` (string, no leading "="). Minimal example:\n' +
115
+ '`{"sheets":[{"name":"Sheet1","cells":[{"address":"A1","value":"id"},{"address":"A2","value":1},{"address":"B2","formula":"A2*2"}]}]}`\n\n' +
116
+ 'ALWAYS pass out_path to save to disk. Without out_path the workbook bytes return in _meta.file_b64.\n\n' +
117
+ 'USE WHEN: the user wants to write or edit a spreadsheet at a LOCAL file path. Server-validated before writing safer than generating xlsx bytes directly.\n\n' +
118
+ 'DO NOT USE WHEN: working in a sandbox without local filesystem write access. Or editing an uploaded file in place (there is no local path to write to).',
122
119
  inputSchema: {
123
120
  type: 'object',
124
121
  properties: {
125
- spec: { type: 'object', description: 'Workbook spec object.' },
126
- spec_path: { type: 'string', description: 'Path to a JSON spec file (alternative to inline spec).' },
127
- out_path: { type: 'string', description: 'Destination .xlsx path.' },
122
+ spec: {
123
+ type: 'object',
124
+ description:
125
+ 'Workbook spec. Shape: {sheets: [{name: string, cells: [{address, value | formula}]}]}. ' +
126
+ 'Each cell has an A1-style `address` (regex ^[A-Za-z]+\\d+$) and EXACTLY ONE of `value` ' +
127
+ '(string|number|boolean|null) or `formula` (string WITHOUT leading "=" — e.g. "SUM(A1:A10)" not "=SUM(A1:A10)"). ' +
128
+ 'Example: {"sheets":[{"name":"Sheet1","cells":[{"address":"A1","value":"id"},{"address":"A2","value":1},{"address":"B2","formula":"A2*2"}]}]}',
129
+ properties: {
130
+ sheets: {
131
+ type: 'array',
132
+ minItems: 1,
133
+ description: 'One or more sheets. Each sheet is { name: string, cells: array }.',
134
+ items: {
135
+ type: 'object',
136
+ required: ['name', 'cells'],
137
+ properties: {
138
+ name: {
139
+ type: 'string',
140
+ minLength: 1,
141
+ description: 'Sheet name (non-empty).',
142
+ },
143
+ cells: {
144
+ type: 'array',
145
+ description: 'List of cells to write. Order does not matter; addresses are absolute.',
146
+ items: {
147
+ type: 'object',
148
+ required: ['address'],
149
+ description: 'Cell entry. Provide EXACTLY ONE of `value` or `formula`.',
150
+ properties: {
151
+ address: {
152
+ type: 'string',
153
+ pattern: '^[A-Za-z]+\\d+$',
154
+ description: 'A1-style cell address — e.g. "A1", "B2", "AA10".',
155
+ },
156
+ value: {
157
+ type: ['string', 'number', 'boolean', 'null'],
158
+ description: 'Cell value: string, number, boolean, or null. Mutually exclusive with `formula`.',
159
+ },
160
+ formula: {
161
+ type: 'string',
162
+ // No leading `=` — the server expects bare expressions.
163
+ // `^(?!=)` is a negative lookahead that rejects an `=`
164
+ // as the first character; ECMA-262 supported.
165
+ pattern: '^(?!=).+',
166
+ description: 'Excel formula, WITHOUT leading "=". E.g. "SUM(A1:A10)" not "=SUM(A1:A10)". Mutually exclusive with `value`.',
167
+ },
168
+ },
169
+ // Enforce the value-XOR-formula rule at the schema layer
170
+ // so a strict client (or future server) rejects malformed
171
+ // cells before the request fires. SPM 2026-06-06
172
+ // wild-adoption follow-up.
173
+ oneOf: [
174
+ { required: ['value'], not: { required: ['formula'] } },
175
+ { required: ['formula'], not: { required: ['value'] } },
176
+ ],
177
+ },
178
+ },
179
+ },
180
+ },
181
+ },
182
+ },
183
+ required: ['sheets'],
184
+ },
185
+ spec_path: {
186
+ type: 'string',
187
+ description: 'Path to a .json file carrying the spec (alternative to inline spec for large workbooks).',
188
+ },
189
+ out_path: {
190
+ type: 'string',
191
+ description: 'Destination .xlsx path. Required when the caller wants the file saved to disk.',
192
+ },
193
+ base_file_b64: {
194
+ type: 'string',
195
+ description: 'Optional base64 of an existing .xlsx to edit-in-place. When omitted, a fresh workbook is created.',
196
+ },
128
197
  },
129
- required: ['out_path'],
198
+ // out_path is the typical caller's choice but not strictly required —
199
+ // when omitted, the workbook bytes return in _meta.file_b64 and the
200
+ // caller saves them (or feeds them to another tool). spec / spec_path
201
+ // is the only hard requirement.
130
202
  },
131
203
  },
132
204
  {
@@ -1193,8 +1265,19 @@ function getMaxFileMB() {
1193
1265
  return parsed;
1194
1266
  }
1195
1267
 
1268
+ // Expand a leading `~` to the user's home dir so tilde-prefixed paths the
1269
+ // model passes ("~/Desktop/foo.xlsx") don't dead-end with ENOENT. SPM P1
1270
+ // 2026-06-06 "secondary" finding — a cheap friction-reducer.
1271
+ // Only the leading character; we don't try to resolve `~user/foo` patterns.
1272
+ function expandTilde(p) {
1273
+ if (typeof p !== 'string' || p.length === 0) return p;
1274
+ if (p === '~') return os.homedir();
1275
+ if (p.startsWith('~/')) return path.join(os.homedir(), p.slice(2));
1276
+ return p;
1277
+ }
1278
+
1196
1279
  function fileToB64(filePath) {
1197
- const resolved = path.resolve(filePath);
1280
+ const resolved = path.resolve(expandTilde(filePath));
1198
1281
 
1199
1282
  // Open the file once and operate on the fd from here on. fstatSync and the
1200
1283
  // subsequent read both bind to the inode the fd points at, so even if the
@@ -1343,7 +1426,63 @@ async function applyFileB64(result, outPath) {
1343
1426
  // reveal at the boundary.
1344
1427
  // ---------------------------------------------------------------------------
1345
1428
 
1346
- function friendlyErrorMessage(toolName, code) {
1429
+ // Defense in depth on the 4xx inline message. The SPEC's bet is that
1430
+ // 4xx server messages describe the CALLER'S OWN INPUT (which field,
1431
+ // what was expected) — but a wrapped 4xx path could still carry
1432
+ // absolute file paths, emails, JWTs / Bearer tokens, Slack tokens,
1433
+ // or other PII. Scrub those before surfacing, replace with `<…>`
1434
+ // placeholders so the caller still sees the SHAPE of the message
1435
+ // without the sensitive payload.
1436
+ //
1437
+ // `<…>` was picked over a more verbose `[redacted-x]` so it's
1438
+ // visually compact and unambiguously not real input.
1439
+ const PII_SCRUBBERS = [
1440
+ // Bearer / Authorization tokens — match before generic JWT pattern.
1441
+ [/\bBearer\s+[A-Za-z0-9._~+/-]{8,}=*/g, '<bearer>'],
1442
+ // JSON Web Tokens. Three dot-separated base64url segments, the first
1443
+ // starting with `eyJ` (the canonical JWT header prefix).
1444
+ [/\beyJ[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\b/g, '<jwt>'],
1445
+ // Slack bot / user / app tokens.
1446
+ [/\bxox[bpoars]-[A-Za-z0-9-]{10,}\b/g, '<slack-token>'],
1447
+ // Our own API keys.
1448
+ [/\bxfa_[a-z]+_[A-Za-z0-9]{16,}\b/g, '<xfa-key>'],
1449
+ // Generic 32+ char hex (api keys / hashes).
1450
+ [/\b[a-f0-9]{32,}\b/gi, '<hex>'],
1451
+ // Emails.
1452
+ [/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g, '<email>'],
1453
+ // POSIX absolute paths under /Users, /home, /var, /opt, /tmp, /etc, /private.
1454
+ [/\/(?:Users|home|var|opt|tmp|etc|private)\/[^\s'"`)\]]+/g, '<path>'],
1455
+ // Windows absolute paths.
1456
+ [/[A-Za-z]:\\[^\s'"`)\]]+/g, '<path>'],
1457
+ ];
1458
+
1459
+ // Strip the well-known low-signal noise an inline 4xx surface message
1460
+ // could carry: leading "xlsx-for-ai API error 4xx: " prefix from
1461
+ // lib/client.js, scrub PII via PII_SCRUBBERS, bound the length so a
1462
+ // pathological payload can't blow up the conversation log.
1463
+ const INLINE_4XX_MAX_LEN = 280;
1464
+ function shapeInline4xxMessage(raw) {
1465
+ if (typeof raw !== 'string') return '';
1466
+ let s = raw.replace(/^xlsx-for-ai API error \d+:\s*/i, '').trim();
1467
+ for (const [pattern, replacement] of PII_SCRUBBERS) {
1468
+ s = s.replace(pattern, replacement);
1469
+ }
1470
+ if (s.length > INLINE_4XX_MAX_LEN) {
1471
+ s = s.slice(0, INLINE_4XX_MAX_LEN - 1) + '…';
1472
+ }
1473
+ return s;
1474
+ }
1475
+
1476
+ function friendlyErrorMessage(toolName, err) {
1477
+ // err may be undefined (defensive) or any thrown value. Extract the
1478
+ // fields we care about safely.
1479
+ const code = err && err.code;
1480
+ const status = err && err.status;
1481
+ const payload = err && err.payload;
1482
+
1483
+ // Known client-side / mcp.js error codes — keep their pre-existing
1484
+ // short text. Ordered before the 4xx default so the specific message
1485
+ // wins.
1347
1486
  switch (code) {
1348
1487
  case 'DISALLOWED_EXTENSION':
1349
1488
  return `${toolName}: file path must point at a workbook (allowed: .xlsx/.xls/.xlsm/.xlsb/.csv/.ods/.fods/.numbers/.tsv).`;
@@ -1359,8 +1498,6 @@ function friendlyErrorMessage(toolName, code) {
1359
1498
  return `${toolName}: required token env var is not set (see tool docs for which one).`;
1360
1499
  case 'API_UNREACHABLE':
1361
1500
  return `${toolName}: API is unreachable — check network connectivity.`;
1362
- case 'API_SERVER_ERROR':
1363
- return `${toolName}: API returned a server error — retry shortly.`;
1364
1501
  case 'TIER_UPGRADE_REQUIRED':
1365
1502
  return `${toolName}: this capability requires a paid tier.`;
1366
1503
  case 'RATE_LIMITED':
@@ -1368,8 +1505,57 @@ function friendlyErrorMessage(toolName, code) {
1368
1505
  case 'FALLBACK_ENGINE_MISSING':
1369
1506
  return `${toolName}: local fallback engine not installed (\`npm install @protobi/exceljs\`).`;
1370
1507
  default:
1371
- return `${toolName} failed — see server-side logs (request_id in response _meta) for details.`;
1508
+ break;
1509
+ }
1510
+
1511
+ // 4xx client-error class: surface the server's validation message
1512
+ // inline. SPM 2026-06-06 wild-adoption SPEC. The 4xx surface
1513
+ // describes the CALLER'S OWN INPUT shape ("spec.sheets must be an
1514
+ // array", "cells[3].address is not a valid Excel address"); the
1515
+ // caller needs that message to fix their call. 5xx stays generic
1516
+ // (it can carry upstream internals).
1517
+ //
1518
+ // Known specific HTTP statuses are mapped first so they keep their
1519
+ // short curated text:
1520
+ if (code === 'API_CLIENT_ERROR') {
1521
+ if (status === 429) {
1522
+ return `${toolName}: free-tier monthly cap reached — see xlsx-for-ai.dev/pricing.`;
1523
+ }
1524
+ if (status === 402) {
1525
+ return `${toolName}: this capability requires a paid tier.`;
1526
+ }
1527
+ // Generic 4xx: surface the server message. Prefer the structured
1528
+ // shape, fall through to the flat message, fall through to the
1529
+ // wrapped err.message (stripped of the "API error 4xx:" prefix).
1530
+ let inline = '';
1531
+ if (payload && typeof payload === 'object') {
1532
+ const structured = payload.error;
1533
+ if (structured && typeof structured === 'object' && typeof structured.message === 'string') {
1534
+ inline = structured.message;
1535
+ } else if (typeof payload.message === 'string') {
1536
+ inline = payload.message;
1537
+ } else if (typeof payload.error === 'string') {
1538
+ inline = payload.error;
1539
+ }
1540
+ }
1541
+ if (!inline && err && typeof err.message === 'string') {
1542
+ inline = err.message;
1543
+ }
1544
+ const shaped = shapeInline4xxMessage(inline);
1545
+ if (shaped) {
1546
+ return `${toolName}: ${shaped}`;
1547
+ }
1548
+ // Graceful fallback when no message is available (empty/absent
1549
+ // payload, non-string fields): generic with tool name, no
1550
+ // `undefined`, no `[object Object]`.
1551
+ return `${toolName}: invalid request (no detail provided).`;
1552
+ }
1553
+
1554
+ // 5xx and everything else — stay generic. Security boundary preserved.
1555
+ if (code === 'API_SERVER_ERROR') {
1556
+ return `${toolName}: API returned a server error — retry shortly.`;
1372
1557
  }
1558
+ return `${toolName} failed — see server-side logs (request_id in response _meta) for details.`;
1373
1559
  }
1374
1560
 
1375
1561
  // ---------------------------------------------------------------------------
@@ -1861,8 +2047,7 @@ async function main() {
1861
2047
  // generic "tool failed" with the tool name so callers can still
1862
2048
  // route on it without leaking path/server detail. Pre-Friday-
1863
2049
  // external CRITICAL per the Tier-1 audit.
1864
- const code = err && err.code;
1865
- const safeMessage = friendlyErrorMessage(name, code);
2050
+ const safeMessage = friendlyErrorMessage(name, err);
1866
2051
  return {
1867
2052
  content: [{ type: 'text', text: `xlsx-for-ai error: ${safeMessage}` }],
1868
2053
  isError: true,
@@ -1969,4 +2154,4 @@ if (require.main === module) {
1969
2154
  // script use TOOLS as the single source of truth for downstream artifacts
1970
2155
  // (manifest.json, mcp-tools.json snapshot consumed by the MSFT plugin
1971
2156
  // manifest), and to expose helpers under test.
1972
- module.exports = { applyFileB64, dispatchTool, TOOLS };
2157
+ module.exports = { applyFileB64, dispatchTool, TOOLS, friendlyErrorMessage };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "xlsx-for-ai",
3
3
  "mcpName": "io.github.senoff/xlsx-for-ai",
4
- "version": "3.0.6",
4
+ "version": "3.0.9",
5
5
  "description": "The MCP server that makes LLMs reliable on real-world Excel spreadsheets. Thin npm client over a hosted API — read, write, diff, redact, and supervise .xlsx files from any MCP-aware agent.",
6
6
  "main": "index.js",
7
7
  "bin": {