@mkterswingman/5mghost-wonder 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -2,7 +2,7 @@
2
2
  // src/cli.ts
3
3
  // Main entry point for the `wonder` CLI.
4
4
  // Telemetry wired by P1-05. Auth wired by P1-04.
5
- import { resolveWonderPaths } from "./platform/paths.js";
5
+ import { resolveWonderPaths, migrateLegacyWonderDir } from "./platform/paths.js";
6
6
  import { dispatchWonderCommand } from "./commands/index.js";
7
7
  import { runHelpCommand } from "./commands/help.js";
8
8
  import { createWonderTelemetryRuntime } from "./telemetry/runtime.js";
@@ -19,6 +19,14 @@ process.on("unhandledRejection", (reason) => {
19
19
  process.exit(1);
20
20
  });
21
21
  const argv = process.argv.slice(2);
22
+ // Phase 5 migration: move pre-Phase-5 `~/.wonder/` contents into the aligned
23
+ // `~/.mkterswingman/5mghost-wonder/`. Idempotent and silent on no-op.
24
+ try {
25
+ migrateLegacyWonderDir();
26
+ }
27
+ catch {
28
+ // Migration failure must never break the CLI startup.
29
+ }
22
30
  const paths = resolveWonderPaths();
23
31
  const io = {
24
32
  stdout: (m) => process.stdout.write(m + "\n"),
@@ -5,7 +5,9 @@
5
5
  // - auth: mkterswingman auth JWT/PAT
6
6
  // - wecom-cookie: WeCom cookies.json presence + live validity probe
7
7
  // - pandoc: CLI on PATH (used by use-skill for docx/pptx text)
8
- // - soffice: CLI on PATH + real executable (broken symlinks are rejected)
8
+ // - soffice: CLI on PATH + real executable (optional — only the visual-
9
+ // layout xlsx render and docx/pptx PDF conversion need it; the default
10
+ // JSON read path does not shell out to soffice)
9
11
  // - docx-skill: docx SKILL.md present in plugin cache or user skills dir
10
12
  // - pptx-skill: pptx SKILL.md present in plugin cache or user skills dir
11
13
  // - cache: local export cache directory state (informational)
@@ -14,10 +16,9 @@
14
16
  // Each check produces { label, ok, hint? }. The command exits 0 iff all
15
17
  // required checks pass. docx/pptx skills are optional because some consumers
16
18
  // (raw JSON, non-Claude AI clients) do not need the Anthropic bundled skills.
17
- import { accessSync, constants, existsSync, readdirSync, realpathSync, statSync } from "node:fs";
18
- import { delimiter, join, resolve } from "node:path";
19
+ import { accessSync, constants, existsSync, realpathSync, statSync } from "node:fs";
20
+ import { delimiter, resolve } from "node:path";
19
21
  import { spawnSync } from "node:child_process";
20
- import { homedir } from "node:os";
21
22
  import { resolveWonderPaths } from "../platform/paths.js";
22
23
  import { getCookieStatus } from "../wecom/cookies.js";
23
24
  import { describeCacheDir } from "../wecom/cache.js";
@@ -42,12 +43,13 @@ function findOnPath(binName) {
42
43
  }
43
44
  return null;
44
45
  }
45
- function checkExecutable(binName, installHints) {
46
+ function checkExecutable(binName, installHints, optional = false) {
46
47
  const found = findOnPath(binName);
47
48
  if (!found) {
48
49
  return {
49
50
  label: binName,
50
51
  ok: false,
52
+ optional,
51
53
  hint: installHints[process.platform] ?? installHints["default"] ?? `install ${binName}`,
52
54
  };
53
55
  }
@@ -61,6 +63,7 @@ function checkExecutable(binName, installHints) {
61
63
  return {
62
64
  label: binName,
63
65
  ok: false,
66
+ optional,
64
67
  hint: `${binName} is a broken symlink at ${found}. Reinstall: ${installHints[process.platform] ?? installHints["default"]}`,
65
68
  detail: found,
66
69
  };
@@ -74,6 +77,7 @@ function checkExecutable(binName, installHints) {
74
77
  return {
75
78
  label: binName,
76
79
  ok: false,
80
+ optional,
77
81
  hint: `${binName} is on PATH (${found}) but failed to execute. Reinstall: ${installHints[process.platform] ?? installHints["default"]}`,
78
82
  detail: `realpath=${realTarget}`,
79
83
  };
@@ -84,50 +88,7 @@ function checkExecutable(binName, installHints) {
84
88
  detail: found === realTarget ? found : `${found} → ${realTarget}`,
85
89
  };
86
90
  }
87
- function checkSkillFile(skillName, opts) {
88
- // Check plugin cache first (Anthropic-bundled skills)
89
- const pluginGlob = join(opts.home, ".claude-internal", "plugins", "cache", "anthropic-agent-skills");
90
- let foundPluginVersion = null;
91
- if (existsSync(pluginGlob)) {
92
- try {
93
- for (const entry of readdirSync(pluginGlob, { withFileTypes: true })) {
94
- if (!entry.isDirectory())
95
- continue;
96
- const candidate = join(pluginGlob, entry.name, "skills", skillName, "SKILL.md");
97
- if (existsSync(candidate)) {
98
- foundPluginVersion = entry.name;
99
- break;
100
- }
101
- }
102
- }
103
- catch {
104
- /* fall through to user skills */
105
- }
106
- }
107
- if (foundPluginVersion) {
108
- return {
109
- label: `${skillName}-skill`,
110
- ok: true,
111
- detail: `plugin cache (${foundPluginVersion})`,
112
- };
113
- }
114
- // User-installed skills (both -internal and plain variants)
115
- for (const claudeRoot of [".claude-internal", ".claude"]) {
116
- const userSkill = join(opts.home, claudeRoot, "skills", skillName, "SKILL.md");
117
- if (existsSync(userSkill)) {
118
- return { label: `${skillName}-skill`, ok: true, detail: userSkill };
119
- }
120
- }
121
- return {
122
- label: `${skillName}-skill`,
123
- ok: false,
124
- optional: true,
125
- hint: `For Claude consumers that read .${skillName}: ` +
126
- `mkdir -p ~/.claude-internal/skills/${skillName} && ` +
127
- `curl -fsSL https://raw.githubusercontent.com/anthropics/skills/main/skills/${skillName}/SKILL.md ` +
128
- `-o ~/.claude-internal/skills/${skillName}/SKILL.md`,
129
- };
130
- }
91
+ // (checkSkillFile removed in Phase 5 — see the note at the check call site.)
131
92
  export async function runCheckCommand(_argv, context) {
132
93
  const paths = resolveWonderPaths({ homeDir: context.homeDir });
133
94
  const items = [];
@@ -195,17 +156,29 @@ export async function runCheckCommand(_argv, context) {
195
156
  win32: "winget install pandoc",
196
157
  default: "https://pandoc.org/installing.html",
197
158
  }));
198
- // ── LibreOffice soffice ─────────────────────────────────────────────────
159
+ // ── LibreOffice soffice (optional) ──────────────────────────────────────
160
+ // The default JSON read path (`wonder read <url>` / `--tab`) never touches
161
+ // soffice. It is only needed for (a) the optional xlsx visual-layout render
162
+ // step and (b) docx/pptx → PDF conversion inside the AI-side use skill.
163
+ // Mark as optional so a missing install does not make `wonder check` exit 1.
199
164
  items.push(checkExecutable("soffice", {
200
165
  darwin: "brew install --cask libreoffice",
201
166
  linux: "sudo apt install -y libreoffice (or: sudo dnf install -y libreoffice)",
202
167
  win32: "winget install LibreOffice.LibreOffice",
203
168
  default: "https://www.libreoffice.org/download",
204
- }));
169
+ },
170
+ /* optional */ true));
205
171
  // ── docx / pptx skill files ─────────────────────────────────────────────
206
- const homeForSkills = context.homeDir || homedir();
207
- items.push(checkSkillFile("docx", { home: homeForSkills }));
208
- items.push(checkSkillFile("pptx", { home: homeForSkills }));
172
+ // Phase 5: intentionally NOT checked here.
173
+ //
174
+ // wonder is a headless CLI; it does not know which AI client (Claude /
175
+ // Codex / Gemini / OpenClaw / …) is invoking it, and each client reads
176
+ // skills from its own `skillsDir` (~/.claude/skills, ~/.codex/skills,
177
+ // ~/.gemini/skills, etc). A wonder-side check that only looked under
178
+ // `~/.claude*/skills` would false-positive for users calling from Codex
179
+ // or Gemini. The dependable place for "does my AI know how to read
180
+ // .docx / .pptx?" is the `setup-5mghost-wonder` skill, which runs in
181
+ // the calling AI and can inspect its own skills directory.
209
182
  // ── Cache directory (informational) ─────────────────────────────────────
210
183
  const cache = describeCacheDir(paths.cacheDir);
211
184
  items.push({
@@ -119,6 +119,10 @@ export async function runReadCommand(args, context) {
119
119
  sourceUrl: url,
120
120
  cookies,
121
121
  saveDir: resolvedSaveDir,
122
+ onProgress: (pct) => {
123
+ // Progress → stderr only, so stdout stays valid JSON for pipes.
124
+ context.io.stderr(`Exporting… ${Math.max(1, Math.min(99, pct))}%`);
125
+ },
122
126
  });
123
127
  if (!noCache && tokValue) {
124
128
  try {
@@ -1,14 +1,87 @@
1
1
  // src/commands/uninstall.ts
2
- // Runs `npm uninstall -g @mkterswingman/5mghost-wonder`.
3
- // NpmExecutor is injectable for unit testing.
2
+ // Uninstall flow:
3
+ // 1. Remove installed wonder skills from every detected AI client via the
4
+ // same manifest used at install time. Receipt gating in `removeSkills`
5
+ // only deletes directories this package owns.
6
+ // 2. Run `npm uninstall -g @mkterswingman/5mghost-wonder`.
7
+ // 3. Print the location of the local data dir (cookies + export cache)
8
+ // and the exact command to remove it. We do NOT delete data
9
+ // automatically — cookies are still reusable if the user reinstalls.
10
+ import { resolveWonderPaths } from "../platform/paths.js";
4
11
  import { defaultNpmExecutor } from "../platform/npm.js";
12
+ import { fileURLToPath } from "node:url";
13
+ import { dirname, resolve } from "node:path";
5
14
/**
 * Entry point for `wonder uninstall`.
 *
 * Runs three steps in order:
 *   1. best-effort removal of installed skills (never blocks the npm step),
 *   2. `npm uninstall -g @mkterswingman/5mghost-wonder` through `executor`,
 *   3. a notice telling the user where local data was left and how to delete it.
 *
 * Local data is never deleted automatically.
 *
 * @param _argv    Unused; kept for the shared command signature.
 * @param context  Command context; uses `context.io.stdout`, `context.io.stderr`
 *                 and `context.homeDir`.
 * @param executor Injectable npm runner. Called with the npm argument array and
 *                 expected to return an object with `exitCode` and `stderr`.
 * @returns `{ exitCode: 0 }` on success, `{ exitCode: 1 }` when npm fails.
 */
export async function runUninstallCommand(_argv, context, executor = defaultNpmExecutor) {
    context.io.stdout("Uninstalling 5mghost-wonder...");
    // Step 1 — best-effort skill removal. Failures here do not block npm.
    await removeInstalledSkills(context);
    // Step 2 — npm uninstall.
    const result = executor(["uninstall", "-g", "@mkterswingman/5mghost-wonder"]);
    if (result.exitCode !== 0) {
        context.io.stderr(`Uninstall failed:\n${result.stderr}`);
        return { exitCode: 1 };
    }
    // Step 3 — tell the user about residual data.
    const paths = resolveWonderPaths({ homeDir: context.homeDir });
    // Quote the path so the printed command survives home directories that
    // contain spaces, and use a command that actually exists on Windows.
    const removeCommand = process.platform === "win32"
        ? `rmdir /s /q "${paths.wonderDir}"`
        : `rm -rf "${paths.wonderDir}"`;
    context.io.stdout("Uninstalled successfully.");
    context.io.stdout("");
    context.io.stdout(`Local data kept at: ${paths.wonderDir}\n` +
        ` (cookies + export cache; reused if you reinstall)\n` +
        ` To remove manually: ${removeCommand}`);
    return { exitCode: 0 };
}
33
/**
 * Best-effort removal of this package's skills from every detected AI client.
 *
 * Every failure mode (SDK not importable, manifest missing, agent detection
 * failing, removal throwing) is reported on `context.io.stdout` and swallowed,
 * so the caller can always continue.
 *
 * @param context Command context; only `context.io.stdout` is used.
 * @returns A promise that resolves once removal was attempted or skipped.
 */
async function removeInstalledSkills(context) {
    let removeSkills;
    let listDetectedAgents;
    try {
        ({ removeSkills, listDetectedAgents } = await import("@mkterswingman/5mghost-agent-skills"));
    }
    catch {
        context.io.stdout(" (agent-skills SDK unavailable — skipping skill removal)");
        return;
    }
    const manifestPath = findManifestPath();
    // findManifestPath() only computes where the manifest should live, so the
    // file's presence has to be verified here; otherwise the "not found" branch
    // below could never trigger.
    let manifestPresent = false;
    if (manifestPath) {
        try {
            const { existsSync } = await import("node:fs");
            manifestPresent = existsSync(manifestPath);
        }
        catch {
            manifestPresent = false;
        }
    }
    if (!manifestPresent) {
        context.io.stdout(" (skills.manifest.json not found — skipping skill removal)");
        return;
    }
    let agents;
    try {
        agents = listDetectedAgents();
    }
    catch {
        context.io.stdout(" (could not detect AI clients — skipping skill removal)");
        return;
    }
    if (!agents || agents.length === 0) {
        context.io.stdout(" (no AI clients detected — skipping skill removal)");
        return;
    }
    try {
        const summary = removeSkills({ manifestPath, detectedAgents: agents });
        const removed = summary.results.filter((r) => r.status === "removed");
        if (removed.length > 0) {
            const names = removed.map((r) => `${r.skill}@${r.agent}`).join(", ");
            context.io.stdout(` removed skills: ${names}`);
        }
        else {
            context.io.stdout(" no wonder-owned skills found in detected AI clients");
        }
    }
    catch (err) {
        context.io.stdout(` (skill removal failed: ${String(err)})`);
    }
}
/**
 * Compute the expected location of the package's `skills.manifest.json`,
 * two directories above this compiled module.
 *
 * This does not check that the file exists; callers must do that themselves.
 *
 * @returns The absolute manifest path, or `null` when the module URL cannot be
 *          converted to a filesystem path.
 */
function findManifestPath() {
    try {
        const here = dirname(fileURLToPath(import.meta.url));
        return resolve(here, "..", "..", "skills.manifest.json");
    }
    catch {
        return null;
    }
}
@@ -1,6 +1,13 @@
1
1
  // src/platform/paths.ts
2
2
  // Resolves all filesystem paths used by wonder at runtime.
3
+ //
4
+ // Since Phase 5 all wonder runtime data lives under
5
+ // `~/.mkterswingman/5mghost-wonder/` to stay consistent with sibling
6
+ // packages (5mghost-insider, 5mghost-rover). Pre-Phase-5 installs had
7
+ // data under `~/.wonder/`; `migrateLegacyWonderDir()` handles that.
8
+ //
3
9
  // __dirname equivalent uses import.meta.url (NodeNext ESM).
10
+ import { existsSync, renameSync, mkdirSync, readdirSync } from "node:fs";
4
11
  import { fileURLToPath } from "url";
5
12
  import { dirname, resolve } from "path";
6
13
  const __filename = fileURLToPath(import.meta.url);
@@ -10,8 +17,10 @@ export function resolveWonderPaths(opts) {
10
17
  process.env["HOME"] ??
11
18
  process.env["USERPROFILE"] ??
12
19
  "";
13
- const wonderDir = resolve(homeDir, ".wonder");
14
- // At runtime, __dirname = dist/ (tsc output). The manifest lives one level up.
20
+ // Phase 5: ~/.mkterswingman/5mghost-wonder/ (aligned with insider / rover).
21
+ const wonderDir = resolve(homeDir, ".mkterswingman", "5mghost-wonder");
22
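+ // At runtime, __dirname = dist/platform/ (tsc output), so the path below
23
+ // resolves to dist/skills.manifest.json (one level up). NOTE(review): uninstall.js looks two levels up, at the package root — confirm which location is correct.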
15
24
  const skillsManifestPath = resolve(__dirname, "../skills.manifest.json");
16
25
  return {
17
26
  homeDir,
@@ -23,3 +32,63 @@ export function resolveWonderPaths(opts) {
23
32
  defaultSaveDir: resolve(homeDir, "Downloads", "5mghost-wonder"),
24
33
  };
25
34
  }
35
/**
 * Move entries from `fromDir` into `intoDir`, leaving alone any entry whose
 * name already exists in `intoDir`. Individual rename failures are skipped.
 *
 * @returns true when at least one entry was moved.
 */
function mergeLegacyEntries(fromDir, intoDir) {
    let movedAny = false;
    readdirSync(fromDir).forEach((name) => {
        const destination = resolve(intoDir, name);
        if (existsSync(destination)) {
            return; // conservative: keep newer data intact
        }
        try {
            renameSync(resolve(fromDir, name), destination);
            movedAny = true;
        }
        catch {
            /* skip per-entry failure */
        }
    });
    return movedAny;
}
/**
 * Rename `legacyDir` to `<legacyDir>.migrated` once nothing is left in it.
 * Any failure is ignored.
 */
function retireLegacyDirIfEmpty(legacyDir) {
    try {
        if (readdirSync(legacyDir).length === 0) {
            renameSync(legacyDir, `${legacyDir}.migrated`);
        }
    }
    catch {
        /* ignore */
    }
}
/**
 * Migrate pre-Phase-5 data from `~/.wonder/` to
 * `~/.mkterswingman/5mghost-wonder/`.
 *
 * - Target missing: the whole legacy directory is renamed into place.
 * - Target present: legacy entries are moved one by one, skipping names that
 *   already exist in the target; the legacy directory is then renamed to
 *   `.wonder.migrated` if it ended up empty.
 *
 * @param opts Optional `{ homeDir }`; falls back to `HOME`, then `USERPROFILE`.
 * @returns true when something was moved; false when there was nothing to do,
 *          no home directory could be determined, or the migration failed.
 */
export function migrateLegacyWonderDir(opts) {
    const home = opts?.homeDir ??
        process.env["HOME"] ??
        process.env["USERPROFILE"] ??
        "";
    if (!home) {
        return false;
    }
    const oldRoot = resolve(home, ".wonder");
    if (!existsSync(oldRoot)) {
        return false;
    }
    const vendorRoot = resolve(home, ".mkterswingman");
    const target = resolve(vendorRoot, "5mghost-wonder");
    try {
        if (existsSync(target)) {
            // New dir present: merge entry-by-entry, then tidy up.
            const movedAny = mergeLegacyEntries(oldRoot, target);
            retireLegacyDirIfEmpty(oldRoot);
            return movedAny;
        }
        mkdirSync(vendorRoot, { recursive: true });
        renameSync(oldRoot, target);
        return true;
    }
    catch {
        return false;
    }
}
@@ -7,7 +7,7 @@
7
7
  //
8
8
  // All three document types (sheet/doc/slide) share this same flow.
9
9
  // URL parsing is in ./url.ts; cookie management is in ./cookies.ts (P1-03).
10
- import { writeFileSync, mkdirSync } from "fs";
10
+ import { writeFileSync, mkdirSync, existsSync } from "fs";
11
11
  import { join } from "path";
12
12
  /** Error thrown by exportWecomDoc() */
13
13
  export class ExportError extends Error {
@@ -102,7 +102,7 @@ async function createExportTask(docId, cookieHeader, sourceUrl) {
102
102
  * - maxPollAttempts exceeded without Done
103
103
  * - Done but no file_url in response
104
104
  */
105
- async function pollExportProgress(operationId, xsrf, cookieHeader, sourceUrl, maxPollAttempts, pollIntervalMs) {
105
+ async function pollExportProgress(operationId, xsrf, cookieHeader, sourceUrl, maxPollAttempts, pollIntervalMs, onProgress) {
106
106
  // Use the supplied interval only when the caller passes a non-default value
107
107
  // (typical: a test pins a short, deterministic interval).
108
108
  const useFixedInterval = pollIntervalMs !== DEFAULT_POLL_INTERVAL_MS;
@@ -159,6 +159,13 @@ async function pollExportProgress(operationId, xsrf, cookieHeader, sourceUrl, ma
159
159
  }
160
160
  return { fileUrl, fileName };
161
161
  }
162
+ // Fire progress callback before the next sleep so users see movement.
163
+ if (onProgress && progress > 0) {
164
+ try {
165
+ onProgress(progress, attempt);
166
+ }
167
+ catch { /* never fatal */ }
168
+ }
162
169
  // status is still in-progress — continue polling
163
170
  }
164
171
  throw new ExportError("poll_timeout", `Export did not complete after ${maxPollAttempts} attempts (~${Math.round((maxPollAttempts * pollIntervalMs) / 1000)}s)`, { maxPollAttempts, pollIntervalMs });
@@ -195,7 +202,8 @@ async function downloadExportedFile(fileUrl, fileName, saveDir) {
195
202
  catch (err) {
196
203
  throw new ExportError("write_error", `Failed to create save directory "${saveDir}": ${String(err)}`, err);
197
204
  }
198
- const filePath = join(saveDir, fileName);
205
+ const safeName = sanitizeFilename(fileName);
206
+ const filePath = pickNonClobberingPath(saveDir, safeName);
199
207
  try {
200
208
  writeFileSync(filePath, Buffer.from(buffer));
201
209
  }
@@ -204,6 +212,43 @@ async function downloadExportedFile(fileUrl, fileName, saveDir) {
204
212
  }
205
213
  return { filePath, fileSizeBytes: buffer.byteLength };
206
214
  }
215
/**
 * Reduce an untrusted upstream file name to a safe single path segment.
 *
 * - Keeps only the last segment after any `/` or `\`, so values such as
 *   `../../etc/passwd` or `foo/bar.xlsx` cannot escape the save directory.
 * - Drops control characters (0x00-0x1F, 0x7F).
 * - On win32 only, replaces characters Windows forbids in file names and
 *   strips trailing dots/spaces, which Windows cannot store.
 * - Deliberately not an allow-list, so non-ASCII (e.g. Chinese) titles survive.
 *
 * @param fileName Name reported by the export service; may be missing.
 * @returns The cleaned name, or `"wecom-download"` when nothing usable remains
 *          (empty, `.`, `..`, or a non-string input).
 */
function sanitizeFilename(fileName) {
    // A missing or non-string name falls through to the default instead of
    // throwing a TypeError from `.split`.
    const raw = typeof fileName === "string" ? fileName : "";
    // Strip leading path segments; handles both POSIX and win32 separators.
    let name = raw.split(/[\\/]/).pop() ?? "";
    // Drop control chars (0x00-0x1F, 0x7F).
    // eslint-disable-next-line no-control-regex
    name = name.replace(/[\x00-\x1f\x7f]/g, "");
    name = name.trim();
    if (process.platform === "win32") {
        // Windows rejects these characters outright (":" would address an
        // alternate data stream) and silently drops trailing dots/spaces.
        name = name.replace(/[<>:"|?*]/g, "_").replace(/[. ]+$/, "");
    }
    if (name === "" || name === "." || name === "..") {
        return "wecom-download";
    }
    return name;
}
234
/**
 * Choose a path inside `saveDir` for `name` that does not exist yet.
 *
 * The plain `saveDir/name` is used when free. Otherwise ` (2)`, ` (3)`, … up
 * to ` (999)` is inserted before the extension; if all of those are taken, a
 * millisecond timestamp is used as the counter. A leading dot is not treated
 * as an extension separator.
 *
 * @param saveDir Directory the file will be written to.
 * @param name    Desired file name (single path segment).
 * @returns The chosen path.
 */
function pickNonClobberingPath(saveDir, name) {
    const preferred = join(saveDir, name);
    if (!existsSync(preferred)) {
        return preferred;
    }
    const splitAt = name.lastIndexOf(".");
    const [stem, ext] = splitAt > 0
        ? [name.slice(0, splitAt), name.slice(splitAt)]
        : [name, ""];
    let counter = 2;
    while (counter < 1000) {
        const attempt = join(saveDir, `${stem} (${counter})${ext}`);
        if (!existsSync(attempt)) {
            return attempt;
        }
        counter += 1;
    }
    return join(saveDir, `${stem} (${Date.now()})${ext}`);
}
207
252
  // ---------------------------------------------------------------------------
208
253
  // Public API
209
254
  // ---------------------------------------------------------------------------
@@ -223,13 +268,13 @@ const DEFAULT_POLL_INTERVAL_MS = 2000;
223
268
  * @throws ExportError with a typed `kind` field on any failure
224
269
  */
225
270
  export async function exportWecomDoc(input) {
226
- const { docId, docType, sourceUrl, cookies, saveDir, maxPollAttempts = DEFAULT_MAX_POLL_ATTEMPTS, pollIntervalMs = DEFAULT_POLL_INTERVAL_MS, } = input;
271
+ const { docId, docType, sourceUrl, cookies, saveDir, maxPollAttempts = DEFAULT_MAX_POLL_ATTEMPTS, pollIntervalMs = DEFAULT_POLL_INTERVAL_MS, onProgress, } = input;
227
272
  const cookieHeader = buildCookieHeader(cookies);
228
273
  const xsrf = buildXsrfToken(cookies);
229
274
  // Step 1: Create export task
230
275
  const operationId = await createExportTask(docId, cookieHeader, sourceUrl);
231
276
  // Step 2: Poll until Done
232
- const { fileUrl, fileName } = await pollExportProgress(operationId, xsrf, cookieHeader, sourceUrl, maxPollAttempts, pollIntervalMs);
277
+ const { fileUrl, fileName } = await pollExportProgress(operationId, xsrf, cookieHeader, sourceUrl, maxPollAttempts, pollIntervalMs, onProgress);
233
278
  // Step 3: Download
234
279
  const { filePath, fileSizeBytes } = await downloadExportedFile(fileUrl, fileName, saveDir);
235
280
  return { filePath, fileName, fileSizeBytes, docType };
@@ -1,39 +1,27 @@
1
1
  // src/xlsx/sheet.ts
2
- // Parses xl/worksheets/sheet{N}.xml.
3
- // Outputs a sparse cell array, merge ranges, and bounding-box dimensions.
4
- // Dependencies are injected: sharedStrings[] from shared-strings.ts,
5
- // getFormatCode() from styles.ts. This module does not read files.
6
- import { XMLParser } from "fast-xml-parser";
2
+ // Parses xl/worksheets/sheet{N}.xml via a streaming SAX parser (saxes).
3
+ //
4
+ // Why SAX: large WeCom xlsx exports (hundreds of MB) can produce tens of MB
5
+ // of sheet.xml, which under the previous fast-xml-parser DOM approach blew
6
+ // up peak memory to 1–2 GB. SAX keeps peak heap flat (~tens of MB) because
7
+ // we never materialise the whole parse tree — we build the output cell
8
+ // array as we go and forget everything else.
9
+ //
10
+ // The CLI JSON output is byte-for-byte identical to the prior DOM
11
+ // implementation (verified by the Phase 5 test fixtures). Dependencies are
12
+ // still injected: `sharedStrings[]` from shared-strings.ts and
13
+ // `getFormatCode()` from styles.ts.
14
+ import { SaxesParser } from "saxes";
7
15
  // ---------------------------------------------------------------------------
8
- // XMLParser (module-level singleton stateless, safe to reuse)
16
+ // Cell ref helpers (pure functions, reused)
9
17
  // ---------------------------------------------------------------------------
10
- const parser = new XMLParser({
11
- ignoreAttributes: false,
12
- attributeNamePrefix: "@_",
13
- // Force arrays to eliminate single-node vs. array ambiguity.
14
- isArray: (name) => name === "row" || name === "c" || name === "mergeCell" || name === "r",
15
- // Keep all values as strings; we convert manually based on the t attribute.
16
- parseTagValue: false,
17
- // Preserve whitespace (cells may contain spaces).
18
- trimValues: false,
19
- });
20
- // ---------------------------------------------------------------------------
21
- // Internal helpers
22
- // ---------------------------------------------------------------------------
23
- /**
24
- * Column letters (uppercase) → 0-based column index.
25
- * A→0, Z→25, AA→26, AZ→51, BA→52.
26
- */
27
18
/**
 * Convert uppercase spreadsheet column letters to a 0-based column index
 * using bijective base-26: A→0, Z→25, AA→26, AZ→51, BA→52.
 *
 * The input is not validated; an empty string yields -1.
 *
 * @param letters Column letters, e.g. "AB".
 * @returns The 0-based column index.
 */
export function colLettersToIndex(letters) {
    // 'A' has char code 65, so subtracting 64 maps A..Z onto 1..26.
    const oneBased = letters
        .split("")
        .reduce((acc, unit) => acc * 26 + (unit.charCodeAt(0) - 64), 0);
    return oneBased - 1;
}
34
- /**
35
- * "B5" → { row: 4, col: 1 } (0-based).
36
- */
37
25
  export function parseCellRef(ref) {
38
26
  const match = /^([A-Z]+)(\d+)$/.exec(ref);
39
27
  if (!match)
@@ -43,9 +31,6 @@ export function parseCellRef(ref) {
43
31
  row: Number(match[2]) - 1,
44
32
  };
45
33
  }
46
- /**
47
- * "A1:C3" → MergeRange (0-based).
48
- */
49
34
  function parseMergeRef(ref) {
50
35
  const sep = ref.indexOf(":");
51
36
  if (sep === -1)
@@ -54,62 +39,174 @@ function parseMergeRef(ref) {
54
39
  const e = parseCellRef(ref.slice(sep + 1));
55
40
  return { startRow: s.row, startCol: s.col, endRow: e.row, endCol: e.col };
56
41
  }
57
- /**
58
- * Extract text from an <is> node (inlineStr).
59
- * Handles simple <is><t>text</t></is> and rich-text <is><r><t>part</t></r>…</is>.
60
- */
61
- function extractInlineStr(is) {
62
- if (!is)
63
- return "";
64
- if (is.r) {
65
- const runs = Array.isArray(is.r) ? is.r : [is.r];
66
- return runs.map((r) => String(r?.t ?? "")).join("");
67
- }
68
- return String(is.t ?? "");
42
+ // ---------------------------------------------------------------------------
43
+ // SAX-based parseSheet
44
+ // ---------------------------------------------------------------------------
45
/**
 * Parse one worksheet XML document with a streaming SAX parser.
 *
 * Cells are built as each `<c>` element closes and handed to `buildCell()`;
 * cells for which it returns null are dropped. Merge ranges are read from
 * `<mergeCell ref="…">`; malformed refs are skipped.
 *
 * Inline strings concatenate the `<t>` text found under `<is>`, except text
 * inside phonetic runs (`<rPh>`): the earlier DOM implementation never read
 * those, so including them would change the output.
 *
 * @param xml            Full worksheet XML as a string.
 * @param sharedStrings  Shared-string table, passed through to `buildCell()`.
 * @param getFormatCode  Style-index → format-code lookup, passed through to
 *                       `buildCell()`.
 * @returns `{ cells, merges, maxRow, maxCol }`, where `maxRow`/`maxCol` are the
 *          largest row/col seen among emitted cells (0 when there are none).
 * @throws The first error reported by the SAX parser, after parsing finishes.
 */
export function parseSheet(xml, sharedStrings, getFormatCode) {
    const parser = new SaxesParser({ xmlns: false });
    const cells = [];
    const merges = [];
    let maxRow = 0;
    let maxCol = 0;
    // Cell-building state (resets on every <c>…</c>).
    let active = false;
    let cRef = "";
    let cType = ""; // "" | "s" | "str" | "inlineStr" | "b" | "n" | "e"
    let cStyleIdx = -1;
    let inV = false;
    let vBuf = "";
    let inIs = false;
    let inT = false;
    let isBuf = "";
    let tBuf = "";
    // Nesting depth of <rPh> (phonetic run) inside the current <is>.
    let rPhDepth = 0;
    const resetCellState = () => {
        active = false;
        cRef = "";
        cType = "";
        cStyleIdx = -1;
        inV = false;
        vBuf = "";
        inIs = false;
        inT = false;
        isBuf = "";
        tBuf = "";
        rPhDepth = 0;
    };
    parser.on("opentag", (tag) => {
        switch (tag.name) {
            case "c": {
                resetCellState();
                active = true;
                cRef = tag.attributes["r"] ?? "";
                cType = tag.attributes["t"] ?? "";
                const sAttr = tag.attributes["s"];
                cStyleIdx = sAttr != null ? Number(sAttr) : -1;
                break;
            }
            case "v": {
                if (active) {
                    inV = true;
                    vBuf = "";
                }
                break;
            }
            case "is": {
                if (active)
                    inIs = true;
                break;
            }
            case "rPh": {
                if (active && inIs)
                    rPhDepth += 1;
                break;
            }
            case "t": {
                // Ignore phonetic-hint text so inline strings match what the
                // DOM implementation produced.
                if (active && inIs && rPhDepth === 0) {
                    inT = true;
                    tBuf = "";
                }
                break;
            }
            case "mergeCell": {
                const ref = tag.attributes["ref"];
                if (ref) {
                    try {
                        merges.push(parseMergeRef(ref));
                    }
                    catch {
                        /* malformed ref — skip */
                    }
                }
                break;
            }
            default:
                break;
        }
    });
    // Text and CDATA are handled identically: append to whichever buffer is open.
    const appendText = (chunk) => {
        if (inT)
            tBuf += chunk;
        else if (inV)
            vBuf += chunk;
    };
    parser.on("text", appendText);
    parser.on("cdata", appendText);
    parser.on("closetag", (tag) => {
        switch (tag.name) {
            case "t": {
                if (inT) {
                    isBuf += tBuf;
                    tBuf = "";
                    inT = false;
                }
                break;
            }
            case "rPh": {
                if (rPhDepth > 0)
                    rPhDepth -= 1;
                break;
            }
            case "is": {
                inIs = false;
                rPhDepth = 0;
                break;
            }
            case "v": {
                inV = false;
                break;
            }
            case "c": {
                if (active && cRef) {
                    const cell = buildCell(cRef, cType, cStyleIdx, vBuf, isBuf, sharedStrings, getFormatCode);
                    if (cell) {
                        cells.push(cell);
                        if (cell.row > maxRow)
                            maxRow = cell.row;
                        if (cell.col > maxCol)
                            maxCol = cell.col;
                    }
                }
                resetCellState();
                break;
            }
            default:
                break;
        }
    });
    // Remember only the first parser error and surface it once parsing is done.
    let parseError = null;
    parser.on("error", (err) => {
        if (!parseError)
            parseError = err;
    });
    parser.write(xml).close();
    if (parseError)
        throw parseError;
    return { cells, merges, maxRow, maxCol };
}
70
180
  /**
71
- * Extract a SheetCell from a single <c> node.
72
- * Returns null when the cell should be skipped (error, no value, no ref).
181
+ * Mirror of the former DOM extractCellValue() — keeps output byte-identical.
73
182
  */
74
- function extractCellValue(c, sharedStrings, getFormatCode) {
75
- const ref = c["@_r"];
76
- if (!ref)
77
- return null;
183
+ function buildCell(ref, cType, cStyleIdx, vRaw, isText, sharedStrings, getFormatCode) {
78
184
  const { row, col } = parseCellRef(ref);
79
- const t = c["@_t"] ?? ""; // cell type; default = numeric
80
- const sAttr = c["@_s"] != null ? Number(c["@_s"]) : -1;
81
- // inlineStr: text lives in <is>, no <v>
82
- if (t === "inlineStr") {
83
- return { row, col, text: extractInlineStr(c.is) };
185
+ if (cType === "inlineStr") {
186
+ return { row, col, text: isText };
84
187
  }
85
- // Error cells: skip entirely
86
- if (t === "e")
188
+ if (cType === "e")
189
+ return null;
190
+ if (vRaw === "")
87
191
  return null;
88
- // All other types require <v>
89
- const vRaw = c.v != null ? String(c.v) : undefined;
90
- if (vRaw === undefined)
91
- return null; // formula not calculated or empty cell
92
- switch (t) {
192
+ switch (cType) {
93
193
  case "s": {
94
- // Shared string: <v> is the index into sharedStrings[]
95
194
  const text = sharedStrings[Number(vRaw)] ?? "";
96
195
  return { row, col, text };
97
196
  }
98
197
  case "str": {
99
- // Formula result that is a string: <v> is the literal text
100
198
  return { row, col, text: vRaw };
101
199
  }
102
200
  case "b": {
103
201
  return { row, col, text: vRaw === "1" ? "TRUE" : "FALSE" };
104
202
  }
105
203
  default: {
106
- // Numeric (t="n" or absent t)
107
204
  const value = Number(vRaw);
108
205
  if (isNaN(value))
109
206
  return null;
110
207
  const cell = { row, col, value };
111
- if (sAttr >= 0) {
112
- const format = getFormatCode(sAttr);
208
+ if (cStyleIdx >= 0) {
209
+ const format = getFormatCode(cStyleIdx);
113
210
  if (format !== undefined)
114
211
  cell.format = format;
115
212
  }
@@ -117,45 +214,3 @@ function extractCellValue(c, sharedStrings, getFormatCode) {
117
214
  }
118
215
  }
119
216
  }
120
- // ---------------------------------------------------------------------------
121
- // Main export
122
- // ---------------------------------------------------------------------------
123
- export function parseSheet(xml, sharedStrings, getFormatCode) {
124
- const doc = parser.parse(xml);
125
- const ws = doc?.worksheet;
126
- if (!ws)
127
- return { cells: [], merges: [], maxRow: 0, maxCol: 0 };
128
- // 1. Walk rows → cells
129
- const cells = [];
130
- let maxRow = 0;
131
- let maxCol = 0;
132
- const rows = ws.sheetData?.row ?? [];
133
- for (const row of rows) {
134
- const cList = row.c ?? [];
135
- for (const c of cList) {
136
- const cell = extractCellValue(c, sharedStrings, getFormatCode);
137
- if (cell === null)
138
- continue;
139
- cells.push(cell);
140
- if (cell.row > maxRow)
141
- maxRow = cell.row;
142
- if (cell.col > maxCol)
143
- maxCol = cell.col;
144
- }
145
- }
146
- // 2. Parse mergeCells
147
- const merges = [];
148
- const mergeCellList = ws.mergeCells?.mergeCell ?? [];
149
- for (const mc of mergeCellList) {
150
- const ref = mc["@_ref"];
151
- if (!ref)
152
- continue;
153
- try {
154
- merges.push(parseMergeRef(ref));
155
- }
156
- catch {
157
- // Malformed ref: skip without breaking overall parse
158
- }
159
- }
160
- return { cells, merges, maxRow, maxCol };
161
- }
package/package.json CHANGED
@@ -1,10 +1,14 @@
1
1
  {
2
2
  "name": "@mkterswingman/5mghost-wonder",
3
- "version": "0.0.1",
3
+ "version": "0.0.3",
4
4
  "description": "企微文档读取 CLI — WeCom document reader",
5
5
  "type": "module",
6
- "engines": { "node": ">=18" },
7
- "bin": { "wonder": "./dist/cli.js" },
6
+ "engines": {
7
+ "node": ">=18"
8
+ },
9
+ "bin": {
10
+ "wonder": "./dist/cli.js"
11
+ },
8
12
  "publishConfig": {
9
13
  "access": "public"
10
14
  },
@@ -21,17 +25,18 @@
21
25
  "scripts": {
22
26
  "build": "rm -rf dist && tsc && chmod +x dist/cli.js",
23
27
  "typecheck": "tsc --noEmit",
24
- "test": "node dist/wecom/url.test.js",
28
+ "test": "node dist/wecom/url.test.js && node --test tests/sheet-parity.test.mjs && node --test tests/export-sanitize.test.mjs",
25
29
  "smoke": "npm run build && node dist/cli.js help > /dev/null",
26
30
  "postinstall": "node scripts/postinstall.mjs"
27
31
  },
28
32
  "dependencies": {
33
+ "@mkterswingman/5mghost-agent-skills": "^0.0.1",
29
34
  "@mkterswingman/5mghost-auth": "^0.0.1",
30
35
  "@mkterswingman/5mghost-telemetry": "^0.0.1",
31
- "@mkterswingman/5mghost-agent-skills": "^0.0.1",
32
- "ws": "^8.18.0",
33
36
  "fast-xml-parser": "^4.5.0",
34
- "jszip": "^3.10.1"
37
+ "jszip": "^3.10.1",
38
+ "saxes": "^6.0.0",
39
+ "ws": "^8.18.0"
35
40
  },
36
41
  "devDependencies": {
37
42
  "@types/node": "^22.0.0",
@@ -81,62 +81,62 @@ wonder wecom status
81
81
 
82
82
  ---
83
83
 
84
- ## Step 4 — Detect and install docx skill
84
+ ## Step 4 — Detect and install docx skill (for the AI you are running in)
85
85
 
86
- Check plugin cache first:
86
+ **Important:** `wonder check` does NOT check for docx/pptx skills — wonder
87
+ is a CLI and does not know which AI client is invoking it. Each AI reads
88
+ skills from its own directory. As the AI running this setup, check
89
+ *your own* skills directory.
87
90
 
88
- ```bash
89
- ls ~/.claude-internal/plugins/cache/anthropic-agent-skills/document-skills/*/skills/docx/SKILL.md 2>/dev/null | head -1
90
- ```
91
+ Skills directory by AI client:
91
92
 
92
- Check user skills:
93
+ | AI client | Skills directory |
94
+ |------------------------|---------------------------------|
95
+ | Claude Code | `~/.claude/skills/` |
96
+ | Claude Code (internal) | `~/.claude-internal/skills/` |
97
+ | Codex CLI | `~/.codex/skills/` |
98
+ | Codex CLI (internal) | `~/.codex-internal/skills/` |
99
+ | Gemini CLI | `~/.gemini/skills/` |
100
+ | Gemini CLI (internal) | `~/.gemini-internal/skills/` |
101
+ | OpenClaw | `~/.openclaw/skills/` |
102
+ | WorkBuddy | `~/.workbuddy/skills/` |
103
+ | CodeBuddy | `~/.codebuddy/skills/` |
104
+
105
+ Replace `<SKILLS_DIR>` with the skills directory from your own row in the table above. Claude Code variants may also
106
+ carry the skill via the plugin cache — check that first:
93
107
 
94
108
  ```bash
95
- ls ~/.claude-internal/skills/docx/SKILL.md 2>/dev/null
109
+ ls ~/.claude-internal/plugins/cache/anthropic-agent-skills/*/skills/docx/SKILL.md 2>/dev/null | head -1
110
+ ls <SKILLS_DIR>/docx/SKILL.md 2>/dev/null
96
111
  ```
97
112
 
98
113
  **If neither exists**, download from GitHub:
99
114
 
100
115
  ```bash
101
- mkdir -p ~/.claude-internal/skills/docx
116
+ mkdir -p <SKILLS_DIR>/docx
102
117
  curl -fsSL https://raw.githubusercontent.com/anthropics/skills/main/skills/docx/SKILL.md \
103
- -o ~/.claude-internal/skills/docx/SKILL.md
104
- ```
105
-
106
- Confirm:
107
-
108
- ```bash
109
- ls -lh ~/.claude-internal/skills/docx/SKILL.md
118
+ -o <SKILLS_DIR>/docx/SKILL.md
119
+ ls -lh <SKILLS_DIR>/docx/SKILL.md
110
120
  ```
111
121
 
112
122
  ---
113
123
 
114
- ## Step 5 — Detect and install pptx skill
115
-
116
- Check plugin cache first:
124
+ ## Step 5 — Detect and install pptx skill (for the AI you are running in)
117
125
 
118
- ```bash
119
- ls ~/.claude-internal/plugins/cache/anthropic-agent-skills/document-skills/*/skills/pptx/SKILL.md 2>/dev/null | head -1
120
- ```
121
-
122
- Check user skills:
126
+ Same pattern — substitute `<SKILLS_DIR>`:
123
127
 
124
128
  ```bash
125
- ls ~/.claude-internal/skills/pptx/SKILL.md 2>/dev/null
129
+ ls ~/.claude-internal/plugins/cache/anthropic-agent-skills/*/skills/pptx/SKILL.md 2>/dev/null | head -1
130
+ ls <SKILLS_DIR>/pptx/SKILL.md 2>/dev/null
126
131
  ```
127
132
 
128
- **If neither exists**, download from GitHub:
133
+ **If missing:**
129
134
 
130
135
  ```bash
131
- mkdir -p ~/.claude-internal/skills/pptx
136
+ mkdir -p <SKILLS_DIR>/pptx
132
137
  curl -fsSL https://raw.githubusercontent.com/anthropics/skills/main/skills/pptx/SKILL.md \
133
- -o ~/.claude-internal/skills/pptx/SKILL.md
134
- ```
135
-
136
- Confirm:
137
-
138
- ```bash
139
- ls -lh ~/.claude-internal/skills/pptx/SKILL.md
138
+ -o <SKILLS_DIR>/pptx/SKILL.md
139
+ ls -lh <SKILLS_DIR>/pptx/SKILL.md
140
140
  ```
141
141
 
142
142
  ---
@@ -106,6 +106,28 @@ Read("/Users/<you>/Downloads/5mghost-wonder/media/image3.png")
106
106
 
107
107
  **Note:** Images are full-resolution originals (up to several MB each). Only load images the user specifically asks about.
108
108
 
109
+ ### Viewing visual layout (optional)
110
+
111
+ Use this step when the cell JSON alone can't answer the question because the sheet's meaning comes from **visual structure** rather than from the cell values themselves. Typical signals:
112
+
113
+ - Gantt chart (date columns × task rows, coloured blocks across cell ranges)
114
+ - Calendar (week grid with merged day cells or coloured categories)
115
+ - Status board / roadmap (colour-coded cells indicating stage, owner, priority)
116
+ - Large merge-to-cell ratio in the JSON (`merges.length` is a non-trivial fraction of `cells.length`)
117
+ - User explicitly asks about "how it looks", "颜色", "排版", "这个图表", "这张表的结构"
118
+
119
+ Do **not** run the render for plain data tables or lookup sheets, or when the user just wants a value. The render takes ~30 s and produces a 10+ MB PDF per file.
120
+
121
+ Render the whole xlsx (one PDF page per tab, preserves layout, merges, fills, borders):
122
+
123
+ ```bash
124
+ soffice --headless \
125
+ --convert-to 'pdf:calc_pdf_Export:{"SinglePageSheets":{"type":"boolean","value":"true"}}' \
126
+ --outdir /tmp/ <path-to-downloaded-xlsx>
127
+ ```
128
+
129
+ Then use the Read tool on the generated PDF. Page N corresponds to the Nth tab in workbook order (same as `tabs[]` in the metadata output).
130
+
109
131
  ---
110
132
 
111
133
  ## docx Workflow (`doc/w3_`, `doc/e2_`)
@@ -221,6 +243,7 @@ for slide in prs.slides:
221
243
  | pptx slice crash | `prs.slides[:N]` → `AttributeError: 'list' object has no attribute 'rId'` | Use `for slide in prs.slides` |
222
244
  | Cookie expiry | Cookie valid for 7–30 days | Run `wonder wecom cookie` to refresh |
223
245
  | xlsx images are full-size | Original images can be up to 6 MB each | Only read images when user specifically needs them |
246
+ | xlsx visual layout needs soffice | Gantt/calendar/coloured boards lose meaning in JSON alone | Run the optional soffice render step in the xlsx section; CLI does not auto-render |
224
247
  | smartpage unsupported | Export API returns 0% progress forever | Manual browser export |
225
248
 
226
249
  ---