npm - @mkterswingman/5mghost-wonder - Versions diffs - 0.0.1 → 0.0.3 - Mend

@mkterswingman/5mghost-wonder 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/dist/cli.js +9 -1
package/dist/commands/check.js +27 -54
package/dist/commands/read.js +4 -0
package/dist/commands/uninstall.js +75 -2
package/dist/platform/paths.js +71 -2
package/dist/wecom/export.js +50 -5
package/dist/xlsx/sheet.js +163 -108
package/package.json +12 -7
package/skills/setup-5mghost-wonder/SKILL.md +33 -33
package/skills/use-5mghost-wonder/SKILL.md +23 -0

package/dist/cli.js CHANGED Viewed

@@ -2,7 +2,7 @@
 // src/cli.ts
 // Main entry point for the `wonder` CLI.
 // Telemetry wired by P1-05. Auth wired by P1-04.
-import { resolveWonderPaths } from "./platform/paths.js";
+import { resolveWonderPaths, migrateLegacyWonderDir } from "./platform/paths.js";
 import { dispatchWonderCommand } from "./commands/index.js";
 import { runHelpCommand } from "./commands/help.js";
 import { createWonderTelemetryRuntime } from "./telemetry/runtime.js";
@@ -19,6 +19,14 @@ process.on("unhandledRejection", (reason) => {
     process.exit(1);
 });
 const argv = process.argv.slice(2);
+// Phase 5 migration: move pre-Phase-5 `~/.wonder/` contents into the aligned
+// `~/.mkterswingman/5mghost-wonder/`. Idempotent and silent on no-op.
+try {
+    migrateLegacyWonderDir();
+}
+catch {
+    // Migration failure must never break the CLI startup.
+}
 const paths = resolveWonderPaths();
 const io = {
     stdout: (m) => process.stdout.write(m + "\n"),

package/dist/commands/check.js CHANGED Viewed

@@ -5,7 +5,9 @@
 //   - auth: mkterswingman auth JWT/PAT
 //   - wecom-cookie: WeCom cookies.json presence + live validity probe
 //   - pandoc: CLI on PATH (used by use-skill for docx/pptx text)
-//   - soffice: CLI on PATH + real executable (broken symlinks are rejected)
+//   - soffice: CLI on PATH + real executable (optional — only the visual-
+//     layout xlsx render and docx/pptx PDF conversion need it; the default
+//     JSON read path does not shell out to soffice)
 //   - docx-skill: docx SKILL.md present in plugin cache or user skills dir
 //   - pptx-skill: pptx SKILL.md present in plugin cache or user skills dir
 //   - cache: local export cache directory state (informational)
@@ -14,10 +16,9 @@
 // Each check produces { label, ok, hint? }. The command exits 0 iff all
 // required checks pass. docx/pptx skills are optional because some consumers
 // (raw JSON, non-Claude AI clients) do not need the Anthropic bundled skills.
-import { accessSync, constants, existsSync, readdirSync, realpathSync, statSync } from "node:fs";
-import { delimiter, join, resolve } from "node:path";
+import { accessSync, constants, existsSync, realpathSync, statSync } from "node:fs";
+import { delimiter, resolve } from "node:path";
 import { spawnSync } from "node:child_process";
-import { homedir } from "node:os";
 import { resolveWonderPaths } from "../platform/paths.js";
 import { getCookieStatus } from "../wecom/cookies.js";
 import { describeCacheDir } from "../wecom/cache.js";
@@ -42,12 +43,13 @@ function findOnPath(binName) {
     }
     return null;
 }
-function checkExecutable(binName, installHints) {
+function checkExecutable(binName, installHints, optional = false) {
     const found = findOnPath(binName);
     if (!found) {
         return {
             label: binName,
             ok: false,
+            optional,
             hint: installHints[process.platform] ?? installHints["default"] ?? `install ${binName}`,
         };
     }
@@ -61,6 +63,7 @@ function checkExecutable(binName, installHints) {
         return {
             label: binName,
             ok: false,
+            optional,
             hint: `${binName} is a broken symlink at ${found}. Reinstall: ${installHints[process.platform] ?? installHints["default"]}`,
             detail: found,
         };
@@ -74,6 +77,7 @@ function checkExecutable(binName, installHints) {
         return {
             label: binName,
             ok: false,
+            optional,
             hint: `${binName} is on PATH (${found}) but failed to execute. Reinstall: ${installHints[process.platform] ?? installHints["default"]}`,
             detail: `realpath=${realTarget}`,
         };
@@ -84,50 +88,7 @@ function checkExecutable(binName, installHints) {
         detail: found === realTarget ? found : `${found} → ${realTarget}`,
     };
 }
-function checkSkillFile(skillName, opts) {
-    // Check plugin cache first (Anthropic-bundled skills)
-    const pluginGlob = join(opts.home, ".claude-internal", "plugins", "cache", "anthropic-agent-skills");
-    let foundPluginVersion = null;
-    if (existsSync(pluginGlob)) {
-        try {
-            for (const entry of readdirSync(pluginGlob, { withFileTypes: true })) {
-                if (!entry.isDirectory())
-                    continue;
-                const candidate = join(pluginGlob, entry.name, "skills", skillName, "SKILL.md");
-                if (existsSync(candidate)) {
-                    foundPluginVersion = entry.name;
-                    break;
-                }
-            }
-        }
-        catch {
-            /* fall through to user skills */
-        }
-    }
-    if (foundPluginVersion) {
-        return {
-            label: `${skillName}-skill`,
-            ok: true,
-            detail: `plugin cache (${foundPluginVersion})`,
-        };
-    }
-    // User-installed skills (both -internal and plain variants)
-    for (const claudeRoot of [".claude-internal", ".claude"]) {
-        const userSkill = join(opts.home, claudeRoot, "skills", skillName, "SKILL.md");
-        if (existsSync(userSkill)) {
-            return { label: `${skillName}-skill`, ok: true, detail: userSkill };
-        }
-    }
-    return {
-        label: `${skillName}-skill`,
-        ok: false,
-        optional: true,
-        hint: `For Claude consumers that read .${skillName}: ` +
-            `mkdir -p ~/.claude-internal/skills/${skillName} && ` +
-            `curl -fsSL https://raw.githubusercontent.com/anthropics/skills/main/skills/${skillName}/SKILL.md ` +
-            `-o ~/.claude-internal/skills/${skillName}/SKILL.md`,
-    };
-}
+// (checkSkillFile removed in Phase 5 — see the note at the check call site.)
 export async function runCheckCommand(_argv, context) {
     const paths = resolveWonderPaths({ homeDir: context.homeDir });
     const items = [];
@@ -195,17 +156,29 @@ export async function runCheckCommand(_argv, context) {
         win32: "winget install pandoc",
         default: "https://pandoc.org/installing.html",
     }));
-    // ── LibreOffice soffice ─────────────────────────────────────────────────
+    // ── LibreOffice soffice (optional) ──────────────────────────────────────
+    // The default JSON read path (`wonder read <url>` / `--tab`) never touches
+    // soffice. It is only needed for (a) the optional xlsx visual-layout render
+    // step and (b) docx/pptx → PDF conversion inside the AI-side use skill.
+    // Mark as optional so a missing install does not make `wonder check` exit 1.
     items.push(checkExecutable("soffice", {
         darwin: "brew install --cask libreoffice",
         linux: "sudo apt install -y libreoffice  (or: sudo dnf install -y libreoffice)",
         win32: "winget install LibreOffice.LibreOffice",
         default: "https://www.libreoffice.org/download",
-    }));
+    },
+    /* optional */ true));
     // ── docx / pptx skill files ─────────────────────────────────────────────
-    const homeForSkills = context.homeDir || homedir();
-    items.push(checkSkillFile("docx", { home: homeForSkills }));
-    items.push(checkSkillFile("pptx", { home: homeForSkills }));
+    // Phase 5: intentionally NOT checked here.
+    //
+    // wonder is a headless CLI; it does not know which AI client (Claude /
+    // Codex / Gemini / OpenClaw / …) is invoking it, and each client reads
+    // skills from its own `skillsDir` (~/.claude/skills, ~/.codex/skills,
+    // ~/.gemini/skills, etc). A wonder-side check that only looked under
+    // `~/.claude*/skills` would false-positive for users calling from Codex
+    // or Gemini. The dependable place for "does my AI know how to read
+    // .docx / .pptx?" is the `setup-5mghost-wonder` skill, which runs in
+    // the calling AI and can inspect its own skills directory.
     // ── Cache directory (informational) ─────────────────────────────────────
     const cache = describeCacheDir(paths.cacheDir);
     items.push({

package/dist/commands/read.js CHANGED Viewed

@@ -119,6 +119,10 @@ export async function runReadCommand(args, context) {
                 sourceUrl: url,
                 cookies,
                 saveDir: resolvedSaveDir,
+                onProgress: (pct) => {
+                    // Progress → stderr only, so stdout stays valid JSON for pipes.
+                    context.io.stderr(`Exporting… ${Math.max(1, Math.min(99, pct))}%`);
+                },
             });
             if (!noCache && tokValue) {
                 try {

package/dist/commands/uninstall.js CHANGED Viewed

@@ -1,14 +1,87 @@
 // src/commands/uninstall.ts
-// Runs `npm uninstall -g @mkterswingman/5mghost-wonder`.
-// NpmExecutor is injectable for unit testing.
+// Uninstall flow:
+//   1. Remove installed wonder skills from every detected AI client via the
+//      same manifest used at install time. Receipt gating in `removeSkills`
+//      only deletes directories this package owns.
+//   2. Run `npm uninstall -g @mkterswingman/5mghost-wonder`.
+//   3. Print the location of the local data dir (cookies + export cache)
+//      and the exact command to remove it. We do NOT delete data
+//      automatically — cookies are still reusable if the user reinstalls.
+import { resolveWonderPaths } from "../platform/paths.js";
 import { defaultNpmExecutor } from "../platform/npm.js";
+import { fileURLToPath } from "node:url";
+import { dirname, resolve } from "node:path";
 export async function runUninstallCommand(_argv, context, executor = defaultNpmExecutor) {
     context.io.stdout("Uninstalling 5mghost-wonder...");
+    // Step 1 — best-effort skill removal. Failures here do not block npm.
+    await removeInstalledSkills(context);
+    // Step 2 — npm uninstall.
     const result = executor(["uninstall", "-g", "@mkterswingman/5mghost-wonder"]);
     if (result.exitCode !== 0) {
         context.io.stderr(`Uninstall failed:\n${result.stderr}`);
         return { exitCode: 1 };
     }
+    // Step 3 — tell the user about residual data.
+    const paths = resolveWonderPaths({ homeDir: context.homeDir });
     context.io.stdout("Uninstalled successfully.");
+    context.io.stdout("");
+    context.io.stdout(`Local data kept at: ${paths.wonderDir}\n` +
+        `  (cookies + export cache; reused if you reinstall)\n` +
+        `  To remove manually:  rm -rf ${paths.wonderDir}`);
     return { exitCode: 0 };
 }
+async function removeInstalledSkills(context) {
+    let removeSkills;
+    let listDetectedAgents;
+    try {
+        ({ removeSkills, listDetectedAgents } = await import("@mkterswingman/5mghost-agent-skills"));
+    }
+    catch {
+        context.io.stdout("  (agent-skills SDK unavailable — skipping skill removal)");
+        return;
+    }
+    const manifestPath = findManifestPath();
+    if (!manifestPath) {
+        context.io.stdout("  (skills.manifest.json not found — skipping skill removal)");
+        return;
+    }
+    let agents;
+    try {
+        agents = listDetectedAgents();
+    }
+    catch {
+        context.io.stdout("  (could not detect AI clients — skipping skill removal)");
+        return;
+    }
+    if (!agents || agents.length === 0) {
+        context.io.stdout("  (no AI clients detected — skipping skill removal)");
+        return;
+    }
+    try {
+        const summary = removeSkills({ manifestPath, detectedAgents: agents });
+        const removed = summary.results.filter((r) => r.status === "removed");
+        if (removed.length > 0) {
+            const names = removed.map((r) => `${r.skill}@${r.agent}`).join(", ");
+            context.io.stdout(`  removed skills: ${names}`);
+        }
+        else {
+            context.io.stdout("  no wonder-owned skills found in detected AI clients");
+        }
+    }
+    catch (err) {
+        context.io.stdout(`  (skill removal failed: ${String(err)})`);
+    }
+}
+/**
+ * Resolve the package's `skills.manifest.json` from the compiled CLI.
+ * `dist/commands/uninstall.js` lives two levels below the package root.
+ */
+function findManifestPath() {
+    try {
+        const here = dirname(fileURLToPath(import.meta.url));
+        return resolve(here, "..", "..", "skills.manifest.json");
+    }
+    catch {
+        return null;
+    }
+}

package/dist/platform/paths.js CHANGED Viewed

@@ -1,6 +1,13 @@
 // src/platform/paths.ts
 // Resolves all filesystem paths used by wonder at runtime.
+//
+// Since Phase 5 all wonder runtime data lives under
+// `~/.mkterswingman/5mghost-wonder/` to stay consistent with sibling
+// packages (5mghost-insider, 5mghost-rover). Pre-Phase-5 installs had
+// data under `~/.wonder/`; `migrateLegacyWonderDir()` handles that.
+//
 // __dirname equivalent uses import.meta.url (NodeNext ESM).
+import { existsSync, renameSync, mkdirSync, readdirSync } from "node:fs";
 import { fileURLToPath } from "url";
 import { dirname, resolve } from "path";
 const __filename = fileURLToPath(import.meta.url);
@@ -10,8 +17,10 @@ export function resolveWonderPaths(opts) {
         process.env["HOME"] ??
         process.env["USERPROFILE"] ??
         "";
-    const wonderDir = resolve(homeDir, ".wonder");
-    // At runtime, __dirname = dist/ (tsc output). The manifest lives one level up.
+    // Phase 5: ~/.mkterswingman/5mghost-wonder/ (aligned with insider / rover).
+    const wonderDir = resolve(homeDir, ".mkterswingman", "5mghost-wonder");
+    // At runtime, __dirname = dist/platform/ (tsc output). The manifest lives
+    // two levels up at dist/../skills.manifest.json.
     const skillsManifestPath = resolve(__dirname, "../skills.manifest.json");
     return {
         homeDir,
@@ -23,3 +32,63 @@ export function resolveWonderPaths(opts) {
         defaultSaveDir: resolve(homeDir, "Downloads", "5mghost-wonder"),
     };
 }
+/**
+ * One-shot migration from pre-Phase-5 `~/.wonder/` to the aligned
+ * `~/.mkterswingman/5mghost-wonder/`. Idempotent: safe to call on every CLI
+ * invocation. Returns true when a migration actually happened.
+ *
+ * Two cases:
+ *   - new dir absent → rename the whole legacy dir across.
+ *   - new dir present (e.g. telemetry already created it) → move individual
+ *     entries that don't yet exist in the new dir, skipping any conflicts to
+ *     avoid clobbering newer data.
+ */
+export function migrateLegacyWonderDir(opts) {
+    const homeDir = opts?.homeDir ??
+        process.env["HOME"] ??
+        process.env["USERPROFILE"] ??
+        "";
+    if (!homeDir)
+        return false;
+    const legacyDir = resolve(homeDir, ".wonder");
+    const newParent = resolve(homeDir, ".mkterswingman");
+    const newDir = resolve(newParent, "5mghost-wonder");
+    if (!existsSync(legacyDir))
+        return false;
+    try {
+        if (!existsSync(newDir)) {
+            mkdirSync(newParent, { recursive: true });
+            renameSync(legacyDir, newDir);
+            return true;
+        }
+        // New dir present: merge entry-by-entry.
+        let moved = false;
+        for (const entry of readdirSync(legacyDir)) {
+            const src = resolve(legacyDir, entry);
+            const dst = resolve(newDir, entry);
+            if (existsSync(dst))
+                continue; // conservative: keep newer data intact
+            try {
+                renameSync(src, dst);
+                moved = true;
+            }
+            catch {
+                /* skip per-entry failure */
+            }
+        }
+        // Best-effort cleanup of legacy dir if it's now empty.
+        try {
+            const remaining = readdirSync(legacyDir);
+            if (remaining.length === 0) {
+                renameSync(legacyDir, `${legacyDir}.migrated`);
+            }
+        }
+        catch {
+            /* ignore */
+        }
+        return moved;
+    }
+    catch {
+        return false;
+    }
+}

package/dist/wecom/export.js CHANGED Viewed

@@ -7,7 +7,7 @@
 //
 // All three document types (sheet/doc/slide) share this same flow.
 // URL parsing is in ./url.ts; cookie management is in ./cookies.ts (P1-03).
-import { writeFileSync, mkdirSync } from "fs";
+import { writeFileSync, mkdirSync, existsSync } from "fs";
 import { join } from "path";
 /** Error thrown by exportWecomDoc() */
 export class ExportError extends Error {
@@ -102,7 +102,7 @@ async function createExportTask(docId, cookieHeader, sourceUrl) {
  *   - maxPollAttempts exceeded without Done
  *   - Done but no file_url in response
  */
-async function pollExportProgress(operationId, xsrf, cookieHeader, sourceUrl, maxPollAttempts, pollIntervalMs) {
+async function pollExportProgress(operationId, xsrf, cookieHeader, sourceUrl, maxPollAttempts, pollIntervalMs, onProgress) {
     // Use the supplied interval only when the caller passes a non-default value
     // (typical: a test pins a short, deterministic interval).
     const useFixedInterval = pollIntervalMs !== DEFAULT_POLL_INTERVAL_MS;
@@ -159,6 +159,13 @@ async function pollExportProgress(operationId, xsrf, cookieHeader, sourceUrl, ma
             }
             return { fileUrl, fileName };
         }
+        // Fire progress callback before the next sleep so users see movement.
+        if (onProgress && progress > 0) {
+            try {
+                onProgress(progress, attempt);
+            }
+            catch { /* never fatal */ }
+        }
         // status is still in-progress — continue polling
     }
     throw new ExportError("poll_timeout", `Export did not complete after ${maxPollAttempts} attempts (~${Math.round((maxPollAttempts * pollIntervalMs) / 1000)}s)`, { maxPollAttempts, pollIntervalMs });
@@ -195,7 +202,8 @@ async function downloadExportedFile(fileUrl, fileName, saveDir) {
     catch (err) {
         throw new ExportError("write_error", `Failed to create save directory "${saveDir}": ${String(err)}`, err);
     }
-    const filePath = join(saveDir, fileName);
+    const safeName = sanitizeFilename(fileName);
+    const filePath = pickNonClobberingPath(saveDir, safeName);
     try {
         writeFileSync(filePath, Buffer.from(buffer));
     }
@@ -204,6 +212,43 @@ async function downloadExportedFile(fileUrl, fileName, saveDir) {
     }
     return { filePath, fileSizeBytes: buffer.byteLength };
 }
+/**
+ * Strip any directory components and disallow control characters. The upstream
+ * `fileName` is untrusted: a `../../etc/passwd` or `foo/bar.xlsx` value would
+ * otherwise escape `saveDir` via `path.join`. We keep it minimal — basename
+ * only, and drop control characters — rather than a full allow-list that
+ * would mangle Chinese titles.
+ */
+function sanitizeFilename(fileName) {
+    // Strip leading path segments; handles both POSIX and win32 separators.
+    let name = fileName.split(/[\\/]/).pop() ?? "";
+    // Drop control chars (0x00-0x1F, 0x7F).
+    // eslint-disable-next-line no-control-regex
+    name = name.replace(/[\x00-\x1f\x7f]/g, "");
+    name = name.trim();
+    if (name === "" || name === "." || name === "..") {
+        return "wecom-download";
+    }
+    return name;
+}
+/**
+ * Return `join(saveDir, name)` unless that exists, in which case append
+ * ` (2)`, ` (3)`, … before the extension. Never overwrites existing files.
+ */
+function pickNonClobberingPath(saveDir, name) {
+    const base = join(saveDir, name);
+    if (!existsSync(base))
+        return base;
+    const dotIdx = name.lastIndexOf(".");
+    const stem = dotIdx > 0 ? name.slice(0, dotIdx) : name;
+    const ext = dotIdx > 0 ? name.slice(dotIdx) : "";
+    for (let i = 2; i < 1000; i++) {
+        const candidate = join(saveDir, `${stem} (${i})${ext}`);
+        if (!existsSync(candidate))
+            return candidate;
+    }
+    return join(saveDir, `${stem} (${Date.now()})${ext}`);
+}
 // ---------------------------------------------------------------------------
 // Public API
 // ---------------------------------------------------------------------------
@@ -223,13 +268,13 @@ const DEFAULT_POLL_INTERVAL_MS = 2000;
  * @throws ExportError with a typed `kind` field on any failure
  */
 export async function exportWecomDoc(input) {
-    const { docId, docType, sourceUrl, cookies, saveDir, maxPollAttempts = DEFAULT_MAX_POLL_ATTEMPTS, pollIntervalMs = DEFAULT_POLL_INTERVAL_MS, } = input;
+    const { docId, docType, sourceUrl, cookies, saveDir, maxPollAttempts = DEFAULT_MAX_POLL_ATTEMPTS, pollIntervalMs = DEFAULT_POLL_INTERVAL_MS, onProgress, } = input;
     const cookieHeader = buildCookieHeader(cookies);
     const xsrf = buildXsrfToken(cookies);
     // Step 1: Create export task
     const operationId = await createExportTask(docId, cookieHeader, sourceUrl);
     // Step 2: Poll until Done
-    const { fileUrl, fileName } = await pollExportProgress(operationId, xsrf, cookieHeader, sourceUrl, maxPollAttempts, pollIntervalMs);
+    const { fileUrl, fileName } = await pollExportProgress(operationId, xsrf, cookieHeader, sourceUrl, maxPollAttempts, pollIntervalMs, onProgress);
     // Step 3: Download
     const { filePath, fileSizeBytes } = await downloadExportedFile(fileUrl, fileName, saveDir);
     return { filePath, fileName, fileSizeBytes, docType };

package/dist/xlsx/sheet.js CHANGED Viewed

@@ -1,39 +1,27 @@
 // src/xlsx/sheet.ts
-// Parses xl/worksheets/sheet{N}.xml.
-// Outputs a sparse cell array, merge ranges, and bounding-box dimensions.
-// Dependencies are injected: sharedStrings[] from shared-strings.ts,
-// getFormatCode() from styles.ts. This module does not read files.
-import { XMLParser } from "fast-xml-parser";
+// Parses xl/worksheets/sheet{N}.xml via a streaming SAX parser (saxes).
+//
+// Why SAX: large WeCom xlsx exports (hundreds of MB) can produce tens of MB
+// of sheet.xml, which under the previous fast-xml-parser DOM approach blew
+// up peak memory to 1–2 GB. SAX keeps peak heap flat (~tens of MB) because
+// we never materialise the whole parse tree — we build the output cell
+// array as we go and forget everything else.
+//
+// The CLI JSON output is byte-for-byte identical to the prior DOM
+// implementation (verified by the Phase 5 test fixtures). Dependencies are
+// still injected: `sharedStrings[]` from shared-strings.ts and
+// `getFormatCode()` from styles.ts.
+import { SaxesParser } from "saxes";
 // ---------------------------------------------------------------------------
-// XMLParser (module-level singleton — stateless, safe to reuse)
+// Cell ref helpers (pure functions, reused)
 // ---------------------------------------------------------------------------
-const parser = new XMLParser({
-    ignoreAttributes: false,
-    attributeNamePrefix: "@_",
-    // Force arrays to eliminate single-node vs. array ambiguity.
-    isArray: (name) => name === "row" || name === "c" || name === "mergeCell" || name === "r",
-    // Keep all values as strings; we convert manually based on the t attribute.
-    parseTagValue: false,
-    // Preserve whitespace (cells may contain spaces).
-    trimValues: false,
-});
-// ---------------------------------------------------------------------------
-// Internal helpers
-// ---------------------------------------------------------------------------
-/**
- * Column letters (uppercase) → 0-based column index.
- * A→0, Z→25, AA→26, AZ→51, BA→52.
- */
 export function colLettersToIndex(letters) {
     let result = 0;
     for (let i = 0; i < letters.length; i++) {
-        result = result * 26 + (letters.charCodeAt(i) - 64); // 'A'=65 → 1
+        result = result * 26 + (letters.charCodeAt(i) - 64);
     }
-    return result - 1; // convert to 0-based
+    return result - 1;
 }
-/**
- * "B5" → { row: 4, col: 1 } (0-based).
- */
 export function parseCellRef(ref) {
     const match = /^([A-Z]+)(\d+)$/.exec(ref);
     if (!match)
@@ -43,9 +31,6 @@ export function parseCellRef(ref) {
         row: Number(match[2]) - 1,
     };
 }
-/**
- * "A1:C3" → MergeRange (0-based).
- */
 function parseMergeRef(ref) {
     const sep = ref.indexOf(":");
     if (sep === -1)
@@ -54,62 +39,174 @@ function parseMergeRef(ref) {
     const e = parseCellRef(ref.slice(sep + 1));
     return { startRow: s.row, startCol: s.col, endRow: e.row, endCol: e.col };
 }
-/**
- * Extract text from an <is> node (inlineStr).
- * Handles simple <is><t>text</t></is> and rich-text <is><r><t>part</t></r>…</is>.
- */
-function extractInlineStr(is) {
-    if (!is)
-        return "";
-    if (is.r) {
-        const runs = Array.isArray(is.r) ? is.r : [is.r];
-        return runs.map((r) => String(r?.t ?? "")).join("");
-    }
-    return String(is.t ?? "");
+// ---------------------------------------------------------------------------
+// SAX-based parseSheet
+// ---------------------------------------------------------------------------
+export function parseSheet(xml, sharedStrings, getFormatCode) {
+    const parser = new SaxesParser({ xmlns: false });
+    const cells = [];
+    const merges = [];
+    let maxRow = 0;
+    let maxCol = 0;
+    // Cell-building state (resets on every <c>…</c>).
+    let active = false;
+    let cRef = "";
+    let cType = ""; // "" | "s" | "str" | "inlineStr" | "b" | "n" | "e"
+    let cStyleIdx = -1;
+    let inV = false;
+    let vBuf = "";
+    let inIs = false;
+    let inT = false;
+    let isBuf = "";
+    let tBuf = "";
+    parser.on("opentag", (tag) => {
+        switch (tag.name) {
+            case "c": {
+                active = true;
+                cRef = tag.attributes["r"] ?? "";
+                cType = tag.attributes["t"] ?? "";
+                const sAttr = tag.attributes["s"];
+                cStyleIdx = sAttr != null ? Number(sAttr) : -1;
+                vBuf = "";
+                isBuf = "";
+                inV = false;
+                inIs = false;
+                inT = false;
+                tBuf = "";
+                break;
+            }
+            case "v": {
+                if (active) {
+                    inV = true;
+                    vBuf = "";
+                }
+                break;
+            }
+            case "is": {
+                if (active)
+                    inIs = true;
+                break;
+            }
+            case "t": {
+                if (active && inIs) {
+                    inT = true;
+                    tBuf = "";
+                }
+                break;
+            }
+            case "mergeCell": {
+                const ref = tag.attributes["ref"];
+                if (ref) {
+                    try {
+                        merges.push(parseMergeRef(ref));
+                    }
+                    catch {
+                        /* malformed ref — skip */
+                    }
+                }
+                break;
+            }
+            default:
+                break;
+        }
+    });
+    parser.on("text", (text) => {
+        if (inT)
+            tBuf += text;
+        else if (inV)
+            vBuf += text;
+    });
+    parser.on("cdata", (cdata) => {
+        if (inT)
+            tBuf += cdata;
+        else if (inV)
+            vBuf += cdata;
+    });
+    parser.on("closetag", (tag) => {
+        switch (tag.name) {
+            case "t": {
+                if (inT) {
+                    isBuf += tBuf;
+                    tBuf = "";
+                    inT = false;
+                }
+                break;
+            }
+            case "is": {
+                inIs = false;
+                break;
+            }
+            case "v": {
+                inV = false;
+                break;
+            }
+            case "c": {
+                if (active && cRef) {
+                    const cell = buildCell(cRef, cType, cStyleIdx, vBuf, isBuf, sharedStrings, getFormatCode);
+                    if (cell) {
+                        cells.push(cell);
+                        if (cell.row > maxRow)
+                            maxRow = cell.row;
+                        if (cell.col > maxCol)
+                            maxCol = cell.col;
+                    }
+                }
+                active = false;
+                cRef = "";
+                cType = "";
+                cStyleIdx = -1;
+                vBuf = "";
+                isBuf = "";
+                inV = false;
+                inIs = false;
+                inT = false;
+                tBuf = "";
+                break;
+            }
+            default:
+                break;
+        }
+    });
+    let parseError = null;
+    parser.on("error", (err) => {
+        if (!parseError)
+            parseError = err;
+    });
+    parser.write(xml).close();
+    if (parseError)
+        throw parseError;
+    return { cells, merges, maxRow, maxCol };
 }
 /**
- * Extract a SheetCell from a single <c> node.
- * Returns null when the cell should be skipped (error, no value, no ref).
+ * Mirror of the former DOM extractCellValue() — keeps output byte-identical.
  */
-function extractCellValue(c, sharedStrings, getFormatCode) {
-    const ref = c["@_r"];
-    if (!ref)
-        return null;
+function buildCell(ref, cType, cStyleIdx, vRaw, isText, sharedStrings, getFormatCode) {
     const { row, col } = parseCellRef(ref);
-    const t = c["@_t"] ?? ""; // cell type; default = numeric
-    const sAttr = c["@_s"] != null ? Number(c["@_s"]) : -1;
-    // inlineStr: text lives in <is>, no <v>
-    if (t === "inlineStr") {
-        return { row, col, text: extractInlineStr(c.is) };
+    if (cType === "inlineStr") {
+        return { row, col, text: isText };
     }
-    // Error cells: skip entirely
-    if (t === "e")
+    if (cType === "e")
+        return null;
+    if (vRaw === "")
         return null;
-    // All other types require <v>
-    const vRaw = c.v != null ? String(c.v) : undefined;
-    if (vRaw === undefined)
-        return null; // formula not calculated or empty cell
-    switch (t) {
+    switch (cType) {
         case "s": {
-            // Shared string: <v> is the index into sharedStrings[]
             const text = sharedStrings[Number(vRaw)] ?? "";
             return { row, col, text };
         }
         case "str": {
-            // Formula result that is a string: <v> is the literal text
             return { row, col, text: vRaw };
         }
         case "b": {
             return { row, col, text: vRaw === "1" ? "TRUE" : "FALSE" };
         }
         default: {
-            // Numeric (t="n" or absent t)
             const value = Number(vRaw);
             if (isNaN(value))
                 return null;
             const cell = { row, col, value };
-            if (sAttr >= 0) {
-                const format = getFormatCode(sAttr);
+            if (cStyleIdx >= 0) {
+                const format = getFormatCode(cStyleIdx);
                 if (format !== undefined)
                     cell.format = format;
             }
@@ -117,45 +214,3 @@ function extractCellValue(c, sharedStrings, getFormatCode) {
         }
     }
 }
-// ---------------------------------------------------------------------------
-// Main export
-// ---------------------------------------------------------------------------
-export function parseSheet(xml, sharedStrings, getFormatCode) {
-    const doc = parser.parse(xml);
-    const ws = doc?.worksheet;
-    if (!ws)
-        return { cells: [], merges: [], maxRow: 0, maxCol: 0 };
-    // 1. Walk rows → cells
-    const cells = [];
-    let maxRow = 0;
-    let maxCol = 0;
-    const rows = ws.sheetData?.row ?? [];
-    for (const row of rows) {
-        const cList = row.c ?? [];
-        for (const c of cList) {
-            const cell = extractCellValue(c, sharedStrings, getFormatCode);
-            if (cell === null)
-                continue;
-            cells.push(cell);
-            if (cell.row > maxRow)
-                maxRow = cell.row;
-            if (cell.col > maxCol)
-                maxCol = cell.col;
-        }
-    }
-    // 2. Parse mergeCells
-    const merges = [];
-    const mergeCellList = ws.mergeCells?.mergeCell ?? [];
-    for (const mc of mergeCellList) {
-        const ref = mc["@_ref"];
-        if (!ref)
-            continue;
-        try {
-            merges.push(parseMergeRef(ref));
-        }
-        catch {
-            // Malformed ref: skip without breaking overall parse
-        }
-    }
-    return { cells, merges, maxRow, maxCol };
-}

package/package.json CHANGED Viewed

@@ -1,10 +1,14 @@
 {
   "name": "@mkterswingman/5mghost-wonder",
-  "version": "0.0.1",
+  "version": "0.0.3",
   "description": "企微文档读取 CLI — WeCom document reader",
   "type": "module",
-  "engines": { "node": ">=18" },
-  "bin": { "wonder": "./dist/cli.js" },
+  "engines": {
+    "node": ">=18"
+  },
+  "bin": {
+    "wonder": "./dist/cli.js"
+  },
   "publishConfig": {
     "access": "public"
   },
@@ -21,17 +25,18 @@
   "scripts": {
     "build": "rm -rf dist && tsc && chmod +x dist/cli.js",
     "typecheck": "tsc --noEmit",
-    "test": "node dist/wecom/url.test.js",
+    "test": "node dist/wecom/url.test.js && node --test tests/sheet-parity.test.mjs && node --test tests/export-sanitize.test.mjs",
     "smoke": "npm run build && node dist/cli.js help > /dev/null",
     "postinstall": "node scripts/postinstall.mjs"
   },
   "dependencies": {
+    "@mkterswingman/5mghost-agent-skills": "^0.0.1",
     "@mkterswingman/5mghost-auth": "^0.0.1",
     "@mkterswingman/5mghost-telemetry": "^0.0.1",
-    "@mkterswingman/5mghost-agent-skills": "^0.0.1",
-    "ws": "^8.18.0",
     "fast-xml-parser": "^4.5.0",
-    "jszip": "^3.10.1"
+    "jszip": "^3.10.1",
+    "saxes": "^6.0.0",
+    "ws": "^8.18.0"
   },
   "devDependencies": {
     "@types/node": "^22.0.0",

package/skills/setup-5mghost-wonder/SKILL.md CHANGED Viewed

@@ -81,62 +81,62 @@ wonder wecom status
 ---
-## Step 4 — Detect and install docx skill
+## Step 4 — Detect and install docx skill (for the AI you are running in)
-Check plugin cache first:
+**Important:** `wonder check` does NOT check for docx/pptx skills — wonder
+is a CLI and does not know which AI client is invoking it. Each AI reads
+skills from its own directory. As the AI running this setup, check
+*your own* skills directory.
-```bash
-ls ~/.claude-internal/plugins/cache/anthropic-agent-skills/document-skills/*/skills/docx/SKILL.md 2>/dev/null | head -1
-```
+Skills directory by AI client:
-Check user skills:
+| AI client              | Skills directory                |
+|------------------------|---------------------------------|
+| Claude Code            | `~/.claude/skills/`             |
+| Claude Code (internal) | `~/.claude-internal/skills/`    |
+| Codex CLI              | `~/.codex/skills/`              |
+| Codex CLI (internal)   | `~/.codex-internal/skills/`     |
+| Gemini CLI             | `~/.gemini/skills/`             |
+| Gemini CLI (internal)  | `~/.gemini-internal/skills/`    |
+| OpenClaw               | `~/.openclaw/skills/`           |
+| WorkBuddy              | `~/.workbuddy/skills/`          |
+| CodeBuddy              | `~/.codebuddy/skills/`          |
+Replace `<SKILLS_DIR>` with the skills directory listed in your row of the table above. Claude Code variants may also
+carry the skill via the plugin cache — check that first:
 ```bash
-ls ~/.claude-internal/skills/docx/SKILL.md 2>/dev/null
+ls ~/.claude-internal/plugins/cache/anthropic-agent-skills/*/skills/docx/SKILL.md 2>/dev/null | head -1
+ls <SKILLS_DIR>/docx/SKILL.md 2>/dev/null
 ```
 **If neither exists**, download from GitHub:
 ```bash
-mkdir -p ~/.claude-internal/skills/docx
+mkdir -p <SKILLS_DIR>/docx
 curl -fsSL https://raw.githubusercontent.com/anthropics/skills/main/skills/docx/SKILL.md \
-  -o ~/.claude-internal/skills/docx/SKILL.md
-```
-Confirm:
-```bash
-ls -lh ~/.claude-internal/skills/docx/SKILL.md
+  -o <SKILLS_DIR>/docx/SKILL.md
+ls -lh <SKILLS_DIR>/docx/SKILL.md
 ```
 ---
-## Step 5 — Detect and install pptx skill
-Check plugin cache first:
+## Step 5 — Detect and install pptx skill (for the AI you are running in)
-```bash
-ls ~/.claude-internal/plugins/cache/anthropic-agent-skills/document-skills/*/skills/pptx/SKILL.md 2>/dev/null | head -1
-```
-Check user skills:
+Same pattern — substitute `<SKILLS_DIR>`:
 ```bash
-ls ~/.claude-internal/skills/pptx/SKILL.md 2>/dev/null
+ls ~/.claude-internal/plugins/cache/anthropic-agent-skills/*/skills/pptx/SKILL.md 2>/dev/null | head -1
+ls <SKILLS_DIR>/pptx/SKILL.md 2>/dev/null
 ```
-**If neither exists**, download from GitHub:
+**If missing:**
 ```bash
-mkdir -p ~/.claude-internal/skills/pptx
+mkdir -p <SKILLS_DIR>/pptx
 curl -fsSL https://raw.githubusercontent.com/anthropics/skills/main/skills/pptx/SKILL.md \
-  -o ~/.claude-internal/skills/pptx/SKILL.md
-```
-Confirm:
-```bash
-ls -lh ~/.claude-internal/skills/pptx/SKILL.md
+  -o <SKILLS_DIR>/pptx/SKILL.md
+ls -lh <SKILLS_DIR>/pptx/SKILL.md
 ```
 ---

package/skills/use-5mghost-wonder/SKILL.md CHANGED Viewed

@@ -106,6 +106,28 @@ Read("/Users/<you>/Downloads/5mghost-wonder/media/image3.png")
 **Note:** Images are full-resolution originals (up to several MB each). Only load images the user specifically asks about.
+### Viewing visual layout (optional)
+Use when the cell JSON alone can't answer the question because the sheet's meaning comes from **visual structure** — not from the cell values themselves. Typical signals:
+- Gantt chart (date columns × task rows, coloured blocks across cell ranges)
+- Calendar (week grid with merged day cells or coloured categories)
+- Status board / roadmap (colour-coded cells indicating stage, owner, priority)
+- Large merge-to-cell ratio in the JSON (`merges.length` is a non-trivial fraction of `cells.length`)
+- User explicitly asks about "how it looks", "颜色", "排版", "这个图表", "这张表的结构"
+Do **not** run the render for plain data tables, lookup sheets, or when the user just wants a value. Each render takes roughly 30 s and produces a PDF of 10 MB or more per file.
+Render the whole xlsx (one PDF page per tab, preserves layout, merges, fills, borders):
+```bash
+soffice --headless \
+  --convert-to 'pdf:calc_pdf_Export:{"SinglePageSheets":{"type":"boolean","value":"true"}}' \
+  --outdir /tmp/ <path-to-downloaded-xlsx>
+```
+Then use the Read tool on the generated PDF. Page N corresponds to the Nth tab in workbook order (same as `tabs[]` in the metadata output).
 ---
 ## docx Workflow (`doc/w3_`, `doc/e2_`)
@@ -221,6 +243,7 @@ for slide in prs.slides:
 | pptx slice crash | `prs.slides[:N]` → `AttributeError: 'list' object has no attribute 'rId'` | Use `for slide in prs.slides` |
 | Cookie expiry | Cookie valid for 7–30 days | Run `wonder wecom cookie` to refresh |
 | xlsx images are full-size | Original images can be up to 6 MB each | Only read images when user specifically needs them |
+| xlsx visual layout needs soffice | Gantt/calendar/coloured boards lose meaning in JSON alone | Run the optional soffice render step in the xlsx section; CLI does not auto-render |
 | smartpage unsupported | Export API returns 0% progress forever | Manual browser export |
 ---