@mkterswingman/5mghost-wonder 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/dist/auth/runtime.js +15 -0
  2. package/dist/cli.js +75 -0
  3. package/dist/commands/auth.js +100 -0
  4. package/dist/commands/check.js +258 -0
  5. package/dist/commands/help.js +38 -0
  6. package/dist/commands/index.js +50 -0
  7. package/dist/commands/read.js +198 -0
  8. package/dist/commands/setup.js +81 -0
  9. package/dist/commands/types.js +4 -0
  10. package/dist/commands/uninstall.js +14 -0
  11. package/dist/commands/update.js +21 -0
  12. package/dist/commands/version.js +8 -0
  13. package/dist/commands/wecom.js +136 -0
  14. package/dist/platform/npm.js +14 -0
  15. package/dist/platform/paths.js +25 -0
  16. package/dist/telemetry/events.js +42 -0
  17. package/dist/telemetry/policy.js +51 -0
  18. package/dist/telemetry/runtime.js +31 -0
  19. package/dist/wecom/browser.js +344 -0
  20. package/dist/wecom/cache.js +119 -0
  21. package/dist/wecom/cookies.js +151 -0
  22. package/dist/wecom/export.js +236 -0
  23. package/dist/wecom/url.js +45 -0
  24. package/dist/wecom/url.test.js +64 -0
  25. package/dist/xlsx/drawing.js +131 -0
  26. package/dist/xlsx/metadata.js +34 -0
  27. package/dist/xlsx/parse-tab.js +124 -0
  28. package/dist/xlsx/shared-strings.js +51 -0
  29. package/dist/xlsx/sheet.js +161 -0
  30. package/dist/xlsx/styles.js +85 -0
  31. package/dist/xlsx/unzip.js +33 -0
  32. package/dist/xlsx/workbook.js +51 -0
  33. package/dist/xlsx/workbook.test.js +19 -0
  34. package/package.json +41 -0
  35. package/scripts/check-export-types.mjs +37 -0
  36. package/scripts/postinstall.mjs +50 -0
  37. package/skills/setup-5mghost-wonder/SKILL.md +245 -0
  38. package/skills/use-5mghost-wonder/SKILL.md +240 -0
  39. package/skills.manifest.json +36 -0
@@ -0,0 +1,236 @@
1
+ // src/wecom/export.ts
2
+ // WeCom document export client.
3
+ // Implements the three-step flow:
4
+ // 1. POST /v1/export/export_office → operationId
5
+ // 2. GET /v1/export/query_progress (poll until Done)
6
+ // 3. GET {file_url} → write to disk
7
+ //
8
+ // All three document types (sheet/doc/slide) share this same flow.
9
+ // URL parsing is in ./url.ts; cookie management is in ./cookies.ts (P1-03).
10
import { mkdirSync, writeFileSync } from "fs";
import { basename, join } from "path";
12
/**
 * Error type thrown by the WeCom export flow in this file.
 *
 * - `kind`   short machine-readable failure category; the values used in this
 *            file include "network_error", "export_api_error", "poll_timeout"
 *            and "write_error".
 * - `detail` whatever extra context the thrower attached (an underlying error,
 *            the parsed API response, an HTTP status object, ...).
 */
export class ExportError extends Error {
    kind;
    detail;
    constructor(kind, message, detail) {
        super(message);
        Object.assign(this, { kind, detail, name: "ExportError" });
    }
}
23
+ // ---------------------------------------------------------------------------
24
+ // Internal helpers
25
+ // ---------------------------------------------------------------------------
26
/**
 * Build the Cookie request-header value from an array of cookie entries.
 * Format: "name1=value1; name2=value2; ..."
 *
 * W-02: `;`, CR and LF are removed from both names and values so a crafted
 * cookie cannot terminate its pair early or smuggle an extra header line.
 *
 * @param cookies array of objects with `name` and `value` properties
 * @returns the joined header string ("" for an empty array)
 */
function buildCookieHeader(cookies) {
    const stripSeparators = (part) => String(part).replace(/[;\r\n]/g, "");
    return cookies
        .map((c) => `${stripSeparators(c.name)}=${stripSeparators(c.value)}`)
        .join("; ");
}
36
/**
 * Pick the value of the cookie named "TOK"; callers pass it as the `xsrf`
 * query parameter of the export API's GET requests.
 *
 * @param cookies array of objects with `name` and `value` properties
 * @returns the first TOK cookie's value, or "" when there is no TOK cookie
 *          or its value is null/undefined
 */
function buildXsrfToken(cookies) {
    const tok = cookies.find(({ name }) => name === "TOK");
    if (tok === undefined) {
        return "";
    }
    return tok.value ?? "";
}
44
+ // ---------------------------------------------------------------------------
45
+ // Step 1
46
+ // ---------------------------------------------------------------------------
47
/**
 * Step 1: POST /v1/export/export_office to create an export task.
 *
 * @param docId        document id sent as the `docId` form field
 * @param cookieHeader value for the Cookie request header
 * @param sourceUrl    value for the Referer request header
 * @returns the operationId used for polling (read from the top level of the
 *          JSON response, or from `data.operationId`)
 * @throws ExportError "network_error" when fetch rejects, "export_api_error"
 *         when the body is not JSON or carries `error_code`, and
 *         "no_operation_id" when no operationId is present
 */
async function createExportTask(docId, cookieHeader, sourceUrl) {
    const endpoint = "https://doc.weixin.qq.com/v1/export/export_office";
    const form = new URLSearchParams({ docId, version: "1", exportType: "1" });
    const request = {
        method: "POST",
        headers: {
            "Content-Type": "application/x-www-form-urlencoded",
            Cookie: cookieHeader,
            Referer: sourceUrl,
        },
        body: form.toString(),
    };

    let res;
    try {
        res = await fetch(endpoint, request);
    }
    catch (err) {
        throw new ExportError("network_error", `export_office fetch failed: ${String(err)}`, err);
    }

    let payload;
    try {
        payload = await res.json();
    }
    catch (err) {
        throw new ExportError("export_api_error", `export_office response not JSON (HTTP ${res.status})`, err);
    }

    // The API reports auth failures and unsupported types through error_code.
    if (payload.error_code) {
        throw new ExportError("export_api_error", `export_office error: code=${payload.error_code} msg=${payload.error_msg ?? ""}`, payload);
    }

    const operationId = payload.operationId ?? payload.data?.operationId;
    if (operationId) {
        return operationId;
    }
    throw new ExportError("no_operation_id", `export_office returned no operationId: ${JSON.stringify(payload)}`, payload);
}
91
/**
 * Step 2: Poll GET /v1/export/query_progress until the export is finished.
 *
 * Interval schedule: the first request fires immediately (no leading wait).
 * Later waits follow 500ms → 1000ms → 2000ms → 3000ms (capped), unless the
 * caller passes a pollIntervalMs different from DEFAULT_POLL_INTERVAL_MS, in
 * which case that fixed interval is used (useful for tests).
 *
 * Response fields are read from `data.*` first, then from the top level.
 * The export counts as finished when status === "Done" or progress >= 100.
 * A response body that is not JSON is treated as transient and retried.
 *
 * @returns { fileUrl, fileName }; fileName falls back to `${operationId}.bin`
 * @throws ExportError "network_error" (fetch rejected), "poll_http_error"
 *         (non-2xx response), "poll_failed" (status === "Failed"),
 *         "no_file_url" (finished without file_url) or "poll_timeout"
 *         (maxPollAttempts exhausted)
 */
async function pollExportProgress(operationId, xsrf, cookieHeader, sourceUrl, maxPollAttempts, pollIntervalMs) {
    // Use the supplied interval only when the caller passes a non-default value
    // (typical: a test pins a short, deterministic interval).
    const useFixedInterval = pollIntervalMs !== DEFAULT_POLL_INTERVAL_MS;
    const backoffScheduleMs = [500, 1000, 2000];
    const backoffCapMs = 3000;
    const pollUrl = `https://doc.weixin.qq.com/v1/export/query_progress` +
        `?operationId=${encodeURIComponent(operationId)}&xsrf=${encodeURIComponent(xsrf)}`;
    // Sum of the waits actually performed, so the timeout message is accurate
    // under both the backoff schedule and a fixed interval.
    let waitedMs = 0;
    for (let attempt = 0; attempt < maxPollAttempts; attempt++) {
        if (attempt > 0) {
            const waitMs = useFixedInterval
                ? pollIntervalMs
                : (backoffScheduleMs[attempt - 1] ?? backoffCapMs);
            waitedMs += waitMs;
            await new Promise((resolve) => setTimeout(resolve, waitMs));
        }
        let res;
        try {
            res = await fetch(pollUrl, {
                headers: { Cookie: cookieHeader, Referer: sourceUrl },
            });
        }
        catch (err) {
            throw new ExportError("network_error", `query_progress fetch failed (attempt ${attempt + 1}): ${String(err)}`, err);
        }
        if (!res.ok) {
            throw new ExportError("poll_http_error", `query_progress HTTP ${res.status} (attempt ${attempt + 1})`, { status: res.status });
        }
        let data;
        try {
            data = await res.json();
        }
        catch {
            // Non-JSON response on a transient error — retry
            continue;
        }
        // Fields may be at top-level or nested under data.*
        const status = data.data?.status ?? data.status ?? "";
        const progress = data.data?.progress ?? data.progress ?? 0;
        const fileUrl = data.data?.file_url ?? data.file_url ?? "";
        const fileName = data.data?.file_name ?? data.file_name ?? `${operationId}.bin`;
        if (status === "Failed") {
            throw new ExportError("poll_failed", `Export task failed (progress=${progress}%): ${JSON.stringify(data)}`, data);
        }
        if (status === "Done" || progress >= 100) {
            if (!fileUrl) {
                throw new ExportError("no_file_url", `Export Done but no file_url in response: ${JSON.stringify(data)}`, data);
            }
            return { fileUrl, fileName };
        }
        // status is still in-progress — continue polling
    }
    throw new ExportError("poll_timeout", `Export did not complete after ${maxPollAttempts} attempts (~${Math.round(waitedMs / 1000)}s)`, { maxPollAttempts, pollIntervalMs });
}
166
+ // ---------------------------------------------------------------------------
167
+ // Step 3
168
+ // ---------------------------------------------------------------------------
169
/**
 * Step 3: Download the exported file and write it into saveDir.
 * The file URL is fetched without Cookie or auth headers.
 *
 * `fileName` comes from the export API response, so it is reduced to its final
 * path component (both `/` and `\` count as separators) before being joined
 * to saveDir; a name such as "../../x" can therefore not escape saveDir.
 *
 * @param fileUrl  URL to download
 * @param fileName server-reported file name
 * @param saveDir  target directory (created recursively if missing)
 * @returns { filePath, fileSizeBytes } — filePath is join(saveDir, <safe name>),
 *          so it is absolute only when saveDir is
 * @throws ExportError "write_error" (unusable file name, mkdir or write
 *         failure), "network_error" (fetch rejected) or "download_http_error"
 *         (non-2xx response or unreadable body)
 */
async function downloadExportedFile(fileUrl, fileName, saveDir) {
    const safeName = basename(String(fileName).replace(/\\/g, "/"));
    if (safeName === "" || safeName === "." || safeName === "..") {
        throw new ExportError("write_error", `Refusing to write export with unusable file name "${fileName}"`, { fileName });
    }
    let res;
    try {
        res = await fetch(fileUrl);
    }
    catch (err) {
        throw new ExportError("network_error", `Download fetch failed: ${String(err)}`, err);
    }
    if (!res.ok) {
        throw new ExportError("download_http_error", `Download HTTP ${res.status} for: ${fileUrl}`, { status: res.status, url: fileUrl });
    }
    let buffer;
    try {
        buffer = await res.arrayBuffer();
    }
    catch (err) {
        throw new ExportError("download_http_error", `Failed to read download response body: ${String(err)}`, err);
    }
    try {
        mkdirSync(saveDir, { recursive: true });
    }
    catch (err) {
        throw new ExportError("write_error", `Failed to create save directory "${saveDir}": ${String(err)}`, err);
    }
    const filePath = join(saveDir, safeName);
    try {
        writeFileSync(filePath, Buffer.from(buffer));
    }
    catch (err) {
        throw new ExportError("write_error", `Failed to write file to "${filePath}": ${String(err)}`, err);
    }
    return { filePath, fileSizeBytes: buffer.byteLength };
}
207
+ // ---------------------------------------------------------------------------
208
+ // Public API
209
+ // ---------------------------------------------------------------------------
210
+ /** Default poll config, applied by exportWecomDoc when the caller omits these options. */
211
+ const DEFAULT_MAX_POLL_ATTEMPTS = 60; // max progress requests; with the default backoff waits (500ms, 1s, 2s, then 3s each) that is roughly 3 minutes of waiting
212
+ const DEFAULT_POLL_INTERVAL_MS = 2000; // also a sentinel: pollExportProgress uses its backoff schedule whenever pollIntervalMs equals this value
213
/**
 * Export a WeCom document to a local file.
 *
 * Runs the three steps in order:
 *   1. createExportTask      (POST /v1/export/export_office)
 *   2. pollExportProgress    (GET  /v1/export/query_progress)
 *   3. downloadExportedFile  (GET  the reported file_url)
 *
 * The same flow is used for every docType; docType is only echoed back in the
 * result.
 *
 * @param input { docId, docType, sourceUrl, cookies, saveDir,
 *                maxPollAttempts?, pollIntervalMs? }
 * @returns { filePath, fileName, fileSizeBytes, docType }
 * @throws ExportError with a typed `kind` field on any failure
 */
export async function exportWecomDoc(input) {
    const { maxPollAttempts = DEFAULT_MAX_POLL_ATTEMPTS, pollIntervalMs = DEFAULT_POLL_INTERVAL_MS, } = input;
    const { docId, docType, sourceUrl, cookies, saveDir } = input;

    // Both request credentials are derived from the same cookie list.
    const cookieHeader = buildCookieHeader(cookies);
    const xsrf = buildXsrfToken(cookies);

    const operationId = await createExportTask(docId, cookieHeader, sourceUrl);
    const exported = await pollExportProgress(operationId, xsrf, cookieHeader, sourceUrl, maxPollAttempts, pollIntervalMs);
    const written = await downloadExportedFile(exported.fileUrl, exported.fileName, saveDir);

    return {
        filePath: written.filePath,
        fileName: exported.fileName,
        fileSizeBytes: written.fileSizeBytes,
        docType,
    };
}
@@ -0,0 +1,45 @@
1
+ // src/wecom/url.ts
2
+ // Parse a doc.weixin.qq.com URL into its docId and type.
3
+ // Known-unsupported types return an "unsupported" result (not null)
4
+ // so callers can produce a friendly hint instead of a generic "invalid URL".
5
/**
 * Parse a WeCom document URL into its docId and document type.
 *
 * Only the path is inspected: the first path segment selects the type and the
 * second segment is returned verbatim as docId. The host is not checked.
 *
 * Exportable types:
 *   /sheet/{docId}  → docType "sheet"
 *   /doc/{docId}    → docType "doc"
 *   /slide/{docId}  → docType "slide"
 *
 * Known but unsupported types (reported so callers can show a specific hint):
 *   /smartpage/{docId} → kind "smartpage"
 *   /mind/{docId}      → kind "mind"
 *
 * @returns { ok: true, docId, docType }
 *          | { ok: false, reason: "unsupported", kind }
 *          | { ok: false, reason: "invalid" }  (unparsable URL, missing
 *            segments, or an unknown first segment)
 */
export function parseWecomUrl(url) {
    const invalid = () => ({ ok: false, reason: "invalid" });
    const exportable = ["sheet", "doc", "slide"];
    const unsupported = ["smartpage", "mind"];

    let pathname;
    try {
        ({ pathname } = new URL(url));
    }
    catch {
        return invalid();
    }

    const [kind, docId] = pathname.split("/").filter(Boolean);
    if (!kind || !docId) {
        return invalid();
    }
    if (exportable.includes(kind)) {
        return { ok: true, docId, docType: kind };
    }
    if (unsupported.includes(kind)) {
        return { ok: false, reason: "unsupported", kind };
    }
    return invalid();
}
@@ -0,0 +1,64 @@
1
+ // src/wecom/url.test.ts
2
+ // Run with: node --loader ts-node/esm src/wecom/url.test.ts
3
+ // Or after build: node dist/wecom/url.test.js
4
+ import assert from "assert/strict";
5
+ import { parseWecomUrl } from "./url.js";
6
// Table-driven assertions for parseWecomUrl. Each case throws on failure, so
// reaching the final console.log means every case passed.

// --- Happy paths ---
const supportedCases = [
    ["https://doc.weixin.qq.com/sheet/e3_AbcXxx?scode=foo&xsrf=bar", "sheet", "e3_AbcXxx"],
    ["https://doc.weixin.qq.com/doc/w3_YyyZzz", "doc", "w3_YyyZzz"],
    ["https://doc.weixin.qq.com/doc/e2_AnotherDoc", "doc", "e2_AnotherDoc"],
    ["https://doc.weixin.qq.com/slide/p3_SlideId?foo=1", "slide", "p3_SlideId"],
];
for (const [url, docType, docId] of supportedCases) {
    const parsed = parseWecomUrl(url);
    assert.equal(parsed.ok, true, `${url} should parse`);
    assert.equal(parsed.docType, docType);
    assert.equal(parsed.docId, docId);
}

// --- Unsupported types (friendly hint path) ---
const unsupportedCases = [
    ["https://doc.weixin.qq.com/smartpage/a1_SmartId", "smartpage"],
    ["https://doc.weixin.qq.com/mind/m4_MindId", "mind"],
];
for (const [url, kind] of unsupportedCases) {
    const parsed = parseWecomUrl(url);
    assert.equal(parsed.ok, false, `${url} should be rejected`);
    assert.equal(parsed.reason, "unsupported");
    assert.equal(parsed.kind, kind);
}

// --- Host is not enforced ---
// parseWecomUrl only inspects the path, so a foreign host with a known path
// shape still parses. Pin that behaviour instead of asserting a tautology.
const wrongHost = parseWecomUrl("https://example.com/sheet/e3_Abc");
assert.equal(wrongHost.ok, true, "host is not validated; path alone decides");
assert.equal(wrongHost.docType, "sheet");
assert.equal(wrongHost.docId, "e3_Abc");

// --- Invalid cases ---
const invalidCases = [
    "https://doc.weixin.qq.com/", // no path segments
    "https://doc.weixin.qq.com/sheet", // type without docId
    "https://doc.weixin.qq.com/unknown/x1_Abc", // unrecognised type
    "not-a-url",
    "",
];
for (const url of invalidCases) {
    const parsed = parseWecomUrl(url);
    assert.equal(parsed.ok, false, `"${url}" should be invalid`);
    assert.equal(parsed.reason, "invalid");
}

console.log("✅ parseWecomUrl: all assertions passed");
@@ -0,0 +1,131 @@
1
+ // src/xlsx/drawing.ts
2
+ // Phase 2 (02-04): Parse xl/drawings/drawingN.xml + rels, extract images to saveDir/media/.
3
+ import { XMLParser } from "fast-xml-parser";
4
+ import { mkdir, writeFile } from "node:fs/promises";
5
+ import { basename } from "node:path";
6
/**
 * Parse one drawing XML part plus its rels, write every referenced image to
 * saveDir/media/, and report which cell each image is anchored to.
 *
 * Anchors whose rId is missing from the rels, or whose image entry is missing
 * from the zip, are skipped with a console warning.
 *
 * @param zip          already-opened zip object exposing `file(path)?.async(type)`
 * @param drawingPath  zip-internal drawing path, e.g. "xl/drawings/drawing1.xml"
 * @param saveDir      output directory; images are written to saveDir/media/
 * @returns array of { row, col, path } in anchor order, where `path` is
 *          "media/<file name>" (relative to saveDir); [] when the drawing
 *          part is missing
 */
export async function parseDrawing(zip, drawingPath, saveDir) {
    // The rels lookup runs first, so a missing rels file is warned about even
    // when the drawing part itself is absent.
    const imagePathByRId = await buildRelsMap(zip, drawingPath);

    const drawingXml = (await zip.file(drawingPath)?.async("string")) ?? null;
    if (!drawingXml) {
        console.warn(`[drawing] drawing not found in zip: ${drawingPath}`);
        return [];
    }

    const mediaDir = `${saveDir}/media`;
    const images = [];
    const anchors = parseAnchors(drawingXml);
    await mkdir(mediaDir, { recursive: true });

    for (const { row, col, rId } of anchors) {
        const zipImagePath = imagePathByRId.get(rId);
        if (!zipImagePath) {
            console.warn(`[drawing] rId "${rId}" not found in rels for ${drawingPath}`);
            continue;
        }
        const bytes = (await zip.file(zipImagePath)?.async("nodebuffer")) ?? null;
        if (!bytes) {
            console.warn(`[drawing] image not found in zip: ${zipImagePath}`);
            continue;
        }
        // Only the file name is kept, so every image lands directly in media/.
        const imageName = basename(zipImagePath);
        await writeFile(`${mediaDir}/${imageName}`, bytes);
        images.push({ row, col, path: `media/${imageName}` });
    }
    return images;
}
44
/**
 * Parse drawing.xml and return the raw anchor entries (row, col, rId).
 *
 * All oneCellAnchor elements are processed first, then all twoCellAnchor
 * elements; each is handed to extractAnchor, and anchors it rejects (null)
 * are dropped.
 */
function parseAnchors(xml) {
    const anchorTags = ["oneCellAnchor", "twoCellAnchor"];
    const parser = new XMLParser({
        ignoreAttributes: false,
        attributeNamePrefix: "@_",
        // Strip namespace prefixes (xdr:from → from, a:blip → blip).
        // extractAnchor reads the blip attributes as "@_embed" / "@_link", so
        // it relies on the prefix being removed from attribute names as well.
        removeNSPrefix: true,
        // Keep a single anchor as a one-element array.
        isArray: (name) => anchorTags.includes(name),
    });
    // Root element: <xdr:wsDr> → "wsDr" once the prefix is removed.
    const root = parser.parse(xml)?.wsDr;
    return anchorTags
        .flatMap((tag) => root?.[tag] ?? [])
        .map((item) => extractAnchor(item))
        .filter(Boolean);
}
71
/**
 * Turn one parsed anchor element into { row, col, rId }.
 *
 * Returns null when the anchor has no <from> element, when its row/col are
 * not finite numbers, when it has no pic → blipFill → blip chain (shapes and
 * charts lack <pic>), or when the blip carries no relationship id.
 * The id is read from "@_embed", falling back to "@_link" only when
 * "@_embed" is null/undefined.
 */
function extractAnchor(anchor) {
    const origin = anchor.from;
    if (!origin) {
        return null;
    }
    const row = Number(origin.row);
    const col = Number(origin.col);
    if (!(Number.isFinite(row) && Number.isFinite(col))) {
        return null;
    }
    const blip = anchor.pic?.blipFill?.blip;
    if (!blip) {
        return null;
    }
    const rId = blip["@_embed"] ?? blip["@_link"];
    return rId ? { row, col, rId } : null;
}
97
/**
 * Resolve a relationship Target against the directory of the part that owns
 * the rels file, producing a zip entry name without "." or ".." segments.
 * A Target starting with "/" is resolved from the package root instead.
 */
function resolveZipTarget(baseDir, target) {
    const joined = target.startsWith("/") ? target : `${baseDir}/${target}`;
    const resolved = [];
    for (const segment of joined.split("/")) {
        if (segment === "" || segment === ".") {
            continue;
        }
        if (segment === "..") {
            resolved.pop();
            continue;
        }
        resolved.push(segment);
    }
    return resolved.join("/");
}
/**
 * Build rId → zip-internal image path from the drawing's .rels file.
 *
 * rels path: "<drawingDir>/_rels/<drawing file>.rels",
 *            e.g. "xl/drawings/_rels/drawing1.xml.rels"
 * Each Target is resolved relative to drawingDir, so
 *   "media/image1.png"      → "xl/drawings/media/image1.png"
 *   "../media/image1.png"   → "xl/media/image1.png"
 *   "/xl/media/image1.png"  → "xl/media/image1.png"
 * The zip lookup done by the caller is by exact entry name, which is why the
 * ".." segments must be resolved here.
 *
 * @returns Map of relationship Id → zip entry path; an empty Map (plus a
 *          console warning) when the rels file is missing
 */
async function buildRelsMap(zip, drawingPath) {
    const parts = drawingPath.split("/");
    const filename = parts[parts.length - 1];
    const drawingDir = parts.slice(0, -1).join("/");
    const relsPath = `${drawingDir}/_rels/${filename}.rels`;
    const relsXml = (await zip.file(relsPath)?.async("string")) ?? null;
    if (!relsXml) {
        console.warn(`[drawing] rels not found: ${relsPath}`);
        return new Map();
    }
    const parser = new XMLParser({
        ignoreAttributes: false,
        attributeNamePrefix: "@_",
        // A rels file with one Relationship must still yield an array.
        isArray: (name) => name === "Relationship",
    });
    const doc = parser.parse(relsXml);
    const relationships = doc?.Relationships?.Relationship ?? [];
    const map = new Map();
    for (const rel of relationships) {
        const id = rel["@_Id"];
        const target = rel["@_Target"];
        if (id && target) {
            map.set(id, resolveZipTarget(drawingDir, String(target)));
        }
    }
    return map;
}
@@ -0,0 +1,34 @@
1
+ // src/xlsx/metadata.ts
2
+ // Phase 1: Extract tab names from a downloaded .xlsx file.
3
+ // Phase 2 will add cell/merge/image parsing via --tab.
4
+ import JSZip from "jszip";
5
+ import { XMLParser } from "fast-xml-parser";
6
+ import { readFile } from "node:fs/promises";
7
/**
 * Extract metadata from a downloaded xlsx file.
 * Phase 1 scope: tab names from xl/workbook.xml only — no cell parsing.
 *
 * @param filePath path of the xlsx file to read; echoed back as `file`
 * @param fileName original file name; a trailing ".xlsx" (any case) is
 *                 removed to form `title`
 * @returns { type: "sheet", title, file, tabs } where tabs is one
 *          { name } per <sheet> element ("" when the name attribute is absent)
 * @throws Error when the archive has no xl/workbook.xml entry
 */
export async function extractXlsxMetadata(filePath, fileName) {
    const zip = await JSZip.loadAsync(await readFile(filePath));
    const workbookXml = await zip.file("xl/workbook.xml")?.async("string");
    if (!workbookXml) {
        throw new Error("xl/workbook.xml not found in xlsx archive");
    }

    const parser = new XMLParser({
        ignoreAttributes: false,
        attributeNamePrefix: "@_",
        parseAttributeValue: false,
        // A workbook with one sheet must still yield an array.
        isArray: (name) => name === "sheet",
    });
    // workbook > sheets > sheet[]
    const sheetNodes = parser.parse(workbookXml)?.workbook?.sheets?.sheet ?? [];

    const tabs = sheetNodes.map((node) => ({ name: node["@_name"] ?? "" }));
    const title = fileName.replace(/\.xlsx$/i, "");
    return { type: "sheet", title, file: filePath, tabs };
}
@@ -0,0 +1,124 @@
1
+ // src/xlsx/parse-tab.ts
2
+ // Phase 2 (02-06): Assemble a complete TabOutput from one xlsx tab.
3
+ // Orchestrates: openXlsx → parseWorkbook → parseSharedStrings →
4
+ // buildStylesLookup → parseSheet → parseDrawing → merge.
5
+ import path from "node:path";
6
+ import { openXlsx } from "./unzip.js";
7
+ import { parseWorkbook } from "./workbook.js";
8
+ import { parseSharedStrings } from "./shared-strings.js";
9
+ import { buildStylesLookup } from "./styles.js";
10
+ import { parseSheet } from "./sheet.js";
11
+ import { parseDrawing } from "./drawing.js";
12
/**
 * Parse a single named tab from an xlsx file.
 *
 * @param xlsxPath Absolute path to the xlsx file.
 * @param tabName  Tab name (exact, case-sensitive).
 * @param saveDir  Directory where extracted images are written (saveDir/media/).
 * @returns { tab, maxRow, maxCol, cells, merges } — cells are sorted by row,
 *          then column; maxRow/maxCol cover both cells and merge ranges
 * @throws Error when the tab name is unknown or the sheet XML is missing
 */
export async function parseTab(xlsxPath, tabName, saveDir) {
    // Step 1: open zip
    const zip = await openXlsx(xlsxPath);
    // Step 2: locate tab
    const workbookInfo = await parseWorkbook(zip);
    const tab = workbookInfo.tabs.find((t) => t.name === tabName);
    if (!tab) {
        const names = workbookInfo.tabs.map((t) => t.name).join(", ");
        throw new Error(`Tab "${tabName}" not found. Available tabs: ${names}`);
    }
    // sheetPath is xl-relative, e.g. "worksheets/sheet10.xml"
    const xlSheetPath = `xl/${tab.sheetPath}`; // "xl/worksheets/sheet10.xml"
    // Step 3: parallel load of shared strings, styles and the sheet itself
    const [sharedStringsXml, stylesXml, sheetXml] = await Promise.all([
        zip.readEntry("xl/sharedStrings.xml"),
        zip.readEntry("xl/styles.xml"),
        zip.readEntry(xlSheetPath),
    ]);
    if (!sheetXml) {
        throw new Error(`Sheet XML not found in zip: ${xlSheetPath}`);
    }
    const sharedStrings = sharedStringsXml
        ? parseSharedStrings(sharedStringsXml)
        : [];
    const getFormatCode = buildStylesLookup(stylesXml ?? "");
    // Step 4: parse sheet cells + merges
    const sheetData = parseSheet(sheetXml, sharedStrings, getFormatCode);
    // Step 5: resolve drawing rels
    const sheetDir = path.posix.dirname(xlSheetPath); // "xl/worksheets"
    const sheetFile = path.posix.basename(xlSheetPath); // "sheet10.xml"
    const sheetRelsPath = `${sheetDir}/_rels/${sheetFile}.rels`;
    let drawingImages = [];
    const relsXml = await zip.readEntry(sheetRelsPath);
    if (relsXml) {
        const drawingTarget = extractDrawingTarget(relsXml);
        if (drawingTarget) {
            // A Target starting with "/" is relative to the package root;
            // anything else is relative to the sheet's own directory.
            const drawingZipPath = drawingTarget.startsWith("/")
                ? path.posix.normalize(drawingTarget).replace(/^\/+/, "")
                : path.posix.normalize(path.posix.join(sheetDir, drawingTarget));
            // Step 6: extract images
            drawingImages = await parseDrawing(zip.rawJSZip, drawingZipPath, saveDir);
        }
    }
    // Step 7a: index sheet cells by "row:col", copying only the fields present
    const cellKey = (row, col) => `${row}:${col}`;
    const cellMap = new Map();
    for (const c of sheetData.cells) {
        const cell = { row: c.row, col: c.col };
        if (c.text !== undefined)
            cell.text = c.text;
        if (c.value !== undefined)
            cell.value = c.value;
        if (c.format !== undefined)
            cell.format = c.format;
        cellMap.set(cellKey(c.row, c.col), cell);
    }
    // Step 7b: merge images into cells
    for (const img of drawingImages) {
        const key = cellKey(img.row, img.col);
        const existing = cellMap.get(key);
        if (existing) {
            // Only set image if not already set (keep first anchor order)
            if (!existing.image) {
                existing.image = { path: img.path };
            }
        }
        else {
            cellMap.set(key, { row: img.row, col: img.col, image: { path: img.path } });
        }
    }
    // Step 7c: compute maxRow / maxCol
    let maxRow = 0;
    let maxCol = 0;
    for (const cell of cellMap.values()) {
        if (cell.row > maxRow)
            maxRow = cell.row;
        if (cell.col > maxCol)
            maxCol = cell.col;
    }
    for (const m of sheetData.merges) {
        if (m.endRow > maxRow)
            maxRow = m.endRow;
        if (m.endCol > maxCol)
            maxCol = m.endCol;
    }
    // Step 7d: sort cells row-asc, col-asc
    const cells = Array.from(cellMap.values()).sort((a, b) => a.row !== b.row ? a.row - b.row : a.col - b.col);
    return {
        tab: tabName,
        maxRow,
        maxCol,
        cells,
        merges: sheetData.merges,
    };
}
112
+ // ---------------------------------------------------------------------------
113
+ // Internal helpers
114
+ // ---------------------------------------------------------------------------
115
/**
 * Extract the drawing Target from a sheet rels XML string.
 *
 * Scans each <Relationship ...> tag and returns the Target of the first one
 * whose Type ends in "/drawing" (so ".../vmlDrawing" does not match).
 * Attribute order does not matter, and both double- and single-quoted
 * attribute values are accepted.
 *
 * This stays a lightweight regex scan rather than a full XML parse; entity
 * references inside attribute values are returned as written.
 *
 * @returns the Target string, or undefined when no drawing relationship with
 *          a non-empty Target exists
 */
function extractDrawingTarget(relsXml) {
    for (const [tag] of relsXml.matchAll(/<(?:[\w.-]+:)?Relationship\b[^>]*>/g)) {
        const attrs = new Map();
        for (const [, name, doubleQuoted, singleQuoted] of tag.matchAll(/([\w:.-]+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g)) {
            attrs.set(name, doubleQuoted ?? singleQuoted);
        }
        const target = attrs.get("Target");
        if (target && attrs.get("Type")?.endsWith("/drawing")) {
            return target;
        }
    }
    return undefined;
}