@mkterswingman/5mghost-wonder 0.0.16 → 0.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -159,6 +159,10 @@ export async function runBrowserNoExportRead(options) {
159
159
  evidence.push("opendoc-response");
160
160
  if (extracted?.text)
161
161
  evidence.push("initial-attributed-text");
162
+ if ((extracted?.tables.length ?? 0) > 0)
163
+ evidence.push("table-control-characters");
164
+ if (extracted?.structureHints)
165
+ evidence.push("structure-hints");
162
166
  const images = options.saveDir && extracted?.imageUrls
163
167
  ? await downloadImageResources(extracted.imageUrls, options.saveDir)
164
168
  : (extracted?.imageUrls ?? []).map((url) => ({
@@ -180,6 +184,8 @@ export async function runBrowserNoExportRead(options) {
180
184
  textLength: extracted?.text.length,
181
185
  imageUrls: extracted?.imageUrls,
182
186
  images,
187
+ tables: extracted?.tables,
188
+ structureHints: extracted?.structureHints,
183
189
  extraction: extracted?.text ? "opendoc-initial-attributed-text" : "none",
184
190
  evidence,
185
191
  warnings: extracted?.warnings ?? [
@@ -305,16 +311,26 @@ export function extractTextFromOpendoc(body) {
305
311
  const decodedChunks = chunks
306
312
  .filter((chunk) => typeof chunk === "string")
307
313
  .flatMap((chunk) => extractInitialAttributedTextCandidates(chunk));
314
+ const decodedText = decodedChunks.join("\n");
308
315
  const textCandidate = chooseTextCandidate(decodedChunks);
309
- const text = normalizeExtractedText(textCandidate ?? decodedChunks.join("\n"));
316
+ const text = normalizeExtractedText(textCandidate ?? decodedText);
310
317
  if (!text)
311
318
  return null;
312
- const imageUrls = extractImageUrls(decodedChunks.join("\n"));
319
+ const imageUrls = extractImageUrls(decodedText);
320
+ const tables = extractTablesFromDecodedText(decodedText);
321
+ const structureHints = analyzeOpendocStructure({
322
+ opendocBody: body,
323
+ htmlData: typeof payload.htmlData === "string" ? payload.htmlData : "",
324
+ decodedText,
325
+ decodedCandidateCount: decodedChunks.length,
326
+ });
313
327
  return {
314
328
  title: payload.clientVars?.title ?? payload.clientVars?.initialTitle,
315
329
  padType: payload.clientVars?.padType ?? payload.padType,
316
330
  text,
317
331
  imageUrls,
332
+ tables,
333
+ structureHints,
318
334
  warnings: [
319
335
  "No-export text is decoded from opendoc initialAttributedText; rich styles are not reconstructed yet.",
320
336
  ...(imageUrls.length > 0
@@ -323,6 +339,52 @@ export function extractTextFromOpendoc(body) {
323
339
  ],
324
340
  };
325
341
  }
342
/**
 * Extract simple tables from decoded opendoc text.
 *
 * A table is the span between a \x1a character and the next \x1b character.
 * Inside that span \x06 separates rows and \x07 separates cells. Every cell is
 * cleaned with normalizeTableCellText. Rows whose cells are all empty are
 * dropped, and tables left without rows are not reported.
 *
 * @param {string} decodedText - Decoded text to scan for table spans.
 * @param {object} [options]
 * @param {boolean} [options.preserveEmptyCells=false] - When false (the
 *   default, matching the historical output) empty cells are removed, so the
 *   cells after them shift left. When true, empty cells inside a non-empty
 *   row are kept as `{ text: "" }` so a cell's index in the row still matches
 *   its position between the \x07 separators.
 * @returns {Array<{ rows: Array<Array<{ text: string }>> }>} One entry per
 *   table span that produced at least one non-empty row.
 */
function extractTablesFromDecodedText(decodedText, { preserveEmptyCells = false } = {}) {
  const tablePattern = /\x1a([\s\S]*?)\x1b/g;
  const tables = [];
  for (const match of decodedText.matchAll(tablePattern)) {
    const body = match[1] ?? "";
    const rows = [];
    for (const rawRow of body.split("\x06")) {
      const texts = rawRow.split("\x07").map((cell) => normalizeTableCellText(cell));
      // A row carrying no text at all is noise in either mode.
      if (!texts.some((text) => text.length > 0)) {
        continue;
      }
      const kept = preserveEmptyCells
        ? texts
        : texts.filter((text) => text.length > 0);
      rows.push(kept.map((text) => ({ text })));
    }
    if (rows.length > 0) {
      tables.push({ rows });
    }
  }
  return tables;
}
360
/**
 * Normalize the raw text of one table cell into a single trimmed line.
 *
 * Carriage returns and non-whitespace control characters are deleted. Any run
 * of whitespace (spaces, tabs, line breaks, Unicode spaces) collapses to one
 * ASCII space, and the result is trimmed.
 *
 * @param {string} value - Raw cell text.
 * @returns {string} Cleaned cell text; "" when nothing printable remains.
 */
function normalizeTableCellText(value) {
  return value
    .replace(/\r/g, "")
    // Strip only the control characters that are NOT whitespace. Tab, LF, VT
    // and FF (\u0009-\u000c) must survive this step: deleting them here would
    // fuse the words they separate ("a\tb" -> "ab") before the collapse below
    // could turn them into a space. \u0007 is left alone, as before.
    .replace(/[\u0000-\u0006\u0008\u000e-\u001f\u007f-\u009f]+/g, "")
    .replace(/\s+/g, " ")
    .trim();
}
367
/**
 * Assemble structure hints for the three text sources of an opendoc read.
 *
 * Each source is summarized with summarizeStructureSignals. The htmlData
 * summary additionally reports whether the markup contains a table, tr, td or
 * th tag, and the initialAttributedText summary carries the number of decoded
 * candidates through unchanged.
 *
 * @param {object} input
 * @param {string} input.opendocBody - Text summarized under `opendoc`.
 * @param {string} input.htmlData - Text summarized under `htmlData`.
 * @param {string} input.decodedText - Text summarized under `initialAttributedText`.
 * @param {number} input.decodedCandidateCount - Copied into `initialAttributedText`.
 * @returns {{ opendoc: object, htmlData: object, initialAttributedText: object }}
 */
function analyzeOpendocStructure(input) {
  const { opendocBody, htmlData, decodedText, decodedCandidateCount } = input;

  const opendoc = summarizeStructureSignals(opendocBody);

  const htmlSummary = summarizeStructureSignals(htmlData);
  const hasTableMarkup = /<\/?(table|tr|td|th)\b/i.test(htmlData);

  const attributedSummary = summarizeStructureSignals(decodedText);

  return {
    opendoc,
    htmlData: Object.assign({}, htmlSummary, { hasTableMarkup }),
    initialAttributedText: Object.assign({}, attributedSummary, { decodedCandidateCount }),
  };
}
380
/**
 * Summarize keyword-based structure signals found in a piece of text.
 *
 * Every check is a case-insensitive whole-word match, so a keyword embedded
 * in a longer identifier (for example "row_1" or "tables") does not count.
 *
 * @param {string} value - Text to inspect.
 * @returns {{ length: number, hasTableSignals: boolean, hasMergeSignals: boolean, hasImageSignals: boolean }}
 *   The length of the text plus one flag per keyword family.
 */
function summarizeStructureSignals(value) {
  const tableWords = /\b(table|cell|row|column|sheet|workbook)\b/i;
  const mergeWords = /\b(merge|merged|rowspan|colspan|mergeCell|mergeCells)\b/i;
  const imageWords = /\b(image|img|pic|picture|media|download_url|qpic)\b/i;

  const summary = { length: value.length };
  summary.hasTableSignals = tableWords.test(value);
  summary.hasMergeSignals = mergeWords.test(value);
  summary.hasImageSignals = imageWords.test(value);
  return summary;
}
326
388
  function extractInitialAttributedTextCandidates(chunk) {
327
389
  let decoded;
328
390
  try {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mkterswingman/5mghost-wonder",
3
- "version": "0.0.16",
3
+ "version": "0.0.18",
4
4
  "description": "企微文档读取 CLI — WeCom document reader",
5
5
  "type": "module",
6
6
  "engines": {
@@ -7,9 +7,9 @@ description: Use this skill when the user wants to install or set up wonder, say
7
7
 
8
8
  ## Skill version
9
9
 
10
- This skill matches **wonder 0.0.16**.
10
+ This skill matches **wonder 0.0.18**.
11
11
 
12
- Once the CLI is installed in Step 1, run `wonder --version`. If the output does not equal `0.0.16`, the CLI on disk has drifted from the skill text loaded in this session. Ask the user to run `/update-5mghost-wonder`, then **start a fresh AI session** (`/exit` and re-enter, or open a new chat) — skill text already loaded into a running session does not refresh after `wonder update`, even though the file on disk has been replaced.
12
+ Once the CLI is installed in Step 1, run `wonder --version`. If the output does not equal `0.0.18`, the CLI on disk has drifted from the skill text loaded in this session. Ask the user to run `/update-5mghost-wonder`, then **start a fresh AI session** (`/exit` and re-enter, or open a new chat) — skill text already loaded into a running session does not refresh after `wonder update`, even though the file on disk has been replaced.
13
13
 
14
14
  After a successful first install, also remind the user to start a fresh AI session before invoking `/use-5mghost-wonder` for the first time. The skill files were just written to disk; the current session never loaded them.
15
15
 
@@ -7,7 +7,7 @@ description: Use this skill when the user wants to update or upgrade wonder, say
7
7
 
8
8
  ## Skill version
9
9
 
10
- This skill matches **wonder 0.0.16**.
10
+ This skill matches **wonder 0.0.18**.
11
11
 
12
12
  ---
13
13
 
@@ -10,10 +10,10 @@ the referenced workflow files needed for the current task.
10
10
 
11
11
  ## Version Gate
12
12
 
13
- This skill matches **wonder 0.0.16**.
13
+ This skill matches **wonder 0.0.18**.
14
14
 
15
15
  On first use in a session, follow `references/session-init.md`. If the installed
16
- CLI version differs from `0.0.16`, stop and ask the user to run
16
+ CLI version differs from `0.0.18`, stop and ask the user to run
17
17
  `/update-5mghost-wonder`, then start a fresh AI session.
18
18
 
19
19
  ## Hard Rules
@@ -37,8 +37,11 @@ wonder browser read <url> --save /tmp/wonder-browser-read
37
37
  ```
38
38
 
39
39
  This currently attempts text extraction from the browser `opendoc`
40
- `initialAttributedText`. Treat it as `partial` unless it also proves image and
41
- table structure.
40
+ `initialAttributedText`, decodes simple table cells from WeCom table control
41
+ characters, downloads detected image resources when `--save` is present, and
42
+ returns safe `structureHints` for `htmlData`, `initialAttributedText`, and the
43
+ full `opendoc` body. Treat it as `partial` unless it also proves merge ranges
44
+ and image anchors.
42
45
 
43
46
  3. If the read is insufficient, run the evidence probe:
44
47
 
@@ -59,6 +62,9 @@ Use `--headed` only when login/debug visibility is needed.
59
62
  - WebSocket messages if they expose structured document operations
60
63
  - screenshots only as a fallback or visual cross-check
61
64
  6. For tables, specifically look for:
65
+ - `tables[].rows[][].text` for simple table cell content (each row is an array of `{ text }` cells)
66
+ - `structureHints.*.hasTableSignals` and `hasMergeSignals`
67
+ - `structureHints.htmlData.hasTableMarkup`
62
68
  - cell coordinates
63
69
  - displayed text
64
70
  - merge ranges
@@ -82,6 +88,19 @@ Return a concise result:
82
88
  "mergeRangesAvailable": true,
83
89
  "imagesAvailable": true,
84
90
  "imageAnchorsAvailable": true,
91
+ "tables": [
92
+ {
93
+ "rows": [
94
+ [{ "text": "A1" }, { "text": "B1" }]
95
+ ]
96
+ }
97
+ ],
98
+ "structureHints": {
99
+ "htmlData": {
100
+ "hasTableMarkup": true,
101
+ "hasMergeSignals": true
102
+ }
103
+ },
85
104
  "evidence": ["runtime-store", "network-response", "websocket", "screenshot"],
86
105
  "missing": []
87
106
  }