@mkterswingman/5mghost-wonder 0.0.15 → 0.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/wecom/browser-probe.js +139 -3
- package/package.json +1 -1
- package/skills/setup-5mghost-wonder/SKILL.md +2 -2
- package/skills/update-5mghost-wonder/SKILL.md +1 -1
- package/skills/use-5mghost-wonder/SKILL.md +2 -2
- package/skills/use-5mghost-wonder/references/read-no-export-browser.md +12 -2
|
@@ -10,6 +10,8 @@ const CDP_CALL_TIMEOUT_MS = 5000;
|
|
|
10
10
|
const BROWSER_START_TIMEOUT_MS = 20_000;
|
|
11
11
|
const DEFAULT_CAPTURE_MS = 8_000;
|
|
12
12
|
const MAX_NETWORK_CANDIDATES = 40;
|
|
13
|
+
const MAX_IMAGE_DOWNLOADS = 20;
|
|
14
|
+
const MAX_IMAGE_BYTES = 25 * 1024 * 1024;
|
|
13
15
|
let cdpNextId = 1;
|
|
14
16
|
export async function runBrowserNoExportProbe(options) {
|
|
15
17
|
const timeoutMs = options.timeoutMs ?? DEFAULT_CAPTURE_MS;
|
|
@@ -157,6 +159,18 @@ export async function runBrowserNoExportRead(options) {
|
|
|
157
159
|
evidence.push("opendoc-response");
|
|
158
160
|
if (extracted?.text)
|
|
159
161
|
evidence.push("initial-attributed-text");
|
|
162
|
+
if (extracted?.structureHints)
|
|
163
|
+
evidence.push("structure-hints");
|
|
164
|
+
const images = options.saveDir && extracted?.imageUrls
|
|
165
|
+
? await downloadImageResources(extracted.imageUrls, options.saveDir)
|
|
166
|
+
: (extracted?.imageUrls ?? []).map((url) => ({
|
|
167
|
+
url,
|
|
168
|
+
...parseImageDimensionsFromUrl(url),
|
|
169
|
+
status: "skipped",
|
|
170
|
+
error: "pass --save <dir> to download image resources",
|
|
171
|
+
}));
|
|
172
|
+
if (images.some((image) => image.status === "downloaded"))
|
|
173
|
+
evidence.push("image-resource-download");
|
|
160
174
|
const result = {
|
|
161
175
|
mode: "browser-no-export-read",
|
|
162
176
|
status: extracted?.text ? "partial" : "fail",
|
|
@@ -167,6 +181,8 @@ export async function runBrowserNoExportRead(options) {
|
|
|
167
181
|
text: extracted?.text,
|
|
168
182
|
textLength: extracted?.text.length,
|
|
169
183
|
imageUrls: extracted?.imageUrls,
|
|
184
|
+
images,
|
|
185
|
+
structureHints: extracted?.structureHints,
|
|
170
186
|
extraction: extracted?.text ? "opendoc-initial-attributed-text" : "none",
|
|
171
187
|
evidence,
|
|
172
188
|
warnings: extracted?.warnings ?? [
|
|
@@ -174,7 +190,7 @@ export async function runBrowserNoExportRead(options) {
|
|
|
174
190
|
],
|
|
175
191
|
missing: [
|
|
176
192
|
"table merge ranges",
|
|
177
|
-
...(
|
|
193
|
+
...(images.some((image) => image.status === "downloaded") ? [] : ["image original resources"]),
|
|
178
194
|
"image anchors",
|
|
179
195
|
"floating vs fixed image classification",
|
|
180
196
|
],
|
|
@@ -292,16 +308,24 @@ export function extractTextFromOpendoc(body) {
|
|
|
292
308
|
const decodedChunks = chunks
|
|
293
309
|
.filter((chunk) => typeof chunk === "string")
|
|
294
310
|
.flatMap((chunk) => extractInitialAttributedTextCandidates(chunk));
|
|
311
|
+
const decodedText = decodedChunks.join("\n");
|
|
295
312
|
const textCandidate = chooseTextCandidate(decodedChunks);
|
|
296
|
-
const text = normalizeExtractedText(textCandidate ??
|
|
313
|
+
const text = normalizeExtractedText(textCandidate ?? decodedText);
|
|
297
314
|
if (!text)
|
|
298
315
|
return null;
|
|
299
|
-
const imageUrls = extractImageUrls(
|
|
316
|
+
const imageUrls = extractImageUrls(decodedText);
|
|
317
|
+
const structureHints = analyzeOpendocStructure({
|
|
318
|
+
opendocBody: body,
|
|
319
|
+
htmlData: typeof payload.htmlData === "string" ? payload.htmlData : "",
|
|
320
|
+
decodedText,
|
|
321
|
+
decodedCandidateCount: decodedChunks.length,
|
|
322
|
+
});
|
|
300
323
|
return {
|
|
301
324
|
title: payload.clientVars?.title ?? payload.clientVars?.initialTitle,
|
|
302
325
|
padType: payload.clientVars?.padType ?? payload.padType,
|
|
303
326
|
text,
|
|
304
327
|
imageUrls,
|
|
328
|
+
structureHints,
|
|
305
329
|
warnings: [
|
|
306
330
|
"No-export text is decoded from opendoc initialAttributedText; rich styles are not reconstructed yet.",
|
|
307
331
|
...(imageUrls.length > 0
|
|
@@ -310,6 +334,27 @@ export function extractTextFromOpendoc(body) {
|
|
|
310
334
|
],
|
|
311
335
|
};
|
|
312
336
|
}
|
|
337
|
+
/**
 * Assemble the `structureHints` object for one opendoc response.
 *
 * Each of the three text sources is summarized with
 * `summarizeStructureSignals`; the `htmlData` entry additionally reports
 * whether literal table tags are present, and the `initialAttributedText`
 * entry carries the number of decoded candidates.
 *
 * @param {{opendocBody: string, htmlData: string, decodedText: string, decodedCandidateCount: number}} input
 * @returns {{opendoc: object, htmlData: object, initialAttributedText: object}}
 */
function analyzeOpendocStructure(input) {
    const { opendocBody, htmlData, decodedText, decodedCandidateCount } = input;
    const opendoc = summarizeStructureSignals(opendocBody);
    // Keyword summary first, then the stricter "real markup" check on the same string.
    const htmlSummary = Object.assign({}, summarizeStructureSignals(htmlData), {
        hasTableMarkup: /<\/?(table|tr|td|th)\b/i.test(htmlData),
    });
    const attributedSummary = Object.assign({}, summarizeStructureSignals(decodedText), {
        decodedCandidateCount,
    });
    return {
        opendoc,
        htmlData: htmlSummary,
        initialAttributedText: attributedSummary,
    };
}
|
|
350
|
+
/**
 * Report keyword-level hints found in a piece of text.
 *
 * The result contains only the text length and three booleans; none of the
 * scanned text itself is copied into the summary. All patterns are
 * case-insensitive and anchored on word boundaries.
 *
 * @param {string} value Text to scan.
 * @returns {{length: number, hasTableSignals: boolean, hasMergeSignals: boolean, hasImageSignals: boolean}}
 */
function summarizeStructureSignals(value) {
    const tablePattern = /\b(table|cell|row|column|sheet|workbook)\b/i;
    const mergePattern = /\b(merge|merged|rowspan|colspan|mergeCell|mergeCells)\b/i;
    const imagePattern = /\b(image|img|pic|picture|media|download_url|qpic)\b/i;
    const summary = { length: value.length };
    summary.hasTableSignals = tablePattern.test(value);
    summary.hasMergeSignals = mergePattern.test(value);
    summary.hasImageSignals = imagePattern.test(value);
    return summary;
}
|
|
313
358
|
function extractInitialAttributedTextCandidates(chunk) {
|
|
314
359
|
let decoded;
|
|
315
360
|
try {
|
|
@@ -426,6 +471,97 @@ function extractImageUrls(text) {
|
|
|
426
471
|
.map((url) => url.replace(/[)*,.;:]+$/g, ""))
|
|
427
472
|
.filter((url) => /qpic\.cn|weixin\.qq\.com|doc\.weixin\.qq\.com/i.test(url))));
|
|
428
473
|
}
|
|
474
|
+
/**
 * Download image resources into `<saveDir>/images`, one at a time.
 *
 * Only the first MAX_IMAGE_DOWNLOADS URLs are fetched; the rest are reported
 * with status "skipped". Each fetched image is written as
 * `image-NNN<ext>` (extension derived from the response content type) with
 * file mode 0o600. A failure for one URL is recorded in its result entry and
 * does not stop the remaining downloads.
 *
 * @param {string[]} imageUrls Candidate image URLs, in document order.
 * @param {string} saveDir Directory under which the `images` folder is created.
 * @returns {Promise<object[]>} One entry per input URL with `url`, optional
 *   `width`/`height`, and `status` of "downloaded", "failed" or "skipped".
 */
async function downloadImageResources(imageUrls, saveDir) {
    const imageDir = resolve(saveDir, "images");
    mkdirSync(imageDir, { recursive: true });
    const results = [];
    for (const [index, url] of imageUrls.slice(0, MAX_IMAGE_DOWNLOADS).entries()) {
        const dimensions = parseImageDimensionsFromUrl(url);
        try {
            const res = await fetch(url);
            if (!res.ok) {
                results.push({
                    url,
                    ...dimensions,
                    status: "failed",
                    error: `HTTP ${res.status}`,
                });
                continue;
            }
            const contentType = res.headers.get("content-type") ?? undefined;
            // Reject on the declared size before buffering, so an oversized
            // response is not pulled fully into memory just to be discarded.
            const declaredHeader = res.headers.get("content-length");
            const declaredBytes = declaredHeader === null ? Number.NaN : Number(declaredHeader);
            if (Number.isFinite(declaredBytes) && declaredBytes > MAX_IMAGE_BYTES) {
                // Best-effort release of the unread body; a cancel failure must
                // not turn into a second result entry for the same URL.
                await res.body?.cancel().catch(() => undefined);
                results.push({
                    url,
                    ...dimensions,
                    contentType,
                    sizeBytes: declaredBytes,
                    status: "failed",
                    error: `image exceeds ${MAX_IMAGE_BYTES} bytes`,
                });
                continue;
            }
            const bytes = Buffer.from(await res.arrayBuffer());
            // Backstop: Content-Length may be absent or understate the body size.
            if (bytes.length > MAX_IMAGE_BYTES) {
                results.push({
                    url,
                    ...dimensions,
                    contentType,
                    sizeBytes: bytes.length,
                    status: "failed",
                    error: `image exceeds ${MAX_IMAGE_BYTES} bytes`,
                });
                continue;
            }
            const filePath = resolve(imageDir, `image-${String(index + 1).padStart(3, "0")}${extensionForContentType(contentType)}`);
            writeFileSync(filePath, bytes, { mode: 0o600 });
            results.push({
                url,
                path: filePath,
                contentType,
                sizeBytes: bytes.length,
                ...dimensions,
                status: "downloaded",
            });
        }
        catch (err) {
            results.push({
                url,
                ...dimensions,
                status: "failed",
                error: err instanceof Error ? err.message : String(err),
            });
        }
    }
    // slice() past the end yields an empty array, so no length guard is needed.
    for (const url of imageUrls.slice(MAX_IMAGE_DOWNLOADS)) {
        results.push({
            url,
            ...parseImageDimensionsFromUrl(url),
            status: "skipped",
            error: `only first ${MAX_IMAGE_DOWNLOADS} images are downloaded`,
        });
    }
    return results;
}
|
|
536
|
+
/**
 * Read image dimensions from the `w` and `h` query parameters of a URL.
 *
 * @param {string} url Absolute image URL.
 * @returns {{width?: number, height?: number}} `width`/`height` are set only
 *   when the parameter converts to a finite number greater than zero
 *   (otherwise `undefined`); an empty object is returned when `url` cannot be
 *   parsed as a URL.
 */
export function parseImageDimensionsFromUrl(url) {
    let query;
    try {
        query = new URL(url).searchParams;
    }
    catch {
        return {};
    }
    // A missing parameter is null, which Number() maps to 0 and is rejected below.
    const toPositiveNumber = (raw) => {
        const parsedValue = Number(raw);
        if (!Number.isFinite(parsedValue) || parsedValue <= 0)
            return undefined;
        return parsedValue;
    };
    return {
        width: toPositiveNumber(query.get("w")),
        height: toPositiveNumber(query.get("h")),
    };
}
|
|
550
|
+
/**
 * Pick a file extension for a downloaded image from its Content-Type value.
 *
 * Matching is by substring so parameters (`; charset=...`) and suffixes
 * (`image/svg+xml`) are tolerated, and it is case-insensitive because media
 * type names are case-insensitive (`IMAGE/PNG` is the same type as `image/png`).
 *
 * @param {string | undefined} contentType Raw Content-Type header value, if any.
 * @returns {string} One of ".png", ".jpg", ".gif", ".webp", ".svg", or ".bin"
 *   when the type is missing or not recognized.
 */
function extensionForContentType(contentType) {
    if (!contentType)
        return ".bin";
    const normalized = contentType.toLowerCase();
    // Order is significant: the first matching token wins.
    const knownTypes = [
        ["png", ".png"],
        ["jpeg", ".jpg"],
        ["jpg", ".jpg"],
        ["gif", ".gif"],
        ["webp", ".webp"],
        ["svg", ".svg"],
    ];
    for (const [token, extension] of knownTypes) {
        if (normalized.includes(token))
            return extension;
    }
    return ".bin";
}
|
|
429
565
|
function normalizeExtractedText(text) {
|
|
430
566
|
return text
|
|
431
567
|
.replace(/p\.\d{8,}/g, "")
|
package/package.json
CHANGED
|
@@ -7,9 +7,9 @@ description: Use this skill when the user wants to install or set up wonder, say
|
|
|
7
7
|
|
|
8
8
|
## Skill version
|
|
9
9
|
|
|
10
|
-
This skill matches **wonder 0.0.15**.
|
|
10
|
+
This skill matches **wonder 0.0.17**.
|
|
11
11
|
|
|
12
|
-
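Once the CLI is installed in Step 1, run `wonder --version`. If the output does not equal `0.0.15`, the CLI on disk has drifted from the skill text loaded in this session. Ask the user to run `/update-5mghost-wonder`, then **start a fresh AI session** (`/exit` and re-enter, or open a new chat) — skill text already loaded into a running session does not refresh after `wonder update`, even though the file on disk has been replaced.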
|
|
12
|
+
Once the CLI is installed in Step 1, run `wonder --version`. If the output does not equal `0.0.17`, the CLI on disk has drifted from the skill text loaded in this session. Ask the user to run `/update-5mghost-wonder`, then **start a fresh AI session** (`/exit` and re-enter, or open a new chat) — skill text already loaded into a running session does not refresh after `wonder update`, even though the file on disk has been replaced.
|
|
13
13
|
|
|
14
14
|
After a successful first install, also remind the user to start a fresh AI session before invoking `/use-5mghost-wonder` for the first time. The skill files were just written to disk; the current session never loaded them.
|
|
15
15
|
|
|
@@ -10,10 +10,10 @@ the referenced workflow files needed for the current task.
|
|
|
10
10
|
|
|
11
11
|
## Version Gate
|
|
12
12
|
|
|
13
|
-
This skill matches **wonder 0.0.15**.
|
|
13
|
+
This skill matches **wonder 0.0.17**.
|
|
14
14
|
|
|
15
15
|
On first use in a session, follow `references/session-init.md`. If the installed
|
|
16
|
-
CLI version differs from `0.0.15`, stop and ask the user to run
|
|
16
|
+
CLI version differs from `0.0.17`, stop and ask the user to run
|
|
17
17
|
`/update-5mghost-wonder`, then start a fresh AI session.
|
|
18
18
|
|
|
19
19
|
## Hard Rules
|
|
@@ -37,8 +37,10 @@ wonder browser read <url> --save /tmp/wonder-browser-read
|
|
|
37
37
|
```
|
|
38
38
|
|
|
39
39
|
This currently attempts text extraction from the browser `opendoc`
|
|
40
|
-
`initialAttributedText
|
|
41
|
-
|
|
40
|
+
`initialAttributedText`, downloads detected image resources when `--save` is
|
|
41
|
+
present, and returns safe `structureHints` for `htmlData`,
|
|
42
|
+
`initialAttributedText`, and the full `opendoc` body. Treat it as `partial`
|
|
43
|
+
unless it also proves actual table cells, merge ranges, and image anchors.
|
|
42
44
|
|
|
43
45
|
3. If the read is insufficient, run the evidence probe:
|
|
44
46
|
|
|
@@ -59,6 +61,8 @@ Use `--headed` only when login/debug visibility is needed.
|
|
|
59
61
|
- WebSocket messages if they expose structured document operations
|
|
60
62
|
- screenshots only as a fallback or visual cross-check
|
|
61
63
|
6. For tables, specifically look for:
|
|
64
|
+
- `structureHints.*.hasTableSignals` and `hasMergeSignals`
|
|
65
|
+
- `structureHints.htmlData.hasTableMarkup`
|
|
62
66
|
- cell coordinates
|
|
63
67
|
- displayed text
|
|
64
68
|
- merge ranges
|
|
@@ -82,6 +86,12 @@ Return a concise result:
|
|
|
82
86
|
"mergeRangesAvailable": true,
|
|
83
87
|
"imagesAvailable": true,
|
|
84
88
|
"imageAnchorsAvailable": true,
|
|
89
|
+
"structureHints": {
|
|
90
|
+
"htmlData": {
|
|
91
|
+
"hasTableMarkup": true,
|
|
92
|
+
"hasMergeSignals": true
|
|
93
|
+
}
|
|
94
|
+
},
|
|
85
95
|
"evidence": ["runtime-store", "network-response", "websocket", "screenshot"],
|
|
86
96
|
"missing": []
|
|
87
97
|
}
|