@yuzc-001/grasp 0.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +327 -0
- package/README.zh-CN.md +324 -0
- package/examples/README.md +31 -0
- package/examples/claude-desktop.json +8 -0
- package/examples/codex-config.toml +4 -0
- package/grasp.skill +0 -0
- package/index.js +87 -0
- package/package.json +48 -0
- package/scripts/grasp_openclaw_ctl.sh +122 -0
- package/scripts/run-search-benchmark.mjs +287 -0
- package/scripts/update-star-history.mjs +274 -0
- package/skill/SKILL.md +61 -0
- package/skill/references/tools.md +306 -0
- package/src/cli/auto-configure.js +116 -0
- package/src/cli/cmd-connect.js +148 -0
- package/src/cli/cmd-explain.js +42 -0
- package/src/cli/cmd-logs.js +55 -0
- package/src/cli/cmd-status.js +119 -0
- package/src/cli/config.js +27 -0
- package/src/cli/detect-chrome.js +58 -0
- package/src/grasp/handoff/events.js +67 -0
- package/src/grasp/handoff/persist.js +48 -0
- package/src/grasp/handoff/state.js +28 -0
- package/src/grasp/page/capture.js +34 -0
- package/src/grasp/page/state.js +273 -0
- package/src/grasp/verify/evidence.js +40 -0
- package/src/grasp/verify/pipeline.js +52 -0
- package/src/layer1-bridge/chrome.js +416 -0
- package/src/layer1-bridge/webmcp.js +143 -0
- package/src/layer2-perception/hints.js +284 -0
- package/src/layer3-action/actions.js +400 -0
- package/src/runtime/browser-instance.js +65 -0
- package/src/runtime/truth/model.js +94 -0
- package/src/runtime/truth/snapshot.js +51 -0
- package/src/server/affordances.js +47 -0
- package/src/server/audit.js +122 -0
- package/src/server/boss-fast-path.js +164 -0
- package/src/server/boundary-guard.js +53 -0
- package/src/server/content.js +97 -0
- package/src/server/continuity.js +256 -0
- package/src/server/engine-selection.js +29 -0
- package/src/server/entry-orchestrator.js +115 -0
- package/src/server/error-codes.js +7 -0
- package/src/server/explain-share-card.js +113 -0
- package/src/server/fast-path-router.js +134 -0
- package/src/server/form-runtime.js +602 -0
- package/src/server/form-tasks.js +254 -0
- package/src/server/gateway-response.js +62 -0
- package/src/server/index.js +22 -0
- package/src/server/observe.js +52 -0
- package/src/server/page-projection.js +31 -0
- package/src/server/page-state.js +27 -0
- package/src/server/postconditions.js +128 -0
- package/src/server/prompt-assembly.js +148 -0
- package/src/server/responses.js +44 -0
- package/src/server/route-boundary.js +174 -0
- package/src/server/route-policy.js +168 -0
- package/src/server/runtime-confirmation.js +87 -0
- package/src/server/runtime-status.js +7 -0
- package/src/server/share-artifacts.js +284 -0
- package/src/server/state.js +132 -0
- package/src/server/structured-extraction.js +131 -0
- package/src/server/surface-prompts.js +166 -0
- package/src/server/task-frame.js +11 -0
- package/src/server/tasks/search-task.js +321 -0
- package/src/server/tools.actions.js +1361 -0
- package/src/server/tools.form.js +526 -0
- package/src/server/tools.gateway.js +757 -0
- package/src/server/tools.handoff.js +210 -0
- package/src/server/tools.js +20 -0
- package/src/server/tools.legacy.js +983 -0
- package/src/server/tools.strategy.js +250 -0
- package/src/server/tools.task-surface.js +66 -0
- package/src/server/tools.workspace.js +873 -0
- package/src/server/workspace-runtime.js +1138 -0
- package/src/server/workspace-tasks.js +735 -0
- package/start-chrome.bat +84 -0
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
import { mkdir, writeFile } from 'node:fs/promises';
|
|
2
|
+
import os from 'node:os';
|
|
3
|
+
import path from 'node:path';
|
|
4
|
+
|
|
5
|
+
// Directory used by writeArtifactFile when the caller supplies no `artifactDir`:
// `<user home>/.grasp/artifacts`.
const DEFAULT_ARTIFACT_DIR = path.join(os.homedir(), '.grasp', 'artifacts');
|
|
6
|
+
|
|
7
|
+
/**
 * Coerce any value to a trimmed string.
 *
 * @param {unknown} value - Value to stringify; `null`/`undefined` become ''.
 * @returns {string} The string form of `value` without surrounding whitespace.
 */
function toSafeText(value) {
  const text = value == null ? '' : String(value);
  return text.trim();
}
|
|
10
|
+
|
|
11
|
+
/**
 * Escape the five HTML-significant characters so `value` can be embedded in
 * element content or a quoted attribute.
 *
 * `&` is replaced first so the entities produced by the later replacements
 * are not escaped a second time.
 *
 * @param {unknown} value - Value to escape; `null`/`undefined` become ''.
 * @returns {string} The escaped string.
 */
export function escapeHtml(value) {
  return String(value ?? '')
    .replaceAll('&', '&amp;')
    .replaceAll('<', '&lt;')
    .replaceAll('>', '&gt;')
    .replaceAll('"', '&quot;')
    .replaceAll("'", '&#39;');
}
|
|
19
|
+
|
|
20
|
+
/**
 * Render a page projection (and optional layout card) as a Markdown document.
 *
 * Sections are emitted only when they have content: "Summary" and "Content"
 * are skipped for empty text, and "Share Card" is skipped when no
 * `explainCard` is given.
 *
 * @param {object} [options]
 * @param {object} [options.projection] - Reads `title`, `url`, `summary`, `main_text`.
 * @param {object} [options.explainCard] - Reads `engine`, `estimated_height`,
 *   `title_lines`, `summary_lines`, `body_lines`.
 * @returns {string} The Markdown text, trimmed.
 */
export function buildShareMarkdown({ projection, explainCard } = {}) {
  const title = toSafeText(projection?.title) || 'Untitled';
  const url = toSafeText(projection?.url) || 'unknown';
  const summary = toSafeText(projection?.summary);
  const mainText = toSafeText(projection?.main_text);
  const lines = [`# ${title}`, '', `Source: ${url}`];

  if (summary) {
    lines.push('', '## Summary', '', summary);
  }

  if (mainText) {
    lines.push('', '## Content', '', mainText);
  }

  if (explainCard) {
    // Fall back to the same defaults buildShareHtml uses so a partially
    // populated card never prints the literal text "undefined".
    lines.push(
      '',
      '## Share Card',
      '',
      `- Engine: ${explainCard.engine ?? 'fallback'}`,
      `- Estimated height: ${explainCard.estimated_height ?? 0}px`,
      `- Title lines: ${explainCard.title_lines ?? 0}`,
      `- Summary lines: ${explainCard.summary_lines ?? 0}`,
      `- Body lines: ${explainCard.body_lines ?? 0}`,
    );
  }

  return lines.join('\n').trim();
}
|
|
54
|
+
|
|
55
|
+
/**
 * Render a batch of extraction records as one Markdown document.
 *
 * Each record becomes a `##` section listing its input URL, final URL and
 * status, followed by one bullet per requested field and, when present, the
 * list of fields that could not be extracted.
 *
 * @param {object} [options]
 * @param {Iterable<string>} [options.fields] - Field names to print per record.
 * @param {Iterable<object>} [options.records] - Reads `title`, `input_url`,
 *   `final_url`, `status`, `record` and `missing_fields` from each entry.
 * @returns {string} The Markdown text, trimmed.
 */
export function buildBatchMarkdownBundle({ fields = [], records = [] } = {}) {
  const output = ['# Batch Extract', ''];

  for (const entry of records) {
    const heading = toSafeText(entry?.title) || 'Untitled';
    output.push(
      `## ${heading}`,
      '',
      `- Input URL: ${toSafeText(entry?.input_url) || 'unknown'}`,
      `- Final URL: ${toSafeText(entry?.final_url) || 'unknown'}`,
      `- Status: ${toSafeText(entry?.status) || 'unknown'}`,
    );

    for (const field of fields) {
      const fieldValue = toSafeText(entry?.record?.[field]) || '';
      output.push(`- ${field}: ${fieldValue}`);
    }

    const missing = entry?.missing_fields;
    if (Array.isArray(missing) && missing.length !== 0) {
      output.push(`- Missing fields: ${missing.join(', ')}`);
    }

    // Blank line between record sections.
    output.push('');
  }

  return output.join('\n').trim();
}
|
|
79
|
+
|
|
80
|
+
/**
 * Serialize rows of plain objects to CSV text with a header line.
 *
 * A cell is wrapped in double quotes (with embedded quotes doubled) when it
 * contains a quote, a comma, or a line break. Carriage returns count as line
 * breaks: an unquoted `\r` would split the record in many CSV readers.
 *
 * @param {string[]} [columns] - Column names; also the keys read from each row.
 * @param {Array<object>} [rows] - Row objects; missing values become empty cells.
 * @returns {string} Header and rows joined with `\n`, no trailing newline.
 */
export function serializeCsv(columns = [], rows = []) {
  const needsQuoting = /[",\r\n]/;

  const escapeCell = (value) => {
    const cell = String(value ?? '');
    return needsQuoting.test(cell) ? `"${cell.replaceAll('"', '""')}"` : cell;
  };

  const csvRows = [
    columns.map((column) => escapeCell(column)).join(','),
    ...rows.map((row) => columns.map((column) => escapeCell(row?.[column])).join(',')),
  ];

  return csvRows.join('\n');
}
|
|
96
|
+
|
|
97
|
+
/**
 * Render a page projection as a standalone, self-styled HTML share sheet.
 *
 * All projection and card text is passed through `escapeHtml` before being
 * interpolated. The hero section's `min-height` follows the card's estimated
 * height but never drops below 240px.
 *
 * @param {object} [options]
 * @param {object} [options.projection] - Reads `title`, `url`, `summary`, `main_text`.
 * @param {object} [options.explainCard] - Reads `engine`, `estimated_height`,
 *   `title_lines`, `summary_lines`.
 * @returns {string} A complete HTML document.
 */
export function buildShareHtml({ projection, explainCard } = {}) {
  const title = toSafeText(projection?.title) || 'Untitled';
  const url = toSafeText(projection?.url) || 'unknown';
  const summary = toSafeText(projection?.summary);
  const mainText = toSafeText(projection?.main_text);
  // A non-numeric estimate would turn Math.max into NaN and emit the invalid
  // declaration `min-height: NaNpx`; treat it as "no estimate" instead.
  const estimatedHeight = Number(explainCard?.estimated_height ?? 0);
  const headerHeight = Math.max(240, Number.isFinite(estimatedHeight) ? estimatedHeight : 0);

  return `<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>${escapeHtml(title)}</title>
<style>
:root {
color-scheme: light;
--page-width: 860px;
--ink: #0b1738;
--muted: #52607a;
--line: #d9e0ef;
--card: #ffffff;
--wash: linear-gradient(180deg, #eef5ff 0%, #f9fbff 100%);
--accent: #23c993;
}
* { box-sizing: border-box; }
body {
margin: 0;
padding: 40px;
font-family: Arial, sans-serif;
background: radial-gradient(circle at top left, #dff6ea 0%, #eef5ff 42%, #f8fbff 100%);
color: var(--ink);
}
.sheet {
width: min(100%, var(--page-width));
margin: 0 auto;
background: var(--card);
border: 1px solid var(--line);
border-radius: 28px;
overflow: hidden;
box-shadow: 0 24px 80px rgba(11, 23, 56, 0.12);
}
.hero {
min-height: ${headerHeight}px;
padding: 40px 44px 28px;
background: var(--wash);
border-bottom: 1px solid var(--line);
}
.eyebrow {
display: inline-block;
padding: 8px 14px;
border-radius: 999px;
background: rgba(35, 201, 147, 0.12);
color: #0d7d5d;
font-size: 13px;
font-weight: 700;
letter-spacing: 0.04em;
text-transform: uppercase;
}
h1 {
margin: 18px 0 14px;
font-size: 34px;
line-height: 1.2;
}
.summary {
margin: 0;
color: var(--muted);
font-size: 18px;
line-height: 1.55;
}
.meta {
margin-top: 20px;
color: var(--muted);
font-size: 14px;
line-height: 1.6;
}
.body {
padding: 32px 44px 44px;
white-space: pre-wrap;
font-size: 16px;
line-height: 1.7;
}
.layout {
margin-top: 28px;
display: grid;
grid-template-columns: repeat(4, minmax(0, 1fr));
gap: 12px;
}
.layout-card {
padding: 14px 16px;
border-radius: 18px;
border: 1px solid var(--line);
background: rgba(255, 255, 255, 0.78);
}
.layout-card strong {
display: block;
font-size: 12px;
text-transform: uppercase;
color: var(--muted);
margin-bottom: 6px;
}
.layout-card span {
font-size: 18px;
font-weight: 700;
}
</style>
</head>
<body>
<main class="sheet">
<section class="hero">
<div class="eyebrow">Grasp Share</div>
<h1>${escapeHtml(title)}</h1>
<p class="summary">${escapeHtml(summary || mainText.slice(0, 160))}</p>
<div class="meta">Source: ${escapeHtml(url)}</div>
<div class="layout">
<div class="layout-card"><strong>Engine</strong><span>${escapeHtml(explainCard?.engine ?? 'fallback')}</span></div>
<div class="layout-card"><strong>Height</strong><span>${escapeHtml(String(explainCard?.estimated_height ?? 0))}px</span></div>
<div class="layout-card"><strong>Title Lines</strong><span>${escapeHtml(String(explainCard?.title_lines ?? 0))}</span></div>
<div class="layout-card"><strong>Summary Lines</strong><span>${escapeHtml(String(explainCard?.summary_lines ?? 0))}</span></div>
</div>
</section>
<section class="body">${escapeHtml(mainText)}</section>
</main>
</body>
</html>`;
}
|
|
221
|
+
|
|
222
|
+
/**
 * Open a new page in the same browser context as `page`.
 *
 * @param {object} page - Object exposing `context()`, whose result exposes `newPage()`.
 * @returns {Promise<object>} Whatever `context.newPage()` resolves to.
 * @throws {Error} When `page` has no usable context.
 */
async function defaultCreateScratchPage(page) {
  const browserContext = page?.context?.();
  const canOpenPage = Boolean(browserContext) && typeof browserContext.newPage === 'function';

  if (!canOpenPage) {
    throw new Error('Share rendering requires a browser page context.');
  }

  return browserContext.newPage();
}
|
|
229
|
+
|
|
230
|
+
/**
 * Render an HTML string to a PNG screenshot or a PDF using a throw-away page.
 *
 * @param {object} page - Source page, handed to the scratch-page factory.
 * @param {string} html - Document to load into the scratch page.
 * @param {'screenshot'|'pdf'} format - Output format.
 * @param {object} [deps]
 * @param {Function} [deps.createScratchPage] - Async factory `(page) => scratchPage`;
 *   defaults to opening a new page in `page`'s browser context.
 * @returns {Promise<{data: *, extension: string, mimeType: string}>} The rendered
 *   bytes as returned by the page's `screenshot()`/`pdf()`, plus file metadata.
 * @throws {Error} When `format` is neither 'screenshot' nor 'pdf'.
 */
export async function renderShareArtifact(page, html, format, deps = {}) {
  const createScratchPage = deps.createScratchPage ?? defaultCreateScratchPage;
  const scratchPage = await createScratchPage(page);

  try {
    if (typeof scratchPage.setViewportSize === 'function') {
      await scratchPage.setViewportSize({ width: 960, height: 1280 });
    }
    await scratchPage.setContent(html, { waitUntil: 'load' });

    if (format === 'screenshot') {
      return {
        data: await scratchPage.screenshot({ type: 'png', fullPage: true }),
        extension: 'png',
        mimeType: 'image/png',
      };
    }

    if (format === 'pdf') {
      return {
        data: await scratchPage.pdf({
          printBackground: true,
          preferCSSPageSize: true,
        }),
        extension: 'pdf',
        mimeType: 'application/pdf',
      };
    }

    throw new Error(`Unsupported share format: ${format}`);
  } finally {
    // Best-effort cleanup. A try/catch (rather than chaining `.catch` onto the
    // call) also covers a close() that throws synchronously or returns a
    // non-promise, so a cleanup failure can never replace the render result
    // or the error raised above.
    try {
      await scratchPage.close?.();
    } catch {
      // Intentionally ignored: the scratch page is disposable.
    }
  }
}
|
|
264
|
+
|
|
265
|
+
/**
 * Write an artifact to disk and report where it landed and how large it is.
 *
 * @param {object} artifact
 * @param {string} [artifact.filename] - File name inside the artifact directory;
 *   defaults to 'artifact.txt'.
 * @param {string|Uint8Array} artifact.data - Content to write.
 * @param {string} [artifact.encoding] - Encoding passed to the writer for string data.
 * @param {string} [artifact.mimeType] - Echoed back in the result.
 * @param {object} [deps]
 * @param {string} [deps.artifactDir] - Target directory; created if missing.
 * @param {Function} [deps.mkdirImpl] - Replacement for `fs/promises.mkdir`.
 * @param {Function} [deps.writeFileImpl] - Replacement for `fs/promises.writeFile`.
 * @returns {Promise<{path: string, bytes: number, mimeType: (string|null)}>}
 * @throws {TypeError} When `artifact` carries no `data`.
 */
export async function writeArtifactFile(artifact, deps = {}) {
  const {
    artifactDir = DEFAULT_ARTIFACT_DIR,
    mkdirImpl = mkdir,
    writeFileImpl = writeFile,
  } = deps;

  const data = artifact?.data;
  if (data == null) {
    // Fail before touching the file system instead of after creating the directory.
    throw new TypeError('writeArtifactFile requires an artifact with data.');
  }

  // NOTE(review): filename is joined as-is, so a value containing `..` segments
  // resolves outside artifactDir; confirm callers only pass trusted names.
  const filename = toSafeText(artifact.filename) || 'artifact.txt';
  const fullPath = path.join(artifactDir, filename);
  const { encoding } = artifact;

  await mkdirImpl(artifactDir, { recursive: true });
  await writeFileImpl(fullPath, data, encoding ? { encoding } : undefined);

  // Typed arrays (Buffer included) report their own size. Strings are measured
  // in the encoding they were written with, so base64/hex/latin1 payloads
  // report the number of bytes actually written.
  const bytes = ArrayBuffer.isView(data)
    ? data.byteLength
    : Buffer.byteLength(String(data), encoding || 'utf8');

  return {
    path: fullPath,
    bytes,
    mimeType: artifact.mimeType ?? null,
  };
}
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
import { probe, listTools } from '../layer1-bridge/webmcp.js';
|
|
2
|
+
import { buildHintMap } from '../layer2-perception/hints.js';
|
|
3
|
+
import { createHandoffState } from '../grasp/handoff/state.js';
|
|
4
|
+
import { createPageGraspState, applySnapshotToPageGraspState } from '../grasp/page/state.js';
|
|
5
|
+
import { capturePageSnapshot } from '../grasp/page/capture.js';
|
|
6
|
+
|
|
7
|
+
// Error-message fragments that isTransientExecutionContextError treats as
// retryable.
const TRANSIENT_CONTEXT_ERRORS = [
  'Execution context was destroyed',
  'Cannot find context with specified id',
];

/**
 * Tell whether an error's message contains one of the known transient
 * execution-context fragments.
 *
 * @param {{message?: string}|null|undefined} error - Error-like value to inspect.
 * @returns {boolean} `true` when the message matches a known fragment.
 */
function isTransientExecutionContextError(error) {
  const message = error?.message ?? '';

  for (const fragment of TRANSIENT_CONTEXT_ERRORS) {
    if (message.includes(fragment)) {
      return true;
    }
  }

  return false;
}
|
|
16
|
+
|
|
17
|
+
/**
 * Pause for the given number of milliseconds.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<void>} Resolves with no value once the timer fires.
 */
async function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
20
|
+
|
|
21
|
+
/**
 * Run an async step, retrying only when it fails with a transient
 * execution-context error.
 *
 * The wait before retry number k (1-based) is `delayMs * k`. Any other error,
 * or a transient error on the final attempt, is rethrown unchanged.
 *
 * @param {Function} step - Async function to run; its result is returned.
 * @param {object} [options]
 * @param {number} [options.attempts=3] - Maximum number of calls to `step`.
 * @param {number} [options.delayMs=120] - Base delay between attempts.
 * @returns {Promise<*>} Whatever `step` resolves to.
 * @throws {RangeError} When `attempts` allows no attempt at all.
 */
async function retryTransientPageStep(step, { attempts = 3, delayMs = 120 } = {}) {
  let lastError = null;

  for (let i = 0; i < attempts; i += 1) {
    try {
      return await step();
    } catch (error) {
      lastError = error;
      const isFinalAttempt = i === attempts - 1;
      if (isFinalAttempt || !isTransientExecutionContextError(error)) {
        throw error;
      }
      await sleep(delayMs * (i + 1));
    }
  }

  // Reached when the loop never ran (attempts <= 0) or the attempt budget was
  // exhausted without hitting the final-attempt check. Never throw a bare
  // `null`: surface a real error when no attempt was made.
  throw lastError ?? new RangeError(`retryTransientPageStep needs attempts >= 1, got ${attempts}`);
}
|
|
38
|
+
|
|
39
|
+
/**
 * Report whether safe mode is on.
 *
 * Safe mode is on unless the `GRASP_SAFE_MODE` environment variable is exactly
 * the string 'false'.
 *
 * @returns {boolean}
 */
export function isSafeModeEnabled() {
  const { GRASP_SAFE_MODE: flag } = process.env;
  const explicitlyDisabled = flag === 'false';
  return !explicitlyDisabled;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Build a fresh server state object with every field at its initial value.
 *
 * The three groups below exist only for readability; they are merged into one
 * flat object whose key order matches the order written here.
 *
 * @returns {object} A new state object; no part of it is shared between calls.
 */
export function createServerState() {
  // Bridge/session bookkeeping, all unset until the first page sync.
  const session = {
    webmcp: null,
    hintMap: [],
    lastUrl: null,
    targetSession: null,
    activeTaskId: null,
  };

  // Hint registry plus one counter per hint prefix.
  const hints = {
    hintRegistry: new Map(),
    hintCounters: { B: 0, I: 0, L: 0, S: 0 },
  };

  // Safe-mode flag, page/handoff state and the remaining runtime slots.
  const runtime = {
    safeMode: isSafeModeEnabled(),
    pageState: createPageGraspState(),
    handoff: createHandoffState(),
    runtimeTruth: null,
    runtimeConfirmation: null,
    verificationContext: null,
    lastRouteTrace: null,
    taskFrames: new Map(),
  };

  return { ...session, ...hints, ...runtime };
}
|
|
62
|
+
|
|
63
|
+
/**
 * Look up the task frame for the state's active task.
 *
 * The task id is read from `state.activeTaskId`, falling back to
 * `state.taskId`.
 *
 * @param {object|null|undefined} state - State holding a `taskFrames` Map.
 * @returns {object|null} The matching frame, or `null` when there is no active
 *   task id or no frame stored for it.
 */
export function getActiveTaskFrame(state) {
  const activeId = state?.activeTaskId ?? state?.taskId;
  if (!activeId) {
    return null;
  }

  // `state` is known to be an object here, since it supplied `activeId`.
  const frame = state.taskFrames?.get(activeId);
  return frame ?? null;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Bring `state` in line with what `page` currently shows.
 *
 * Always captures a snapshot and updates `state.pageState` / `state.lastUrl`
 * (and the active task frame's `lastUrl`, if there is one). The WebMCP probe
 * and hint map are refreshed only when forced, when nothing has been probed
 * yet, or when the URL or DOM revision changed.
 *
 * @param {object} page - Browser page; `url()` is read directly, the rest is
 *   passed to the capture/probe helpers.
 * @param {object} state - Server state, mutated in place.
 * @param {object} [options]
 * @param {boolean} [options.force=false] - Refresh even if nothing changed.
 * @returns {Promise<object>} The same `state` object.
 */
export async function syncPageState(page, state, { force = false } = {}) {
  const url = page.url();
  const snapshotData = await capturePageSnapshot(page);
  const previous = state.pageState ?? createPageGraspState();
  const next = applySnapshotToPageGraspState(previous, {
    url,
    snapshotHash: `${url}|${snapshotData.nodes}|${snapshotData.bodyText}`,
    title: snapshotData.title,
    bodyText: snapshotData.bodyText,
    nodes: snapshotData.nodes,
    forms: snapshotData.forms,
    navs: snapshotData.navs,
    headings: snapshotData.headings,
  });
  const urlChanged = previous.lastUrl !== url;
  const domRevisionChanged = previous.domRevision !== next.domRevision;

  state.pageState = next;
  state.lastUrl = next.lastUrl;

  const frame = getActiveTaskFrame(state);
  if (frame) {
    frame.lastUrl = next.lastUrl;
  }

  const upToDate = !force && state.webmcp !== null && !urlChanged && !domRevisionChanged;
  if (upToDate) {
    return state;
  }

  if (urlChanged) {
    // Hints belong to a single URL; start over on navigation.
    state.hintRegistry = new Map();
    state.hintCounters = { B: 0, I: 0, L: 0, S: 0 };
  }

  const refreshBridge = async () => {
    const webmcp = await probe(page);
    state.lastUrl = url;

    if (!webmcp.available) {
      // No WebMCP on this page: keep the probe result and build a hint map.
      state.webmcp = webmcp;
      state.hintMap = await buildHintMap(page, state.hintRegistry, state.hintCounters);
      return;
    }

    // WebMCP available: record its tools and clear the hint map.
    const tools = await listTools(page, webmcp);
    state.webmcp = { ...webmcp, tools };
    state.hintMap = [];
  };

  await retryTransientPageStep(refreshBridge);
  return state;
}
|
|
117
|
+
|
|
118
|
+
/**
 * Describe which interaction mode the state is currently in.
 *
 * @param {object} state - Reads `state.webmcp` (`available`, `source`, `tools`).
 * @returns {{mode: string, detail: string, summary: string}} 'WebMCP' with the
 *   native tool count when WebMCP is available, otherwise the 'CDP' description.
 */
export function describeMode(state) {
  const { webmcp } = state;

  if (!webmcp?.available) {
    const label = 'CDP (Hint Map + Mouse Events)';
    return { mode: 'CDP', detail: label, summary: label };
  }

  const toolCount = webmcp.tools?.length ?? 0;
  return {
    mode: 'WebMCP',
    detail: `WebMCP via ${webmcp.source} (${toolCount} native tools)`,
    summary: `WebMCP (${toolCount} native tools)`,
  };
}
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
/**
 * Collapse every run of whitespace to a single space and strip both ends.
 *
 * @param {unknown} value - Value to normalize; `null`/`undefined` become ''.
 * @returns {string} The single-spaced text.
 */
function normalizeWhitespace(value) {
  const text = String(value ?? '');
  // Splitting on whitespace runs and dropping the empty edge pieces is the
  // same as replacing runs with one space and then trimming.
  return text.split(/\s+/).filter(Boolean).join(' ');
}
|
|
4
|
+
|
|
5
|
+
/**
 * Reduce a label to its comparison form: single-spaced, lower-cased, and
 * without any trailing colon decoration.
 *
 * Both the ASCII colon and the full-width colon (U+FF1A) are stripped, along
 * with whitespace mixed in with them, so "Name:", "Name :" and "Name\uFF1A"
 * all normalize to "name".
 *
 * @param {unknown} value - Raw label text.
 * @returns {string} The normalized label; '' for empty input.
 */
export function normalizeStructuredLabel(value) {
  return normalizeWhitespace(value)
    .replace(/[\s:\uFF1A]+$/u, '')
    .toLowerCase();
}
|
|
10
|
+
|
|
11
|
+
/**
 * Rate how well a candidate label matches a requested field.
 *
 * @param {string} field - Normalized requested field name.
 * @param {string} candidate - Normalized candidate label.
 * @returns {number} 4 for an exact match, 3 when the candidate contains the
 *   field, 2 when the field contains the candidate, 0 otherwise (including
 *   when either side is empty).
 */
function scoreStructuredCandidate(field, candidate) {
  const bothPresent = Boolean(field) && Boolean(candidate);
  if (!bothPresent) {
    return 0;
  }
  if (field === candidate) {
    return 4;
  }
  if (candidate.includes(field)) {
    return 3;
  }
  return field.includes(candidate) ? 2 : 0;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Assign the best-matching label/value candidate to each requested field.
 *
 * Fields are processed in request order and each candidate is used at most
 * once. Among candidates with a positive score the highest score wins; ties go
 * to the shorter label, then to the earlier candidate.
 *
 * Blank field names are ignored entirely, so `missing_fields` only ever lists
 * names that also appear in `requested_fields`.
 *
 * @param {string[]} [fields] - Requested field names.
 * @param {Array<{label: string, value: string, strategy?: string}>} [candidates]
 *   - Extracted label/value pairs; entries with an empty label or value are dropped.
 * @returns {{requested_fields: string[], record: object, missing_fields: string[],
 *   evidence: Array<{field: string, label: string, value: string, strategy: string}>}}
 */
export function matchStructuredFields(fields = [], candidates = []) {
  const normalizedCandidates = candidates
    .map((candidate, index) => ({
      index,
      label: normalizeWhitespace(candidate?.label),
      value: normalizeWhitespace(candidate?.value),
      strategy: candidate?.strategy ?? 'unknown',
      normalizedLabel: normalizeStructuredLabel(candidate?.label),
    }))
    .filter((candidate) => candidate.label && candidate.value);

  const requested_fields = fields.map((field) => normalizeWhitespace(field)).filter(Boolean);
  const used = new Set();
  const record = {};
  const evidence = [];
  const missing_fields = [];

  for (const field of requested_fields) {
    const normalizedField = normalizeStructuredLabel(field);
    const matches = normalizedCandidates
      .filter((candidate) => !used.has(candidate.index))
      .map((candidate) => ({
        ...candidate,
        score: scoreStructuredCandidate(normalizedField, candidate.normalizedLabel),
      }))
      .filter((candidate) => candidate.score > 0)
      .sort((left, right) => right.score - left.score || left.label.length - right.label.length);

    if (matches.length === 0) {
      missing_fields.push(field);
      continue;
    }

    const [best] = matches;
    used.add(best.index);
    record[field] = best.value;
    evidence.push({
      field,
      label: best.label,
      value: best.value,
      strategy: best.strategy,
    });
  }

  return {
    requested_fields,
    record,
    missing_fields,
    evidence,
  };
}
|
|
67
|
+
|
|
68
|
+
/**
 * Collect label/value pairs from the page and match them to requested fields.
 *
 * Three strategies run inside the page, in this order:
 *  - `definition_list`: each `<dt>` paired with an immediately following `<dd>`;
 *  - `table_row` / `table_pair`: rows with exactly one `<th>` plus cells, or
 *    header-less rows with exactly two non-empty cells;
 *  - `inline_pair`: text lines of the main content split at their first colon
 *    (ASCII or full-width U+FF1A).
 *
 * @param {object} page - Browser page exposing `evaluate(fn)`.
 * @param {string[]} [fields] - Requested field names.
 * @returns {Promise<object>} The result of `matchStructuredFields`.
 */
export async function extractStructuredContent(page, fields = []) {
  // The callback is handed to page.evaluate, so it must be self-contained: it
  // cannot reference helpers from this module, hence the local
  // normalizeWhitespace copy.
  const candidates = await page.evaluate(() => {
    function normalizeWhitespace(value) {
      return String(value ?? '').replace(/\s+/g, ' ').trim();
    }

    // Record a pair once per (strategy, label, value); empty sides are skipped.
    function pushCandidate(target, label, value, strategy) {
      const normalizedLabel = normalizeWhitespace(label);
      const normalizedValue = normalizeWhitespace(value);
      if (!normalizedLabel || !normalizedValue) return;
      const key = `${strategy}::${normalizedLabel}::${normalizedValue}`;
      if (target.seen.has(key)) return;
      target.seen.add(key);
      target.items.push({
        label: normalizedLabel,
        value: normalizedValue,
        strategy,
      });
    }

    const target = { items: [], seen: new Set() };

    for (const term of document.querySelectorAll('dt')) {
      const detail = term.nextElementSibling;
      if (detail && detail.tagName?.toLowerCase() === 'dd') {
        pushCandidate(target, term.textContent, detail.textContent, 'definition_list');
      }
    }

    for (const row of document.querySelectorAll('tr')) {
      const headers = [...row.querySelectorAll('th')]
        .map((cell) => normalizeWhitespace(cell.textContent))
        .filter(Boolean);
      const cells = [...row.querySelectorAll('td')]
        .map((cell) => normalizeWhitespace(cell.textContent))
        .filter(Boolean);

      if (headers.length === 1 && cells.length >= 1) {
        pushCandidate(target, headers[0], cells.join(' '), 'table_row');
        continue;
      }

      if (headers.length === 0 && cells.length === 2) {
        pushCandidate(target, cells[0], cells[1], 'table_pair');
      }
    }

    const root = document.querySelector('main') || document.querySelector('article') || document.body;
    // Split into lines BEFORE collapsing whitespace: normalizing first would
    // turn every newline into a space and leave a single giant line.
    const lines = String(root?.innerText ?? '')
      .split(/\r?\n/)
      .map((line) => normalizeWhitespace(line))
      .filter(Boolean);

    for (const line of lines) {
      // Split at whichever colon comes first, ASCII or full-width, so a line
      // such as "time\uFF1A10:30" keeps "10:30" together as the value.
      const separatorIndex = line.search(/[:\uFF1A]/u);
      if (separatorIndex === -1) continue;
      const label = line.slice(0, separatorIndex);
      const value = line.slice(separatorIndex + 1);
      pushCandidate(target, label, value, 'inline_pair');
    }

    return target.items;
  });

  return matchStructuredFields(fields, candidates);
}
|