screenhand 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. package/README.md +165 -446
  2. package/bin/darwin-arm64/macos-bridge +0 -0
  3. package/dist/mcp-desktop.js +3615 -400
  4. package/dist/scripts/export-help-center.js +112 -0
  5. package/dist/scripts/marketing-loop.js +117 -0
  6. package/dist/scripts/observer-daemon.js +288 -0
  7. package/dist/scripts/orchestrator-daemon.js +399 -0
  8. package/dist/scripts/threads-campaign.js +208 -0
  9. package/dist/src/community/fetcher.js +109 -0
  10. package/dist/src/community/index.js +6 -0
  11. package/dist/src/community/publisher.js +191 -0
  12. package/dist/src/community/remote-api.js +121 -0
  13. package/dist/src/community/types.js +3 -0
  14. package/dist/src/community/validator.js +95 -0
  15. package/dist/src/context-tracker.js +489 -0
  16. package/dist/src/ingestion/coverage-auditor.js +233 -0
  17. package/dist/src/ingestion/doc-parser.js +164 -0
  18. package/dist/src/ingestion/index.js +8 -0
  19. package/dist/src/ingestion/menu-scanner.js +152 -0
  20. package/dist/src/ingestion/reference-merger.js +186 -0
  21. package/dist/src/ingestion/shortcut-extractor.js +180 -0
  22. package/dist/src/ingestion/tutorial-extractor.js +170 -0
  23. package/dist/src/ingestion/types.js +3 -0
  24. package/dist/src/jobs/manager.js +82 -14
  25. package/dist/src/jobs/runner.js +138 -15
  26. package/dist/src/learning/engine.js +356 -0
  27. package/dist/src/learning/index.js +9 -0
  28. package/dist/src/learning/locator-policy.js +120 -0
  29. package/dist/src/learning/pattern-policy.js +89 -0
  30. package/dist/src/learning/recovery-policy.js +116 -0
  31. package/dist/src/learning/sensor-policy.js +115 -0
  32. package/dist/src/learning/timing-model.js +204 -0
  33. package/dist/src/learning/topology-policy.js +90 -0
  34. package/dist/src/learning/types.js +9 -0
  35. package/dist/src/logging/timeline-logger.js +4 -1
  36. package/dist/src/memory/playbook-seeds.js +200 -0
  37. package/dist/src/memory/recall.js +60 -8
  38. package/dist/src/memory/service.js +30 -5
  39. package/dist/src/memory/store.js +34 -5
  40. package/dist/src/native/bridge-client.js +253 -31
  41. package/dist/src/observer/state.js +199 -0
  42. package/dist/src/observer/types.js +43 -0
  43. package/dist/src/orchestrator/state.js +68 -0
  44. package/dist/src/orchestrator/types.js +22 -0
  45. package/dist/src/perception/ax-source.js +162 -0
  46. package/dist/src/perception/cdp-source.js +162 -0
  47. package/dist/src/perception/coordinator.js +771 -0
  48. package/dist/src/perception/frame-differ.js +287 -0
  49. package/dist/src/perception/index.js +22 -0
  50. package/dist/src/perception/manager.js +199 -0
  51. package/dist/src/perception/types.js +47 -0
  52. package/dist/src/perception/vision-source.js +399 -0
  53. package/dist/src/planner/deterministic.js +298 -0
  54. package/dist/src/planner/executor.js +870 -0
  55. package/dist/src/planner/goal-store.js +92 -0
  56. package/dist/src/planner/index.js +21 -0
  57. package/dist/src/planner/planner.js +520 -0
  58. package/dist/src/planner/tool-registry.js +71 -0
  59. package/dist/src/planner/types.js +22 -0
  60. package/dist/src/platform/explorer.js +213 -0
  61. package/dist/src/platform/help-center-markdown.js +527 -0
  62. package/dist/src/platform/learner.js +257 -0
  63. package/dist/src/playbook/engine.js +296 -11
  64. package/dist/src/playbook/mcp-recorder.js +204 -0
  65. package/dist/src/playbook/recorder.js +3 -2
  66. package/dist/src/playbook/runner.js +1 -1
  67. package/dist/src/playbook/store.js +139 -10
  68. package/dist/src/recovery/detectors.js +156 -0
  69. package/dist/src/recovery/engine.js +327 -0
  70. package/dist/src/recovery/index.js +20 -0
  71. package/dist/src/recovery/strategies.js +274 -0
  72. package/dist/src/recovery/types.js +20 -0
  73. package/dist/src/runtime/accessibility-adapter.js +55 -18
  74. package/dist/src/runtime/applescript-adapter.js +8 -2
  75. package/dist/src/runtime/cdp-chrome-adapter.js +1 -1
  76. package/dist/src/runtime/executor.js +23 -3
  77. package/dist/src/runtime/locator-cache.js +24 -2
  78. package/dist/src/runtime/service.js +59 -15
  79. package/dist/src/runtime/session-manager.js +4 -1
  80. package/dist/src/runtime/vision-adapter.js +2 -1
  81. package/dist/src/state/app-map-types.js +72 -0
  82. package/dist/src/state/app-map.js +1974 -0
  83. package/dist/src/state/entity-tracker.js +108 -0
  84. package/dist/src/state/fusion.js +96 -0
  85. package/dist/src/state/index.js +21 -0
  86. package/dist/src/state/ladder-generator.js +236 -0
  87. package/dist/src/state/persistence.js +156 -0
  88. package/dist/src/state/types.js +17 -0
  89. package/dist/src/state/world-model.js +1456 -0
  90. package/dist/src/util/atomic-write.js +19 -4
  91. package/dist/src/util/sanitize.js +146 -0
  92. package/dist-app-maps/com.figma.Desktop.json +959 -0
  93. package/dist-app-maps/com.hnc.Discord.json +1146 -0
  94. package/dist-app-maps/notion.id.json +2831 -0
  95. package/dist-playbooks/canva-screenhand-carousel.json +445 -0
  96. package/dist-playbooks/codex-desktop.json +76 -0
  97. package/dist-playbooks/competitor-research-stack.json +122 -0
  98. package/dist-playbooks/davinci-color-grade.json +153 -0
  99. package/dist-playbooks/davinci-edit-timeline.json +162 -0
  100. package/dist-playbooks/davinci-render.json +114 -0
  101. package/dist-playbooks/devto.json +52 -0
  102. package/dist-playbooks/discord.json +41 -0
  103. package/dist-playbooks/google-flow-create-project.json +59 -0
  104. package/dist-playbooks/google-flow-edit-image.json +90 -0
  105. package/dist-playbooks/google-flow-edit-video.json +90 -0
  106. package/dist-playbooks/google-flow-generate-image.json +68 -0
  107. package/dist-playbooks/google-flow-generate-video.json +191 -0
  108. package/dist-playbooks/google-flow-open-project.json +48 -0
  109. package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
  110. package/dist-playbooks/google-flow-search-assets.json +64 -0
  111. package/dist-playbooks/instagram.json +57 -0
  112. package/dist-playbooks/linkedin.json +52 -0
  113. package/dist-playbooks/n8n.json +43 -0
  114. package/dist-playbooks/reddit.json +52 -0
  115. package/dist-playbooks/threads.json +59 -0
  116. package/dist-playbooks/x-twitter.json +59 -0
  117. package/dist-playbooks/youtube.json +59 -0
  118. package/dist-references/canva.json +646 -0
  119. package/dist-references/codex-desktop.json +305 -0
  120. package/dist-references/davinci-resolve-keyboard.json +594 -0
  121. package/dist-references/davinci-resolve-menu-map.json +1139 -0
  122. package/dist-references/davinci-resolve-menus-batch1.json +116 -0
  123. package/dist-references/davinci-resolve-menus-batch2.json +372 -0
  124. package/dist-references/davinci-resolve-menus-batch3.json +330 -0
  125. package/dist-references/davinci-resolve-menus-batch4.json +297 -0
  126. package/dist-references/davinci-resolve-shortcuts.json +333 -0
  127. package/dist-references/devpost.json +186 -0
  128. package/dist-references/devto.json +317 -0
  129. package/dist-references/discord.json +549 -0
  130. package/dist-references/figma.json +1186 -0
  131. package/dist-references/finder.json +146 -0
  132. package/dist-references/google-ads-transparency.json +95 -0
  133. package/dist-references/google-flow.json +649 -0
  134. package/dist-references/instagram.json +341 -0
  135. package/dist-references/linkedin.json +324 -0
  136. package/dist-references/meta-ad-library.json +86 -0
  137. package/dist-references/n8n.json +387 -0
  138. package/dist-references/notes.json +27 -0
  139. package/dist-references/notion.json +163 -0
  140. package/dist-references/reddit.json +341 -0
  141. package/dist-references/threads.json +337 -0
  142. package/dist-references/x-twitter.json +403 -0
  143. package/dist-references/youtube.json +373 -0
  144. package/native/macos-bridge/Package.swift +22 -0
  145. package/native/macos-bridge/Sources/AccessibilityBridge.swift +482 -0
  146. package/native/macos-bridge/Sources/AppManagement.swift +339 -0
  147. package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +537 -0
  148. package/native/macos-bridge/Sources/ObserverBridge.swift +120 -0
  149. package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
  150. package/native/macos-bridge/Sources/VisionBridge.swift +238 -0
  151. package/native/macos-bridge/Sources/main.swift +498 -0
  152. package/native/windows-bridge/AppManagement.cs +234 -0
  153. package/native/windows-bridge/InputBridge.cs +436 -0
  154. package/native/windows-bridge/Program.cs +270 -0
  155. package/native/windows-bridge/ScreenCapture.cs +453 -0
  156. package/native/windows-bridge/UIAutomationBridge.cs +571 -0
  157. package/native/windows-bridge/WindowsBridge.csproj +17 -0
  158. package/package.json +12 -1
  159. package/scripts/postinstall.cjs +127 -0
  160. package/dist/.audit-log.jsonl +0 -55
  161. package/dist/.screenhand/memory/.lock +0 -1
  162. package/dist/.screenhand/memory/actions.jsonl +0 -85
  163. package/dist/.screenhand/memory/errors.jsonl +0 -5
  164. package/dist/.screenhand/memory/errors.jsonl.bak +0 -4
  165. package/dist/.screenhand/memory/state.json +0 -35
  166. package/dist/.screenhand/memory/state.json.bak +0 -35
  167. package/dist/.screenhand/memory/strategies.jsonl +0 -12
  168. package/dist/agent/cli.js +0 -73
  169. package/dist/agent/loop.js +0 -258
  170. package/dist/config.js +0 -9
  171. package/dist/index.js +0 -56
  172. package/dist/logging/timeline-logger.js +0 -29
  173. package/dist/mcp/mcp-stdio-server.js +0 -448
  174. package/dist/mcp/server.js +0 -347
  175. package/dist/mcp-entry.js +0 -59
  176. package/dist/memory/recall.js +0 -160
  177. package/dist/memory/research.js +0 -98
  178. package/dist/memory/seeds.js +0 -89
  179. package/dist/memory/session.js +0 -161
  180. package/dist/memory/store.js +0 -391
  181. package/dist/memory/types.js +0 -4
  182. package/dist/monitor/codex-monitor.js +0 -377
  183. package/dist/monitor/task-queue.js +0 -84
  184. package/dist/monitor/types.js +0 -49
  185. package/dist/native/bridge-client.js +0 -174
  186. package/dist/native/macos-bridge-client.js +0 -5
  187. package/dist/npm-publish-helper.js +0 -117
  188. package/dist/npm-token-cdp.js +0 -113
  189. package/dist/npm-token-create.js +0 -135
  190. package/dist/npm-token-finish.js +0 -126
  191. package/dist/playbook/engine.js +0 -193
  192. package/dist/playbook/index.js +0 -4
  193. package/dist/playbook/recorder.js +0 -519
  194. package/dist/playbook/runner.js +0 -392
  195. package/dist/playbook/store.js +0 -166
  196. package/dist/playbook/types.js +0 -4
  197. package/dist/runtime/accessibility-adapter.js +0 -377
  198. package/dist/runtime/app-adapter.js +0 -48
  199. package/dist/runtime/applescript-adapter.js +0 -283
  200. package/dist/runtime/ax-role-map.js +0 -80
  201. package/dist/runtime/browser-adapter.js +0 -36
  202. package/dist/runtime/cdp-chrome-adapter.js +0 -505
  203. package/dist/runtime/composite-adapter.js +0 -205
  204. package/dist/runtime/executor.js +0 -250
  205. package/dist/runtime/locator-cache.js +0 -12
  206. package/dist/runtime/planning-loop.js +0 -47
  207. package/dist/runtime/service.js +0 -372
  208. package/dist/runtime/session-manager.js +0 -28
  209. package/dist/runtime/state-observer.js +0 -105
  210. package/dist/runtime/vision-adapter.js +0 -208
  211. package/dist/test-mcp-protocol.js +0 -138
  212. package/dist/types.js +0 -1
@@ -0,0 +1,527 @@
1
+ // Copyright (C) 2025 Clazro Technology Private Limited
2
+ // SPDX-License-Identifier: AGPL-3.0-only
3
+ import fs from "node:fs";
4
+ import path from "node:path";
5
+ import { existsSync } from "node:fs";
6
+ import { getChromePath, launch } from "chrome-launcher";
7
+ import CDP from "chrome-remote-interface";
8
+ import { writeFileAtomicSync } from "../util/atomic-write.js";
9
/**
 * Path segments that mark the root of a help area. deriveScopePrefix compares
 * lower-cased URL path segments against this set.
 */
const HELP_SCOPE_SEGMENTS = new Set(
    ["help", "docs", "support", "kb", "hc", "guide", "knowledgebase", "knowledge-base"],
);
/**
 * Query-string keys removed by normalizeCrawlUrl (keys are lower-cased before
 * the lookup, so entries here must be lower-case).
 */
const TRACKING_PARAMS = new Set(
    [
        "fbclid", "gclid", "ref", "source",
        "utm_campaign", "utm_content", "utm_medium", "utm_source", "utm_term",
    ],
);
/**
 * Texts handed to the in-page extraction script; a content block whose cleaned
 * text equals one of these ends block collection for the page.
 */
const STOP_MARKERS = [
    "Skip to end of footer",
    "Download Canva for free",
    "How would you rate the help you received from this article?",
    "People also viewed",
    "Privacy",
    "Terms",
];
/** CSS selector for the elements the extraction script collects as content blocks. */
const BLOCK_SELECTOR = "h1,h2,h3,h4,p,li,pre,blockquote";
39
/**
 * Derives the pathname prefix that bounds a crawl started at `startUrl`.
 *
 * Resolution order:
 *  1. If any path segment (case-insensitive) is in HELP_SCOPE_SEGMENTS, the
 *     prefix runs up to and including the first such segment.
 *  2. Otherwise, a path ending in "/" is used as-is.
 *  3. Otherwise, the last segment is treated as a page name and dropped.
 *
 * The result always starts and ends with "/". When no segments remain (for
 * example "/" or a single page such as "/article"), the prefix is "/" rather
 * than the invalid "//" that joining an empty segment list would produce.
 *
 * @param {string} startUrl - Absolute URL the crawl starts from.
 * @returns {string} Pathname prefix, e.g. "/en/help/".
 * @throws {TypeError} If `startUrl` is not a valid absolute URL.
 */
export function deriveScopePrefix(startUrl) {
    const url = new URL(startUrl);
    const parts = url.pathname.split("/").filter(Boolean);
    const helpIdx = parts.findIndex((part) => HELP_SCOPE_SEGMENTS.has(part.toLowerCase()));
    let scopeParts;
    if (helpIdx >= 0) {
        scopeParts = parts.slice(0, helpIdx + 1);
    }
    else if (url.pathname.endsWith("/")) {
        scopeParts = parts;
    }
    else {
        // The final segment names a page, not a directory.
        scopeParts = parts.slice(0, -1);
    }
    return scopeParts.length === 0 ? "/" : `/${scopeParts.join("/")}/`;
}
54
/**
 * Resolves `candidate` against `baseUrl` and returns a canonical crawl URL.
 *
 * The fragment is dropped, and query keys that are listed in TRACKING_PARAMS
 * or start with "utm_" (both case-insensitive) are removed.
 *
 * @param {string} candidate - Raw href, absolute or relative.
 * @param {string} baseUrl - Base used to resolve relative hrefs.
 * @returns {string|null} The normalized URL, or null when the candidate is
 *   empty, unparsable, not http(s), or points at a media/archive file.
 */
export function normalizeCrawlUrl(candidate, baseUrl) {
    const nonNavigableScheme = /^(javascript:|mailto:|tel:)/i;
    if (!candidate || nonNavigableScheme.test(candidate)) {
        return null;
    }
    let parsed;
    try {
        parsed = new URL(candidate, baseUrl);
    }
    catch {
        return null;
    }
    if (!/^https?:$/.test(parsed.protocol)) {
        return null;
    }
    parsed.hash = "";
    // Collect first, delete afterwards, so the key list is never mutated mid-iteration.
    const trackingKeys = Array.from(parsed.searchParams.keys()).filter((key) => {
        const lowered = key.toLowerCase();
        return TRACKING_PARAMS.has(lowered) || lowered.startsWith("utm_");
    });
    trackingKeys.forEach((key) => parsed.searchParams.delete(key));
    const looksLikeAsset = /\.(png|jpe?g|gif|svg|webp|pdf|zip|mp4|mp3|mov)$/i.test(parsed.pathname.toLowerCase());
    return looksLikeAsset ? null : parsed.toString();
}
80
/**
 * Normalizes raw links and keeps those that belong to the crawl scope.
 *
 * A link is kept when, after normalizeCrawlUrl, it is on the same origin as
 * `startUrl`, its pathname starts with `scopePrefix`, it is not the start page
 * itself, and it has not been seen earlier in `links`.
 *
 * The start page is compared in its normalized form, so a `startUrl` carrying
 * a fragment or tracking parameters still excludes links back to itself. A
 * missing `title` becomes an empty string instead of raising.
 *
 * @param {Array<{url: string, title?: string, description?: string}>} links
 * @param {string} startUrl - Absolute URL of the page the links came from.
 * @param {string} scopePrefix - Pathname prefix links must start with.
 * @returns {Array<{url: string, title: string, description?: string}>}
 *   De-duplicated links in input order; `description` only when non-empty.
 */
export function filterScopedLinks(links, startUrl, scopePrefix) {
    const start = new URL(startUrl);
    const normalizedStart = normalizeCrawlUrl(startUrl, startUrl) ?? start.toString();
    // Seeding with the start page makes the duplicate check drop self-links too.
    const seen = new Set([normalizedStart]);
    const filtered = [];
    for (const link of links) {
        const normalized = normalizeCrawlUrl(link.url, startUrl);
        if (!normalized || seen.has(normalized)) {
            continue;
        }
        const url = new URL(normalized);
        if (url.origin !== start.origin || !url.pathname.startsWith(scopePrefix)) {
            continue;
        }
        seen.add(normalized);
        filtered.push({
            url: normalized,
            title: cleanLine(link.title ?? ""),
            ...(link.description ? { description: cleanLine(link.description) } : {}),
        });
    }
    return filtered;
}
106
/**
 * Renders all crawled pages into one Markdown document: a metadata header, a
 * linked list of the included pages, then one numbered section per page.
 *
 * @param {string} startUrl - URL the crawl started from (shown as "Source").
 * @param {string} scopePrefix - Scope prefix used for the crawl.
 * @param {Array<{title: string, url: string, markdown: string}>} pages
 * @param {string} [exportedAt] - Timestamp text; defaults to the current ISO time.
 * @returns {string} Markdown ending in exactly one newline.
 */
export function renderMergedMarkdown(startUrl, scopePrefix, pages, exportedAt = new Date().toISOString()) {
    const header = [
        "# Help Center Export",
        "",
        `- Source: ${startUrl}`,
        `- Scope: ${scopePrefix}`,
        `- Exported: ${exportedAt}`,
        `- Pages: ${pages.length}`,
        "",
        "## Included Pages",
        "",
    ];
    const tableOfContents = pages.map((page) => `- [${page.title}](${page.url})`);
    const sections = pages.flatMap((page, index) => [
        "",
        "---",
        "",
        `## ${index + 1}. ${page.title}`,
        "",
        `Source: ${page.url}`,
        "",
        // Pages with no extracted body still get a visible placeholder.
        page.markdown.trim() || "_No visible article body extracted._",
    ]);
    const document = [...header, ...tableOfContents, ...sections].join("\n");
    return `${document.trim()}\n`;
}
133
/**
 * Crawls a help center breadth-first in a Chrome session and writes all
 * extracted pages to a single Markdown file.
 *
 * Recognized `options` fields:
 *  - `startUrl` (required): first page to visit.
 *  - `outputPath` (required): destination file; parent directories are created.
 *  - `scopePrefix`: pathname prefix to stay within; defaults to
 *    deriveScopePrefix(startUrl).
 *  - `maxPages`: page limit, default 25.
 *  - `waitAfterLoadMs`: settle delay per page, default 1200.
 *  - `headless`: run Chrome headless, default false.
 *  - `onProgress`: optional callback receiving a progress message per page.
 *
 * @param {object} options
 * @returns {Promise<{outputPath: string, pageCount: number, pages: Array, scopePrefix: string}>}
 * @throws {Error} If `startUrl` cannot be normalized, or if launching Chrome,
 *   navigation, or extraction fails (nothing is written in that case).
 */
export async function exportHelpCenterToMarkdown(options) {
    const startUrl = normalizeCrawlUrl(options.startUrl, options.startUrl);
    if (!startUrl) {
        throw new Error(`Invalid start URL: ${options.startUrl}`);
    }
    const scopePrefix = options.scopePrefix ?? deriveScopePrefix(startUrl);
    const maxPages = options.maxPages ?? 25;
    const waitAfterLoadMs = options.waitAfterLoadMs ?? 1200;
    const headless = options.headless ?? false;
    const outputPath = path.resolve(options.outputPath);
    const pages = [];
    const queue = [startUrl];
    // Every URL ever queued (pending or already crawled). A Set keeps the
    // duplicate check O(1) instead of scanning the queue for each link.
    const enqueued = new Set(queue);
    let chrome;
    let client;
    try {
        ({ chrome, client } = await openChromeSession(headless));
        while (queue.length > 0 && pages.length < maxPages) {
            const currentUrl = queue.shift();
            options.onProgress?.(`Crawling ${pages.length + 1}/${maxPages}: ${currentUrl}`);
            await navigateAndWait(client, currentUrl, waitAfterLoadMs);
            const page = await extractPage(client, currentUrl, scopePrefix);
            pages.push(page);
            for (const link of page.links) {
                if (!enqueued.has(link.url)) {
                    enqueued.add(link.url);
                    queue.push(link.url);
                }
            }
        }
    }
    finally {
        // Cleanup is best effort: an exception thrown from a finally block
        // would replace whatever error aborted the crawl.
        if (client) {
            try {
                await client.close();
            }
            catch {
                // Best effort cleanup.
            }
        }
        if (chrome) {
            try {
                await chrome.kill();
            }
            catch {
                // Best effort cleanup.
            }
        }
    }
    fs.mkdirSync(path.dirname(outputPath), { recursive: true });
    const rendered = renderMergedMarkdown(startUrl, scopePrefix, pages);
    writeFileAtomicSync(outputPath, rendered);
    return {
        outputPath,
        pageCount: pages.length,
        pages,
        scopePrefix,
    };
}
189
/**
 * Launches Chrome and attaches a CDP client with the Page and Runtime domains
 * enabled.
 *
 * If anything fails after Chrome has been launched, the process (and client,
 * when already connected) is torn down before the error is rethrown. The
 * caller only receives the handles on success, so it could not clean up a
 * half-opened session itself.
 *
 * @param {boolean} headless - Whether to add the headless flag.
 * @returns {Promise<{chrome: object, client: object}>} Launcher handle and CDP client.
 */
async function openChromeSession(headless) {
    const chrome = await launch({
        chromePath: resolveChromePath(),
        startingUrl: "about:blank",
        chromeFlags: buildChromeFlags(headless),
    });
    let client;
    try {
        const targetId = await resolveTargetId(chrome.port);
        client = await CDP({ port: chrome.port, target: targetId });
        await Promise.all([client.Page.enable(), client.Runtime.enable()]);
        return { chrome, client };
    }
    catch (error) {
        if (client) {
            try {
                await client.close();
            }
            catch {
                // Best effort cleanup; the original error is what matters.
            }
        }
        try {
            await chrome.kill();
        }
        catch {
            // Best effort cleanup; the original error is what matters.
        }
        throw error;
    }
}
200
/**
 * Navigates the attached page to `url` and waits for content to appear.
 *
 * Polls every 250 ms, for at most 15 s, until the document is "interactive"
 * or "complete" and the body holds more than 40 characters of text. It then
 * scrolls to the bottom and back to the top, and finally sleeps for
 * `waitAfterLoadMs`. Hitting the 15 s limit is not treated as an error.
 *
 * @param {object} client - CDP client with the Page and Runtime domains.
 * @param {string} url - Page to load.
 * @param {number} waitAfterLoadMs - Extra settle delay in milliseconds.
 * @returns {Promise<void>}
 */
async function navigateAndWait(client, url, waitAfterLoadMs) {
    await client.Page.navigate({ url });
    const probeExpression = `(() => ({
        readyState: document.readyState,
        textLength: (document.body?.innerText || "").trim().length,
    }))()`;
    const giveUpAt = Date.now() + 15_000;
    for (;;) {
        if (Date.now() >= giveUpAt) {
            break;
        }
        const state = await evaluateJson(client, probeExpression);
        const domReady = state.readyState === "interactive" || state.readyState === "complete";
        if (domReady && state.textLength > 40) {
            break;
        }
        await sleep(250);
    }
    // Scroll down and back up inside the page; the promise resolves after the
    // 250 ms pause at the bottom.
    const scrollExpression = `(() => new Promise((resolve) => {
        window.scrollTo(0, document.body.scrollHeight);
        setTimeout(() => {
            window.scrollTo(0, 0);
            resolve(true);
        }, 250);
    }))()`;
    await evaluateJson(client, scrollExpression);
    await sleep(waitAfterLoadMs);
}
223
/**
 * Runs the extraction script in the currently loaded page and shapes its
 * result into a page record.
 *
 * @param {object} client - CDP client attached to the page.
 * @param {string} currentUrl - URL of the loaded page.
 * @param {string} scopePrefix - Pathname prefix that bounds the crawl.
 * @returns {Promise<{url: string, title: string, kind: string, markdown: string, links: Array}>}
 *   `title` falls back to the hostname when the extracted title is empty.
 * @throws {Error} When the script reports that the page looks blocked.
 */
async function extractPage(client, currentUrl, scopePrefix) {
    const { origin, hostname } = new URL(currentUrl);
    const expression = buildExtractionExpression(currentUrl, origin, scopePrefix);
    const extracted = await evaluateJson(client, expression);
    if (extracted.blocked) {
        throw new Error(`Browser was blocked while loading ${currentUrl}. Try running the exporter in headed mode.`);
    }
    const title = cleanLine(extracted.title) || hostname;
    const markdown = extracted.markdown.trim();
    const links = filterScopedLinks(extracted.links, currentUrl, scopePrefix);
    return { url: currentUrl, title, kind: extracted.kind, markdown, links };
}
237
/**
 * Builds the command-line flags passed to Chrome at launch.
 *
 * @param {boolean} headless - When truthy, "--headless=new" is appended.
 * @returns {string[]} A fresh array of flags.
 */
function buildChromeFlags(headless) {
    const baseFlags = [
        "--remote-allow-origins=*",
        "--no-first-run",
        "--no-default-browser-check",
        "--disable-background-networking",
        "--disable-background-timer-throttling",
        "--disable-renderer-backgrounding",
    ];
    return headless ? [...baseFlags, "--headless=new"] : baseFlags;
}
251
/**
 * Finds the id of a page target on the Chrome instance listening on `port`,
 * creating a new target when none of the listed targets is a page.
 *
 * @param {number} port - Remote debugging port.
 * @returns {Promise<string>} Target id.
 * @throws {Error} If no page exists and the newly created target has no usable id.
 */
async function resolveTargetId(port) {
    const listed = await CDP.List({ port });
    const existingPage = listed.find(({ type }) => type === "page");
    if (existingPage?.id) {
        return existingPage.id;
    }
    // CDP.New may hand back either a bare id string or a target object.
    const created = await CDP.New({ port });
    const createdId = typeof created === "string" ? created : created?.id;
    if (typeof createdId === "string") {
        return createdId;
    }
    throw new Error("Could not create a Chrome page target.");
}
266
/**
 * Locates a Chrome executable.
 *
 * Lookup order: the CHROME_PATH environment variable, then the path reported
 * by getChromePath(), then a fixed list of macOS application paths. A
 * candidate is accepted only if it exists on disk.
 *
 * @returns {string} Path to the executable.
 * @throws {Error} If none of the candidates exists.
 */
function resolveChromePath() {
    const fromEnv = process.env.CHROME_PATH;
    if (fromEnv && existsSync(fromEnv)) {
        return fromEnv;
    }
    let discovered;
    try {
        discovered = getChromePath();
    }
    catch {
        // Fall through to fixed candidates.
        discovered = undefined;
    }
    if (discovered && existsSync(discovered)) {
        return discovered;
    }
    const fixedLocation = [
        "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
        "/Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing",
        "/Applications/Chromium.app/Contents/MacOS/Chromium",
    ].find((candidate) => existsSync(candidate));
    if (fixedLocation) {
        return fixedLocation;
    }
    throw new Error("Chrome executable not found. Set CHROME_PATH or install Google Chrome.");
}
292
/**
 * Evaluates `expression` in the page, awaiting any returned promise, and
 * returns the result by value.
 *
 * @param {object} client - CDP client exposing Runtime.evaluate.
 * @param {string} expression - JavaScript source to evaluate in the page.
 * @returns {Promise<*>} The evaluated value.
 * @throws {Error} When the evaluation reports exception details; the message
 *   is the exception description, or "Runtime.evaluate failed" if absent.
 */
async function evaluateJson(client, expression) {
    const request = { expression, awaitPromise: true, returnByValue: true };
    const { exceptionDetails, result } = await client.Runtime.evaluate(request);
    if (exceptionDetails) {
        const message = exceptionDetails.exception?.description ?? "Runtime.evaluate failed";
        throw new Error(message);
    }
    return result.value;
}
303
/**
 * Builds the JavaScript source of a self-invoking function that runs inside
 * the loaded page and extracts its content.
 *
 * The script evaluates to
 * `{ blocked, links, markdown, kind, title }`:
 *  - `blocked`: the title or first 500 body characters look like an
 *    access-denied / captcha page.
 *  - `links`: visible, in-scope, non-footer anchors at or below the first
 *    h1, de-duplicated, each with a `title` and optional `description`.
 *  - `kind`: "article" when there are at least 3 paragraphs outside links or
 *    at least 2 h2/h3 headings, otherwise "listing".
 *  - `markdown`: for articles, the content blocks as Markdown; for listings,
 *    the plain paragraphs followed by a "Linked Pages" list.
 *  - `title`: first h1 text, else the document title, else the pathname.
 *
 * Tracking parameters are stripped with the module-level TRACKING_PARAMS
 * list, matched case-insensitively, so links normalized in the page agree
 * with normalizeCrawlUrl instead of relying on a separate hard-coded list.
 *
 * @param {string} currentUrl - Normalized URL of the loaded page.
 * @param {string} origin - Origin that links must share.
 * @param {string} scopePrefix - Pathname prefix links must start with.
 * @returns {string} Expression source suitable for Runtime.evaluate.
 */
function buildExtractionExpression(currentUrl, origin, scopePrefix) {
    const config = JSON.stringify({
        currentUrl,
        origin,
        scopePrefix,
        stopMarkers: STOP_MARKERS,
        trackingParams: [...TRACKING_PARAMS],
    });
    return `
    (() => {
      const config = ${config};
      const trackingParams = new Set(config.trackingParams);
      const root = document.querySelector("main, [role='main'], article") || document.body;
      const h1 = root.querySelector("h1") || document.querySelector("h1");
      const h1Top = h1 ? h1.getBoundingClientRect().top : Number.NEGATIVE_INFINITY;

      const normalize = (value) =>
        String(value ?? "")
          .replace(/\\u00a0/g, " ")
          .replace(/\\s+/g, " ")
          .trim();

      const cleanLine = (value) => normalize(value).replace(/\\s*\\n\\s*/g, " ");

      const isVisible = (element) => {
        if (!(element instanceof Element)) return false;
        const style = window.getComputedStyle(element);
        const rect = element.getBoundingClientRect();
        return (
          style.display !== "none" &&
          style.visibility !== "hidden" &&
          style.opacity !== "0" &&
          rect.width > 0 &&
          rect.height > 0
        );
      };

      const isBlocked = () => {
        const title = normalize(document.title);
        const bodyText = normalize(document.body?.innerText || "").slice(0, 500);
        return /access denied|forbidden|just a moment|verify you are human|captcha/i.test(
          title + " " + bodyText,
        );
      };

      const normalizeLink = (href) => {
        try {
          const url = new URL(href, location.href);
          if (!/^https?:$/.test(url.protocol)) return null;
          url.hash = "";
          for (const key of [...url.searchParams.keys()]) {
            const lowered = key.toLowerCase();
            if (trackingParams.has(lowered) || lowered.startsWith("utm_")) {
              url.searchParams.delete(key);
            }
          }
          if (/\\.(png|jpe?g|gif|svg|webp|pdf|zip|mp4|mp3|mov)$/i.test(url.pathname)) {
            return null;
          }
          return url.toString();
        } catch {
          return null;
        }
      };

      const inScope = (href) => {
        try {
          const url = new URL(href);
          return url.origin === config.origin && url.pathname.startsWith(config.scopePrefix);
        } catch {
          return false;
        }
      };

      const extractAnchorText = (anchor) => {
        const childTexts = Array.from(
          anchor.querySelectorAll("h2, h3, h4, p, span, strong, em, div"),
        )
          .filter(isVisible)
          .map((node) => cleanLine(node.textContent))
          .filter(Boolean);

        const uniqueTexts = [...new Set(childTexts)];
        // Drop texts that merely wrap another, shorter text (container elements).
        const discreteTexts = uniqueTexts.filter(
          (text) =>
            !uniqueTexts.some(
              (other) =>
                other !== text &&
                text.length > other.length + 8 &&
                text.includes(other),
            ),
        );
        const candidates = discreteTexts.length > 0 ? discreteTexts : uniqueTexts;
        const fallback = cleanLine(anchor.textContent);
        const title =
          candidates
            .filter((text) => text.length <= 140)
            .sort((left, right) => left.length - right.length)[0] || fallback;
        const description =
          candidates.find(
            (text) =>
              text !== title &&
              !text.includes(title) &&
              !title.includes(text),
          ) || undefined;
        return { title, description };
      };

      const links = [];
      const seenLinks = new Set();
      for (const anchor of Array.from(root.querySelectorAll("a[href]"))) {
        if (!isVisible(anchor)) continue;
        if (anchor.closest("footer")) continue;
        if (anchor.getBoundingClientRect().top < h1Top - 8) continue;

        const normalizedUrl = normalizeLink(anchor.href);
        if (!normalizedUrl || !inScope(normalizedUrl) || normalizedUrl === config.currentUrl) {
          continue;
        }

        const { title, description } = extractAnchorText(anchor);
        if (!title || /^help centre$/i.test(title)) continue;
        if (seenLinks.has(normalizedUrl)) continue;
        seenLinks.add(normalizedUrl);
        links.push({ url: normalizedUrl, title, description });
      }

      const blocks = [];
      let started = h1 ? false : true;
      for (const node of Array.from(root.querySelectorAll(${JSON.stringify(BLOCK_SELECTOR)}))) {
        if (!isVisible(node)) continue;
        if (h1 && !started) {
          started = node === h1;
          if (!started) continue;
        }

        const text = cleanLine(node.textContent);
        if (!text) continue;
        if (config.stopMarkers.includes(text)) break;

        const tag = node.tagName.toLowerCase();
        const anchor = node.closest("a[href]");
        const href = anchor ? anchor.getAttribute("href") || "" : "";
        const samePageLink =
          Boolean(anchor) &&
          (href.startsWith("#") ||
            (() => {
              try {
                const resolved = new URL(anchor.href, location.href);
                return resolved.pathname === location.pathname && resolved.search === location.search;
              } catch {
                return false;
              }
            })());

        blocks.push({
          tag,
          text,
          insideLink: Boolean(anchor),
          samePageLink,
        });
      }

      const articleParagraphs = blocks.filter((block) => block.tag === "p" && !block.insideLink);
      const articleHeadings = blocks.filter((block) => block.tag === "h2" || block.tag === "h3");
      const kind =
        articleParagraphs.length >= 3 || articleHeadings.length >= 2 ? "article" : "listing";

      const lines = [];
      const push = (line = "") => {
        if (line === "" && lines[lines.length - 1] === "") return;
        lines.push(line);
      };

      const title = cleanLine(h1?.textContent || document.title || location.pathname);

      if (kind === "article") {
        for (const block of blocks) {
          if (block.tag === "h1") continue;
          if (block.text === title) continue;
          if (block.insideLink && !block.samePageLink) continue;

          if (block.tag === "h2") push("## " + block.text);
          else if (block.tag === "h3") push("### " + block.text);
          else if (block.tag === "h4") push("#### " + block.text);
          else if (block.tag === "li") push("- " + block.text);
          else if (block.tag === "pre" || block.tag === "blockquote") {
            push("~~~");
            push(block.text);
            push("~~~");
          } else {
            push(block.text);
          }

          push("");
        }
      } else {
        for (const block of blocks) {
          if (block.tag === "h1") continue;
          if (block.insideLink) continue;
          if (block.tag === "p") {
            push(block.text);
            push("");
          }
        }

        if (links.length > 0) {
          push("### Linked Pages");
          push("");
          for (const link of links) {
            const description = link.description && link.description !== link.title
              ? " - " + link.description
              : "";
            push("- [" + link.title + "](" + link.url + ")" + description);
          }
          push("");
        }
      }

      return {
        blocked: isBlocked(),
        links,
        markdown: lines.join("\\n").trim(),
        kind,
        title,
      };
    })()`;
}
522
/**
 * Collapses every run of whitespace to a single space and trims the ends.
 *
 * `null` and `undefined` are treated as an empty string, and other
 * non-string values are stringified, so a missing field yields "" instead of
 * raising a TypeError.
 *
 * @param {*} value - Text to clean.
 * @returns {string} Single-line, trimmed text.
 */
function cleanLine(value) {
    return String(value ?? "").replace(/\s+/g, " ").trim();
}
525
/**
 * Returns a promise that resolves after `ms` milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
    return new Promise((wake) => {
        setTimeout(wake, ms);
    });
}