autokap 1.0.7 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/cursors/macos.svg +4 -0
- package/assets/cursors/windows.svg +15 -0
- package/assets/skill/OPCODE-REFERENCE.md +607 -0
- package/assets/skill/README.md +39 -0
- package/assets/skill/SKILL.md +453 -468
- package/assets/skill/STUDIO-SKILL.md +476 -0
- package/assets/skill/references/examples.md +104 -0
- package/assets/skill/references/interactive-demo.md +225 -0
- package/assets/skill/references/mock-data.md +178 -0
- package/dist/action-verifier.d.ts +29 -0
- package/dist/action-verifier.js +133 -0
- package/dist/agent-action-recovery.d.ts +45 -0
- package/dist/agent-action-recovery.js +370 -0
- package/dist/agent-message-utils.d.ts +21 -0
- package/dist/agent-message-utils.js +77 -0
- package/dist/agent-url-utils.d.ts +30 -0
- package/dist/agent-url-utils.js +138 -0
- package/dist/agent.d.ts +92 -8
- package/dist/agent.js +2936 -781
- package/dist/ak-tree.d.ts +39 -0
- package/dist/ak-tree.js +368 -0
- package/dist/alt-text.d.ts +26 -0
- package/dist/alt-text.js +55 -0
- package/dist/auth-capture.d.ts +17 -0
- package/dist/auth-capture.js +164 -0
- package/dist/benchmark.d.ts +59 -0
- package/dist/benchmark.js +135 -0
- package/dist/browser-bar.d.ts +14 -6
- package/dist/browser-bar.js +145 -8
- package/dist/browser-pool.d.ts +7 -0
- package/dist/browser-pool.js +15 -5
- package/dist/browser-utils.d.ts +31 -0
- package/dist/browser-utils.js +97 -0
- package/dist/browser.d.ts +51 -1
- package/dist/browser.js +1481 -31
- package/dist/capture-alt-text.js +2 -1
- package/dist/capture-language-preflight.js +14 -0
- package/dist/capture-llm-page-identity.js +22 -10
- package/dist/capture-page-identity.d.ts +5 -7
- package/dist/capture-page-identity.js +211 -78
- package/dist/capture-preset-credentials.d.ts +50 -0
- package/dist/capture-preset-credentials.js +127 -0
- package/dist/capture-request-plan.d.ts +2 -2
- package/dist/capture-request-plan.js +64 -16
- package/dist/capture-run-optimizer.js +48 -33
- package/dist/capture-selector-memory.d.ts +5 -0
- package/dist/capture-selector-memory.js +18 -0
- package/dist/capture-strategy.d.ts +36 -0
- package/dist/capture-strategy.js +95 -0
- package/dist/capture-studio-sync.d.ts +1 -0
- package/dist/capture-studio-sync.js +9 -3
- package/dist/capture-surface-contract.d.ts +36 -0
- package/dist/capture-surface-contract.js +299 -0
- package/dist/capture-transition-engine.d.ts +28 -0
- package/dist/capture-transition-engine.js +292 -0
- package/dist/capture-variant-state.d.ts +2 -0
- package/dist/capture-variant-state.js +26 -0
- package/dist/capture-verification.d.ts +35 -0
- package/dist/capture-verification.js +95 -0
- package/dist/capture-viewport-lock.d.ts +48 -0
- package/dist/capture-viewport-lock.js +74 -0
- package/dist/circuit-breaker.d.ts +42 -0
- package/dist/circuit-breaker.js +119 -0
- package/dist/cli-config.d.ts +8 -1
- package/dist/cli-config.js +62 -6
- package/dist/cli-contract.d.ts +15 -0
- package/dist/cli-contract.js +167 -0
- package/dist/cli-runner-local.d.ts +12 -0
- package/dist/cli-runner-local.js +102 -0
- package/dist/cli-runner.d.ts +34 -0
- package/dist/cli-runner.js +433 -0
- package/dist/cli-utils.d.ts +0 -1
- package/dist/cli-utils.js +2 -5
- package/dist/cli.js +1005 -267
- package/dist/clip-orchestrator.js +9 -2
- package/dist/clip-postprocess.js +25 -16
- package/dist/cookie-dismiss.d.ts +2 -0
- package/dist/cookie-dismiss.js +48 -13
- package/dist/cost-logging.d.ts +8 -0
- package/dist/cost-logging.js +160 -46
- package/dist/cost-resolution-monitor.d.ts +16 -0
- package/dist/cost-resolution-monitor.js +34 -0
- package/dist/credential-templates.js +2 -2
- package/dist/cursor-overlay-script.d.ts +6 -0
- package/dist/cursor-overlay-script.js +169 -0
- package/dist/dom-css-purger.d.ts +65 -0
- package/dist/dom-css-purger.js +333 -0
- package/dist/dom-font-inliner.d.ts +45 -0
- package/dist/dom-font-inliner.js +148 -0
- package/dist/dom-patch-resolver.d.ts +52 -0
- package/dist/dom-patch-resolver.js +242 -0
- package/dist/dom-serializer.d.ts +82 -0
- package/dist/dom-serializer.js +378 -0
- package/dist/element-capture.d.ts +1 -41
- package/dist/element-capture.js +202 -446
- package/dist/env-validation.d.ts +5 -0
- package/dist/env-validation.js +29 -0
- package/dist/execution-schema.d.ts +4423 -0
- package/dist/execution-schema.js +507 -0
- package/dist/execution-types.d.ts +886 -0
- package/dist/execution-types.js +65 -0
- package/dist/fonts-loader.d.ts +14 -0
- package/dist/fonts-loader.js +55 -0
- package/dist/hybrid-navigator.js +12 -12
- package/dist/index.d.ts +9 -6
- package/dist/index.js +10 -4
- package/dist/legacy/agent-action-recovery.d.ts +45 -0
- package/dist/legacy/agent-action-recovery.js +370 -0
- package/dist/legacy/agent-message-utils.d.ts +21 -0
- package/dist/legacy/agent-message-utils.js +77 -0
- package/dist/legacy/agent-url-utils.d.ts +30 -0
- package/dist/legacy/agent-url-utils.js +138 -0
- package/dist/legacy/agent.d.ts +226 -0
- package/dist/legacy/agent.js +6666 -0
- package/dist/legacy/clip-orchestrator.d.ts +148 -0
- package/dist/legacy/clip-orchestrator.js +957 -0
- package/dist/legacy/credential-templates.d.ts +5 -0
- package/dist/legacy/credential-templates.js +60 -0
- package/dist/legacy/hybrid-navigator.d.ts +138 -0
- package/dist/legacy/hybrid-navigator.js +468 -0
- package/dist/legacy/llm-usage.d.ts +17 -0
- package/dist/legacy/llm-usage.js +45 -0
- package/dist/legacy/prompt-cache.d.ts +10 -0
- package/dist/legacy/prompt-cache.js +24 -0
- package/dist/legacy/prompts.d.ts +175 -0
- package/dist/legacy/prompts.js +1038 -0
- package/dist/legacy/tools.d.ts +4 -0
- package/dist/legacy/tools.js +216 -0
- package/dist/legacy/video-agent.d.ts +143 -0
- package/dist/legacy/video-agent.js +4788 -0
- package/dist/legacy/video-observation.d.ts +36 -0
- package/dist/legacy/video-observation.js +192 -0
- package/dist/legacy/video-planner.d.ts +12 -0
- package/dist/legacy/video-planner.js +501 -0
- package/dist/legacy/video-prompts.d.ts +37 -0
- package/dist/legacy/video-prompts.js +569 -0
- package/dist/legacy/video-tools.d.ts +3 -0
- package/dist/legacy/video-tools.js +59 -0
- package/dist/legacy/video-variant-state.d.ts +29 -0
- package/dist/legacy/video-variant-state.js +80 -0
- package/dist/legacy/vision-model.d.ts +17 -0
- package/dist/legacy/vision-model.js +74 -0
- package/dist/llm-healer.d.ts +63 -0
- package/dist/llm-healer.js +166 -0
- package/dist/llm-provider.d.ts +29 -0
- package/dist/llm-provider.js +80 -0
- package/dist/logger.d.ts +6 -2
- package/dist/logger.js +15 -1
- package/dist/mockup-html.js +35 -25
- package/dist/mockup.d.ts +95 -2
- package/dist/mockup.js +427 -166
- package/dist/mouse-animation.d.ts +2 -2
- package/dist/mouse-animation.js +34 -20
- package/dist/opcode-actions.d.ts +42 -0
- package/dist/opcode-actions.js +511 -0
- package/dist/opcode-runner.d.ts +51 -0
- package/dist/opcode-runner.js +770 -0
- package/dist/openrouter-client.d.ts +40 -0
- package/dist/openrouter-client.js +16 -0
- package/dist/overlay-engine.d.ts +24 -0
- package/dist/overlay-engine.js +176 -0
- package/dist/postcondition.d.ts +16 -0
- package/dist/postcondition.js +269 -0
- package/dist/program-patcher.d.ts +25 -0
- package/dist/program-patcher.js +44 -0
- package/dist/prompts.d.ts +13 -5
- package/dist/prompts.js +224 -351
- package/dist/provider-config.d.ts +12 -0
- package/dist/provider-config.js +15 -0
- package/dist/recovery-chain.d.ts +37 -0
- package/dist/recovery-chain.js +350 -0
- package/dist/remote-browser.d.ts +28 -4
- package/dist/remote-browser.js +60 -5
- package/dist/safari-browser-bar.d.ts +15 -0
- package/dist/safari-browser-bar.js +95 -0
- package/dist/safari-toolbar-asset.d.ts +15 -0
- package/dist/safari-toolbar-asset.js +12 -0
- package/dist/security.d.ts +2 -1
- package/dist/security.js +49 -10
- package/dist/selector-resolver.d.ts +34 -0
- package/dist/selector-resolver.js +181 -0
- package/dist/semantic-resolver.d.ts +35 -0
- package/dist/semantic-resolver.js +161 -0
- package/dist/server-capture-runtime.d.ts +5 -3
- package/dist/server-capture-runtime.js +42 -95
- package/dist/server-credit-usage.d.ts +2 -2
- package/dist/server-project-webhooks.d.ts +15 -1
- package/dist/server-project-webhooks.js +34 -8
- package/dist/server-screenshot-watermark.js +27 -5
- package/dist/session-profile.js +164 -1
- package/dist/sf-pro-symbols.d.ts +1 -0
- package/dist/sf-pro-symbols.js +55 -0
- package/dist/skill-packaging.d.ts +28 -0
- package/dist/skill-packaging.js +169 -0
- package/dist/smart-wait.d.ts +27 -0
- package/dist/smart-wait.js +81 -0
- package/dist/status-bar-render.d.ts +20 -0
- package/dist/status-bar-render.js +410 -0
- package/dist/status-bar.d.ts +9 -0
- package/dist/status-bar.js +298 -14
- package/dist/svg-browser-bar.d.ts +33 -0
- package/dist/svg-browser-bar.js +206 -0
- package/dist/svg-status-bar.d.ts +36 -0
- package/dist/svg-status-bar.js +597 -0
- package/dist/svg-text.d.ts +61 -0
- package/dist/svg-text.js +118 -0
- package/dist/tools.js +89 -451
- package/dist/types.d.ts +240 -5
- package/dist/types.js +23 -1
- package/dist/v2/action-verifier.d.ts +29 -0
- package/dist/v2/action-verifier.js +133 -0
- package/dist/v2/alt-text.d.ts +26 -0
- package/dist/v2/alt-text.js +55 -0
- package/dist/v2/benchmark.d.ts +59 -0
- package/dist/v2/benchmark.js +135 -0
- package/dist/v2/capture-strategy.d.ts +30 -0
- package/dist/v2/capture-strategy.js +67 -0
- package/dist/v2/capture-verification.d.ts +35 -0
- package/dist/v2/capture-verification.js +95 -0
- package/dist/v2/circuit-breaker.d.ts +42 -0
- package/dist/v2/circuit-breaker.js +119 -0
- package/dist/v2/cli-runner-local.d.ts +11 -0
- package/dist/v2/cli-runner-local.js +91 -0
- package/dist/v2/cli-runner.d.ts +34 -0
- package/dist/v2/cli-runner.js +300 -0
- package/dist/v2/compiler-prompts.d.ts +27 -0
- package/dist/v2/compiler-prompts.js +123 -0
- package/dist/v2/compiler.d.ts +37 -0
- package/dist/v2/compiler.js +147 -0
- package/dist/v2/explorer.d.ts +41 -0
- package/dist/v2/explorer.js +56 -0
- package/dist/v2/index.d.ts +37 -0
- package/dist/v2/index.js +31 -0
- package/dist/v2/llm-healer.d.ts +62 -0
- package/dist/v2/llm-healer.js +166 -0
- package/dist/v2/llm-provider.d.ts +29 -0
- package/dist/v2/llm-provider.js +80 -0
- package/dist/v2/opcode-runner.d.ts +47 -0
- package/dist/v2/opcode-runner.js +634 -0
- package/dist/v2/overlay-engine.d.ts +24 -0
- package/dist/v2/overlay-engine.js +150 -0
- package/dist/v2/postcondition.d.ts +16 -0
- package/dist/v2/postcondition.js +249 -0
- package/dist/v2/program-patcher.d.ts +25 -0
- package/dist/v2/program-patcher.js +44 -0
- package/dist/v2/recovery-chain.d.ts +30 -0
- package/dist/v2/recovery-chain.js +368 -0
- package/dist/v2/schema.d.ts +2580 -0
- package/dist/v2/schema.js +295 -0
- package/dist/v2/selector-resolver.d.ts +34 -0
- package/dist/v2/selector-resolver.js +181 -0
- package/dist/v2/semantic-resolver.d.ts +35 -0
- package/dist/v2/semantic-resolver.js +161 -0
- package/dist/v2/smart-wait.d.ts +27 -0
- package/dist/v2/smart-wait.js +81 -0
- package/dist/v2/types.d.ts +444 -0
- package/dist/v2/types.js +19 -0
- package/dist/v2/web-playwright-local.d.ts +69 -0
- package/dist/v2/web-playwright-local.js +392 -0
- package/dist/version.d.ts +1 -0
- package/dist/version.js +5 -0
- package/dist/video-agent.js +18 -13
- package/dist/video-planner.js +2 -1
- package/dist/video-prompts.js +3 -3
- package/dist/web-playwright-local.d.ts +126 -0
- package/dist/web-playwright-local.js +819 -0
- package/dist/ws-auth.js +4 -1
- package/dist/ws-broadcast.d.ts +34 -0
- package/dist/ws-broadcast.js +85 -0
- package/dist/ws-connection-limits.d.ts +12 -0
- package/dist/ws-connection-limits.js +44 -0
- package/dist/ws-handler-utils.d.ts +32 -0
- package/dist/ws-handler-utils.js +139 -0
- package/dist/ws-handler.js +294 -164
- package/dist/ws-metrics-server.d.ts +9 -0
- package/dist/ws-metrics-server.js +31 -0
- package/dist/ws-server.js +41 -1
- package/package.json +51 -34
package/dist/element-capture.js
CHANGED
|
@@ -5,47 +5,15 @@ import { logger } from './logger.js';
|
|
|
5
5
|
import { getPostHog, DISTINCT_ID } from './posthog.js';
|
|
6
6
|
import { isAbortError, throwIfAborted } from './abort.js';
|
|
7
7
|
import { callVisionCapableModel } from './vision-model.js';
|
|
8
|
+
import { zdrParam } from './provider-config.js';
|
|
8
9
|
const MAX_ELEMENT_ITERATIONS = 8;
|
|
9
10
|
const ELEMENT_CAPTURE_TEMPERATURE = 0;
|
|
10
|
-
function isTransientSearchSelector(selector) {
|
|
11
|
-
return /\[data-ak-(search|container)-index=/.test(selector);
|
|
12
|
-
}
|
|
13
11
|
export function isLooseElementCaptureRejectionReason(reason) {
|
|
14
12
|
if (!reason)
|
|
15
13
|
return false;
|
|
16
14
|
return /(too loose|too broad|tighten the frame|tighten the crop|significant unrelated surrounding content|large amount of unrelated page|minimal surrounding context|surrounding context|adjacent elements|wrong framing)/i
|
|
17
15
|
.test(reason);
|
|
18
16
|
}
|
|
19
|
-
export function isTagOnlyStructuralSelector(selector) {
|
|
20
|
-
const normalized = selector.trim();
|
|
21
|
-
if (!normalized)
|
|
22
|
-
return false;
|
|
23
|
-
const usesStructuralPath = /[>+~]/.test(normalized)
|
|
24
|
-
|| /:(first|last|nth)-(child|of-type)/i.test(normalized);
|
|
25
|
-
if (!usesStructuralPath)
|
|
26
|
-
return false;
|
|
27
|
-
const hasStableAnchor = /[#[]/.test(normalized)
|
|
28
|
-
|| /\.[A-Za-z_][A-Za-z0-9_-]*/.test(normalized)
|
|
29
|
-
|| /:has\(/i.test(normalized);
|
|
30
|
-
if (hasStableAnchor)
|
|
31
|
-
return false;
|
|
32
|
-
const segments = normalized
|
|
33
|
-
.split(/\s*[>+~]\s*/)
|
|
34
|
-
.map((segment) => segment.trim())
|
|
35
|
-
.filter(Boolean);
|
|
36
|
-
if (segments.length === 0)
|
|
37
|
-
return false;
|
|
38
|
-
return segments.every((segment) => /^[a-z][a-z0-9-]*(?::(first|last|nth)-(child|of-type)(\([^)]+\))?)?$/i
|
|
39
|
-
.test(segment));
|
|
40
|
-
}
|
|
41
|
-
export function shouldBlockUngroundedStructuralSelector(params) {
|
|
42
|
-
if (!params.verifierRejectedAsTooLoose)
|
|
43
|
-
return false;
|
|
44
|
-
if (!isTagOnlyStructuralSelector(params.selector))
|
|
45
|
-
return false;
|
|
46
|
-
const grounded = new Set(params.groundedSelectors);
|
|
47
|
-
return !grounded.has(params.selector);
|
|
48
|
-
}
|
|
49
17
|
export function outscaleAddsPadding(outscale) {
|
|
50
18
|
if (!outscale)
|
|
51
19
|
return false;
|
|
@@ -72,65 +40,6 @@ function buildTightOutscale(outscale) {
|
|
|
72
40
|
export function buildVerificationOutscale(outscale) {
|
|
73
41
|
return buildTightOutscale(outscale ?? {});
|
|
74
42
|
}
|
|
75
|
-
function getOrCreateSelectorEvidence(selectorEvidence, selector) {
|
|
76
|
-
const existing = selectorEvidence.get(selector);
|
|
77
|
-
if (existing)
|
|
78
|
-
return existing;
|
|
79
|
-
const created = {
|
|
80
|
-
observedAsInteractive: false,
|
|
81
|
-
directQueries: new Set(),
|
|
82
|
-
containerQueries: new Set(),
|
|
83
|
-
};
|
|
84
|
-
selectorEvidence.set(selector, created);
|
|
85
|
-
return created;
|
|
86
|
-
}
|
|
87
|
-
export function shouldAcceptDomCorroboratedSelector(params) {
|
|
88
|
-
if (!params.verifierRejectedAsTooLoose)
|
|
89
|
-
return false;
|
|
90
|
-
if (params.looseFailureCount < 2)
|
|
91
|
-
return false;
|
|
92
|
-
if (!params.validation.boundingBox)
|
|
93
|
-
return false;
|
|
94
|
-
if (!params.observedAsInteractive)
|
|
95
|
-
return false;
|
|
96
|
-
const { width, height } = params.validation.boundingBox;
|
|
97
|
-
if (width <= 0 || height <= 0)
|
|
98
|
-
return false;
|
|
99
|
-
if (params.viewport) {
|
|
100
|
-
const bboxArea = width * height;
|
|
101
|
-
const viewportArea = params.viewport.width * params.viewport.height;
|
|
102
|
-
if (viewportArea > 0 && bboxArea > viewportArea * 0.25) {
|
|
103
|
-
return false;
|
|
104
|
-
}
|
|
105
|
-
}
|
|
106
|
-
return params.containerQueryCount >= 1 || params.directQueryCount >= 2;
|
|
107
|
-
}
|
|
108
|
-
export function computeElementCaptureDomSignature(params) {
|
|
109
|
-
const sample = params.interactiveElements
|
|
110
|
-
.slice(0, 40)
|
|
111
|
-
.map((el) => [
|
|
112
|
-
el.index,
|
|
113
|
-
el.tag,
|
|
114
|
-
el.role,
|
|
115
|
-
(el.text || '').slice(0, 32).replace(/\s+/g, ' '),
|
|
116
|
-
el.selector,
|
|
117
|
-
el.visibilityState,
|
|
118
|
-
].join(':'))
|
|
119
|
-
.join('|');
|
|
120
|
-
return `${params.currentUrl}#${params.interactiveElements.length}#${sample}`;
|
|
121
|
-
}
|
|
122
|
-
export function shouldAllowSearchRefresh(params) {
|
|
123
|
-
if (!params.cached)
|
|
124
|
-
return true;
|
|
125
|
-
if (params.cached.domSignature !== params.domSignature)
|
|
126
|
-
return true;
|
|
127
|
-
if (params.lastFailedTransientSelector
|
|
128
|
-
&& params.cached.selectors.includes(params.lastFailedTransientSelector)
|
|
129
|
-
&& params.cached.hasTransientSelectors) {
|
|
130
|
-
return true;
|
|
131
|
-
}
|
|
132
|
-
return false;
|
|
133
|
-
}
|
|
134
43
|
const elementVerificationTools = [
|
|
135
44
|
{
|
|
136
45
|
type: 'function',
|
|
@@ -150,6 +59,24 @@ const elementVerificationTools = [
|
|
|
150
59
|
},
|
|
151
60
|
},
|
|
152
61
|
},
|
|
62
|
+
{
|
|
63
|
+
type: 'function',
|
|
64
|
+
function: {
|
|
65
|
+
name: 'accept_with_note',
|
|
66
|
+
description: 'Approve the capture with a minor note. Use when the correct element IS shown and fully visible, but the framing includes some extra surrounding context that does not materially affect capture quality.',
|
|
67
|
+
parameters: {
|
|
68
|
+
type: 'object',
|
|
69
|
+
properties: {
|
|
70
|
+
reason: {
|
|
71
|
+
type: 'string',
|
|
72
|
+
description: 'Short note about the minor framing issue.',
|
|
73
|
+
},
|
|
74
|
+
},
|
|
75
|
+
required: ['reason'],
|
|
76
|
+
additionalProperties: false,
|
|
77
|
+
},
|
|
78
|
+
},
|
|
79
|
+
},
|
|
153
80
|
{
|
|
154
81
|
type: 'function',
|
|
155
82
|
function: {
|
|
@@ -177,7 +104,7 @@ async function verifyElementCapture(client, model, element, screenshot, assessme
|
|
|
177
104
|
const messages = [
|
|
178
105
|
{
|
|
179
106
|
role: 'system',
|
|
180
|
-
content: 'You verify isolated UI element screenshots. The screenshot shows EXACTLY the pixel region captured by the element\'s
|
|
107
|
+
content: 'You verify isolated UI element screenshots. The screenshot shows EXACTLY the pixel region captured by the element\'s bounding box — every pixel visible is INSIDE that element. Nothing outside the element is included.\n\nYou have THREE tools:\n- **accept_capture** — the correct component is shown, fully visible, well-framed\n- **accept_with_note** — the correct component IS shown and fully visible, but the framing includes some extra surrounding context. Use this instead of rejecting when the target element is clearly present and complete.\n- **retry_capture** — ONLY for serious issues: wrong element entirely, clipped edges, tiny fragment, or overlay obstruction\n\nReject (retry_capture) ONLY for:\n1. The element is clipped — a side is cut off by the image edges\n2. The wrong element was captured — the content clearly does not match the description at all\n3. Only a tiny fragment is captured — e.g., just a heading when a full card was requested\n4. An overlay or modal is obscuring the element\n\nWhen the correct element is visible but framing is slightly loose, use accept_with_note — do NOT reject.\n\nCRITICAL: Since the screenshot IS the element\'s exact bounding box, there is no such thing as "adjacent cards" or "neighboring elements" visible in this image. What looks like a neighboring card is an internal preview thumbnail. What looks like surrounding context is internal structure. Only reject if the image edges clip the component or the wrong component is shown entirely.',
|
|
181
108
|
},
|
|
182
109
|
{
|
|
183
110
|
role: 'user',
|
|
@@ -197,9 +124,10 @@ This verification image is the raw element crop before any user-requested outsca
|
|
|
197
124
|
|
|
198
125
|
The screenshot IS the captured element — every pixel is inside its bounding box.
|
|
199
126
|
|
|
200
|
-
|
|
127
|
+
Use accept_capture if: the correct component is shown, fully visible, well-framed.
|
|
128
|
+
Use accept_with_note if: the correct component IS shown and fully visible, but framing is slightly loose with some extra context. This is still an approval.
|
|
201
129
|
|
|
202
|
-
|
|
130
|
+
Use retry_capture ONLY if:
|
|
203
131
|
- a side of the component is clipped by the image boundary
|
|
204
132
|
- the content clearly does not match the description at all (completely wrong element)
|
|
205
133
|
- only a tiny fragment is shown when a full component (card, panel, form) was requested
|
|
@@ -208,11 +136,14 @@ Reject ONLY if:
|
|
|
208
136
|
Do NOT reject because:
|
|
209
137
|
- the element contains screenshot previews, thumbnails, or mockups inside it
|
|
210
138
|
- the internal layout has multiple visual sections that look like separate cards
|
|
211
|
-
- content appears "above", "beside", or "around" other content within the image — this is the element's own internal layout, not neighboring page elements
|
|
139
|
+
- content appears "above", "beside", or "around" other content within the image — this is the element's own internal layout, not neighboring page elements
|
|
140
|
+
- the framing is slightly loose but the correct element is clearly present and complete`,
|
|
212
141
|
},
|
|
213
142
|
],
|
|
214
143
|
},
|
|
215
144
|
];
|
|
145
|
+
let response;
|
|
146
|
+
let usedModel = model;
|
|
216
147
|
try {
|
|
217
148
|
const visionResult = await callVisionCapableModel({
|
|
218
149
|
primaryModel: model,
|
|
@@ -222,43 +153,48 @@ Do NOT reject because:
|
|
|
222
153
|
messages,
|
|
223
154
|
tools: elementVerificationTools,
|
|
224
155
|
tool_choice: 'required',
|
|
225
|
-
temperature:
|
|
156
|
+
temperature: 0,
|
|
226
157
|
max_tokens: 256,
|
|
227
|
-
provider: {
|
|
158
|
+
provider: { ...zdrParam() },
|
|
228
159
|
}, { signal }),
|
|
160
|
+
onFallbackActivated: (m, reason) => logger.info(`Element verification vision fallback: ${m} (reason: ${reason})`),
|
|
229
161
|
});
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
stepNumber,
|
|
233
|
-
stepType: 'element_capture',
|
|
234
|
-
generationId: response.id ?? null,
|
|
235
|
-
modelRequested: model,
|
|
236
|
-
modelUsed: response.model ?? null,
|
|
237
|
-
promptTokens: response.usage?.prompt_tokens ?? null,
|
|
238
|
-
completionTokens: response.usage?.completion_tokens ?? null,
|
|
239
|
-
totalTokens: response.usage?.total_tokens ?? null,
|
|
240
|
-
imagesInPrompt: 1,
|
|
241
|
-
};
|
|
242
|
-
const toolCall = response.choices?.[0]?.message?.tool_calls?.[0];
|
|
243
|
-
if (!toolCall || !('function' in toolCall)) {
|
|
244
|
-
return { verified: false, reason: 'Element verification returned no actionable result', usage };
|
|
245
|
-
}
|
|
246
|
-
const args = JSON.parse(toolCall.function.arguments || '{}');
|
|
247
|
-
if (toolCall.function.name === 'accept_capture') {
|
|
248
|
-
logger.success(`Element capture verification passed: ${args.reason || 'approved'}`);
|
|
249
|
-
return { verified: true, reason: args.reason, usage };
|
|
250
|
-
}
|
|
251
|
-
const reason = args.reason || 'Element capture was rejected';
|
|
252
|
-
logger.ai(`Element capture verification failed: ${reason}`);
|
|
253
|
-
return { verified: false, reason, usage };
|
|
162
|
+
response = visionResult.result;
|
|
163
|
+
usedModel = visionResult.model;
|
|
254
164
|
}
|
|
255
165
|
catch (err) {
|
|
256
|
-
if (isAbortError(err))
|
|
166
|
+
if (isAbortError(err))
|
|
257
167
|
throw err;
|
|
258
|
-
}
|
|
259
|
-
|
|
260
|
-
|
|
168
|
+
logger.error(`Element verification failed: ${err.message}`);
|
|
169
|
+
return { verified: false, reason: 'Verification service unavailable', usage: null };
|
|
170
|
+
}
|
|
171
|
+
const usage = {
|
|
172
|
+
stepNumber,
|
|
173
|
+
stepType: 'element_capture',
|
|
174
|
+
generationId: response.id ?? null,
|
|
175
|
+
modelRequested: model,
|
|
176
|
+
modelUsed: response.model ?? usedModel,
|
|
177
|
+
promptTokens: response.usage?.prompt_tokens ?? null,
|
|
178
|
+
completionTokens: response.usage?.completion_tokens ?? null,
|
|
179
|
+
totalTokens: response.usage?.total_tokens ?? null,
|
|
180
|
+
imagesInPrompt: 1,
|
|
181
|
+
};
|
|
182
|
+
const toolCall = response.choices?.[0]?.message?.tool_calls?.[0];
|
|
183
|
+
if (!toolCall || !('function' in toolCall)) {
|
|
184
|
+
return { verified: false, reason: 'Verification returned no decision', usage };
|
|
185
|
+
}
|
|
186
|
+
const name = toolCall.function.name;
|
|
187
|
+
let args;
|
|
188
|
+
try {
|
|
189
|
+
args = JSON.parse(toolCall.function.arguments);
|
|
190
|
+
}
|
|
191
|
+
catch {
|
|
192
|
+
return { verified: false, reason: 'Verification returned invalid JSON', usage };
|
|
261
193
|
}
|
|
194
|
+
if (name === 'accept_capture' || name === 'accept_with_note') {
|
|
195
|
+
return { verified: true, reason: args.reason || undefined, usage };
|
|
196
|
+
}
|
|
197
|
+
return { verified: false, reason: args.reason || 'Verifier rejected', usage };
|
|
262
198
|
}
|
|
263
199
|
export async function captureIsolatedElement(browser, element, apiKey, model, options = {}) {
|
|
264
200
|
const { abortSignal, distinctId, fallbackModel, uploadImage } = options;
|
|
@@ -274,43 +210,18 @@ export async function captureIsolatedElement(browser, element, apiKey, model, op
|
|
|
274
210
|
const actionHistory = [];
|
|
275
211
|
const usageLog = [];
|
|
276
212
|
let stepCounter = 0;
|
|
277
|
-
// Map from query key → top candidate lines (shown again when a duplicate is blocked)
|
|
278
|
-
const usedSearchQueries = new Map();
|
|
279
|
-
const selectorEvidence = new Map();
|
|
280
|
-
const looseFailureCountsBySelector = new Map();
|
|
281
|
-
let lastFailedTransientSelector = null;
|
|
282
213
|
let lastVerifierRejectedAsTooLoose = false;
|
|
283
|
-
|
|
284
|
-
const originalViewport = browser.currentPage.viewportSize();
|
|
285
|
-
const restoreViewport = async () => {
|
|
286
|
-
const current = browser.currentPage.viewportSize();
|
|
287
|
-
if (originalViewport && current &&
|
|
288
|
-
(current.width !== originalViewport.width || current.height !== originalViewport.height)) {
|
|
289
|
-
await browser.resizeViewport(originalViewport.width, originalViewport.height);
|
|
290
|
-
}
|
|
291
|
-
};
|
|
214
|
+
let looseRejectionCount = 0;
|
|
292
215
|
try {
|
|
293
216
|
for (let iteration = 1; iteration <= MAX_ELEMENT_ITERATIONS; iteration++) {
|
|
294
217
|
throwIfAborted(abortSignal, `Element capture cancelled for "${element.name}".`);
|
|
295
|
-
// 1. Capture page state
|
|
296
|
-
const
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
browser.getSimplifiedDOM(),
|
|
300
|
-
browser.takeScreenshotForAI(),
|
|
301
|
-
]);
|
|
218
|
+
// 1. Capture page state with AKTree
|
|
219
|
+
const pageState = await browser.getPageState();
|
|
220
|
+
const screenshotBuf = pageState.cleanScreenshot;
|
|
221
|
+
const serializedAKTree = pageState.serializedAKTree;
|
|
302
222
|
const screenshotUrl = uploadImage
|
|
303
223
|
? await uploadImage(screenshotBuf, 'image/png').catch(() => `data:image/png;base64,${screenshotBuf.toString('base64')}`)
|
|
304
224
|
: `data:image/png;base64,${screenshotBuf.toString('base64')}`;
|
|
305
|
-
const domSignature = computeElementCaptureDomSignature({
|
|
306
|
-
currentUrl: browser.currentPage.url(),
|
|
307
|
-
interactiveElements,
|
|
308
|
-
});
|
|
309
|
-
for (const interactiveElement of interactiveElements) {
|
|
310
|
-
if (!interactiveElement.selector)
|
|
311
|
-
continue;
|
|
312
|
-
getOrCreateSelectorEvidence(selectorEvidence, interactiveElement.selector).observedAsInteractive = true;
|
|
313
|
-
}
|
|
314
225
|
// 2. Build messages
|
|
315
226
|
const messages = [
|
|
316
227
|
{ role: 'system', content: buildElementSystemPrompt(element.description) },
|
|
@@ -319,20 +230,22 @@ export async function captureIsolatedElement(browser, element, apiKey, model, op
|
|
|
319
230
|
content: buildElementIterationMessage({
|
|
320
231
|
elementName: element.name,
|
|
321
232
|
elementDescription: element.description,
|
|
322
|
-
|
|
323
|
-
interactiveElements,
|
|
324
|
-
simplifiedDOM,
|
|
233
|
+
serializedAKTree,
|
|
325
234
|
currentUrl: browser.currentPage.url(),
|
|
326
235
|
iteration,
|
|
327
236
|
maxIterations: MAX_ELEMENT_ITERATIONS,
|
|
328
237
|
actionHistory: actionHistory.length > 0 ? actionHistory : undefined,
|
|
329
238
|
viewport: browser.currentPage.viewportSize() ?? undefined,
|
|
330
|
-
|
|
239
|
+
scrollInfo: {
|
|
240
|
+
scrollY: pageState.scrollInfo.scrollY,
|
|
241
|
+
scrollHeight: pageState.scrollInfo.scrollHeight,
|
|
242
|
+
viewportHeight: browser.currentPage.viewportSize()?.height ?? 900,
|
|
243
|
+
},
|
|
331
244
|
screenshotUrl,
|
|
332
245
|
}),
|
|
333
246
|
},
|
|
334
247
|
];
|
|
335
|
-
// 3. Call LLM
|
|
248
|
+
// 3. Call LLM
|
|
336
249
|
let response;
|
|
337
250
|
let usedModel = model;
|
|
338
251
|
let callMessages = messages;
|
|
@@ -350,7 +263,7 @@ export async function captureIsolatedElement(browser, element, apiKey, model, op
|
|
|
350
263
|
tool_choice: 'required',
|
|
351
264
|
temperature: ELEMENT_CAPTURE_TEMPERATURE,
|
|
352
265
|
max_tokens: 1024,
|
|
353
|
-
provider: {
|
|
266
|
+
provider: { ...zdrParam() },
|
|
354
267
|
}, { signal: abortSignal }),
|
|
355
268
|
onFallbackActivated: (m, reason) => logger.info(`Element capture vision fallback activated: ${m} (reason: ${reason})`),
|
|
356
269
|
});
|
|
@@ -364,7 +277,6 @@ export async function captureIsolatedElement(browser, element, apiKey, model, op
|
|
|
364
277
|
if (!assistantContent)
|
|
365
278
|
break;
|
|
366
279
|
logger.info(`Model returned text without tool call; coercing (attempt ${coercionAttempt}/${MAX_COERCION_RETRIES})...`);
|
|
367
|
-
logger.ai(assistantContent.slice(0, 200));
|
|
368
280
|
callMessages = [
|
|
369
281
|
...callMessages,
|
|
370
282
|
{ role: 'assistant', content: assistantContent },
|
|
@@ -387,9 +299,8 @@ export async function captureIsolatedElement(browser, element, apiKey, model, op
|
|
|
387
299
|
});
|
|
388
300
|
}
|
|
389
301
|
catch (err) {
|
|
390
|
-
if (isAbortError(err))
|
|
302
|
+
if (isAbortError(err))
|
|
391
303
|
throw err;
|
|
392
|
-
}
|
|
393
304
|
logger.error(`Element capture API call failed: ${err.message}`);
|
|
394
305
|
actionHistory.push(`Iteration ${iteration}: API error — ${err.message}`);
|
|
395
306
|
continue;
|
|
@@ -410,336 +321,172 @@ export async function captureIsolatedElement(browser, element, apiKey, model, op
|
|
|
410
321
|
}
|
|
411
322
|
catch {
|
|
412
323
|
logger.error(`Invalid JSON in element tool arguments: ${toolCall.function.arguments}`);
|
|
413
|
-
callMessages = [
|
|
414
|
-
...callMessages,
|
|
415
|
-
{ role: 'assistant', content: null, tool_calls: [toolCall] },
|
|
416
|
-
{ role: 'tool', tool_call_id: toolCall.id, content: 'ERROR: Invalid JSON in tool arguments. Please retry with correctly formatted JSON.' },
|
|
417
|
-
];
|
|
418
324
|
continue;
|
|
419
325
|
}
|
|
420
326
|
// 4. Handle tool calls
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
const
|
|
424
|
-
const reasoning = args.reasoning;
|
|
327
|
+
// ── capture(nodeId?) ──
|
|
328
|
+
if (name === 'capture') {
|
|
329
|
+
const nodeId = typeof args.nodeId === 'string' ? args.nodeId : undefined;
|
|
425
330
|
const baseOutscale = element.outscale ?? { padding: element.padding ?? 0 };
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
: '';
|
|
440
|
-
logger.info(`Element "${element.name}": blocking ungrounded structural selector "${selector}" after a loose-crop rejection`);
|
|
441
|
-
actionHistory.push(`Iteration ${iteration}: capture_by_selector("${selector}") BLOCKED — after a verifier rejection for loose framing, do NOT invent a tag-only DOM path from <page_dom>. Re-run search_text with distinctive in-card text and use a grounded sel= directly, preferring any ↳ container selector.${candidateHint}`);
|
|
331
|
+
if (!nodeId) {
|
|
332
|
+
// Full page capture — unusual for element capture but handle it
|
|
333
|
+
logger.info(`Element "${element.name}": capture() without nodeId — full page`);
|
|
334
|
+
const buffer = await browser.takeScreenshot();
|
|
335
|
+
const viewport = browser.currentPage.viewportSize();
|
|
336
|
+
const { verified, reason: verifyReason, usage } = await verifyElementCapture(client, model, element, buffer, 'Full page capture', ++stepCounter, abortSignal, fallbackModel, uploadImage);
|
|
337
|
+
if (usage)
|
|
338
|
+
usageLog.push(usage);
|
|
339
|
+
if (verified) {
|
|
340
|
+
logger.success(`Element "${element.name}" captured (full page)`);
|
|
341
|
+
return { element, success: true, buffer, assessment: verifyReason || 'Full page', usage: usageLog };
|
|
342
|
+
}
|
|
343
|
+
actionHistory.push(`Iteration ${iteration}: capture() full page rejected — ${verifyReason}. Specify a nodeId to crop to the target element.`);
|
|
442
344
|
continue;
|
|
443
345
|
}
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
}
|
|
346
|
+
// Resolve nodeId to bounds
|
|
347
|
+
const entry = await browser.resolveAKNode(nodeId);
|
|
348
|
+
if (!entry) {
|
|
349
|
+
logger.error(`Element "${element.name}": nodeId "${nodeId}" not found in AKTree`);
|
|
350
|
+
actionHistory.push(`Iteration ${iteration}: capture("${nodeId}") — node not found. Re-read the AKTree and use a valid nodeId.`);
|
|
351
|
+
continue;
|
|
352
|
+
}
|
|
353
|
+
logger.info(`Element "${element.name}": capture("${nodeId}") — ${entry.label} ${entry.bounds.w}x${entry.bounds.h}`);
|
|
354
|
+
// Check area threshold
|
|
355
|
+
const viewport = browser.currentPage.viewportSize();
|
|
356
|
+
if (viewport) {
|
|
357
|
+
const captureArea = entry.bounds.w * entry.bounds.h;
|
|
358
|
+
const viewportArea = viewport.width * viewport.height;
|
|
359
|
+
const isLargeComponent = /\b(hero|header|footer|navigation|navbar|banner|full.?width|feature.?section|testimonial|above.the.fold)\b/i.test(element.description);
|
|
360
|
+
const areaThreshold = isLargeComponent ? 0.85 : 0.70;
|
|
361
|
+
if (captureArea > viewportArea * areaThreshold) {
|
|
362
|
+
logger.info(`Element "${element.name}": node "${nodeId}" covers ${Math.round(captureArea / viewportArea * 100)}% of viewport — too large`);
|
|
363
|
+
actionHistory.push(`Iteration ${iteration}: capture("${nodeId}") rejected — element covers >${Math.round(areaThreshold * 100)}% of viewport (${entry.bounds.w}x${entry.bounds.h}), likely a wrapper. Find a more specific child node with focus(within: "${nodeId}").`);
|
|
364
|
+
continue;
|
|
461
365
|
}
|
|
462
|
-
|
|
366
|
+
}
|
|
367
|
+
// Capture the node
|
|
368
|
+
try {
|
|
369
|
+
const buffer = await browser.captureNode(nodeId);
|
|
370
|
+
// Verify
|
|
371
|
+
const { verified, reason: verifyReason, usage } = await verifyElementCapture(client, model, element, buffer, `Captured ${entry.label} (${nodeId})`, ++stepCounter, abortSignal, fallbackModel, uploadImage);
|
|
463
372
|
if (usage)
|
|
464
373
|
usageLog.push(usage);
|
|
465
374
|
if (!verified) {
|
|
466
375
|
lastVerifierRejectedAsTooLoose = isLooseElementCaptureRejectionReason(verifyReason);
|
|
467
376
|
if (lastVerifierRejectedAsTooLoose) {
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
if (selectorEvidenceEntry && shouldAcceptDomCorroboratedSelector({
|
|
473
|
-
looseFailureCount,
|
|
474
|
-
verifierRejectedAsTooLoose: true,
|
|
475
|
-
validation,
|
|
476
|
-
viewport,
|
|
477
|
-
observedAsInteractive: selectorEvidenceEntry.observedAsInteractive,
|
|
478
|
-
directQueryCount: selectorEvidenceEntry.directQueries.size,
|
|
479
|
-
containerQueryCount: selectorEvidenceEntry.containerQueries.size,
|
|
480
|
-
})) {
|
|
481
|
-
logger.info(`Element "${element.name}": accepting selector "${selector}" after repeated loose-only verifier rejections because DOM evidence consistently corroborates the same component`);
|
|
377
|
+
looseRejectionCount++;
|
|
378
|
+
// Accept after repeated loose-only rejections (likely false positive from verifier)
|
|
379
|
+
if (looseRejectionCount >= 3) {
|
|
380
|
+
logger.info(`Element "${element.name}": accepting after ${looseRejectionCount} loose-only rejections`);
|
|
482
381
|
return {
|
|
483
|
-
element,
|
|
484
|
-
|
|
485
|
-
buffer,
|
|
486
|
-
assessment: 'Accepted after repeated loose-only verifier rejections; DOM evidence consistently confirmed the same component and the verifier likely misread embedded preview content as surrounding page context.',
|
|
487
|
-
capturedSelector: selector,
|
|
488
|
-
validation,
|
|
489
|
-
confidence,
|
|
382
|
+
element, success: true, buffer,
|
|
383
|
+
assessment: 'Accepted after repeated loose-only verifier rejections.',
|
|
490
384
|
usage: usageLog,
|
|
491
385
|
};
|
|
492
386
|
}
|
|
493
387
|
}
|
|
494
|
-
|
|
495
|
-
actionHistory.push(lastVerifierRejectedAsTooLoose
|
|
496
|
-
? `Iteration ${iteration}: capture_by_selector("${selector}") rejected by verifier — ${verifyReason}. The selector matched a real element, but the framing was too loose. Stay grounded on sel= values returned by the tools, search for a more distinctive in-card string if needed, and prefer any ↳ container selector instead of inventing a tag-only DOM path.`
|
|
497
|
-
: `Iteration ${iteration}: capture_by_selector("${selector}") rejected by verifier — ${verifyReason}. Try a different or more specific selector.`);
|
|
388
|
+
actionHistory.push(`Iteration ${iteration}: capture("${nodeId}") rejected by verifier — ${verifyReason}. ${lastVerifierRejectedAsTooLoose ? 'Try a more specific child node with focus(within: "' + nodeId + '").' : 'Try a different nodeId.'}`);
|
|
498
389
|
continue;
|
|
499
390
|
}
|
|
500
391
|
lastVerifierRejectedAsTooLoose = false;
|
|
392
|
+
// Apply outscale if needed
|
|
501
393
|
const finalBuffer = outscaleAddsPadding(baseOutscale)
|
|
502
|
-
?
|
|
394
|
+
? await browser.screenshotByRegion({ x: entry.bounds.x, y: entry.bounds.y, width: entry.bounds.w, height: entry.bounds.h }, baseOutscale)
|
|
503
395
|
: buffer;
|
|
504
|
-
logger.success(`Element "${element.name}" captured via
|
|
396
|
+
logger.success(`Element "${element.name}" captured via nodeId "${nodeId}"`);
|
|
505
397
|
getPostHog().capture({
|
|
506
398
|
distinctId: distinctId ?? DISTINCT_ID,
|
|
507
399
|
event: 'element_capture_succeeded',
|
|
508
|
-
properties: {
|
|
509
|
-
element_name: element.name,
|
|
510
|
-
method: 'selector',
|
|
511
|
-
selector,
|
|
512
|
-
confidence,
|
|
513
|
-
iterations: iteration,
|
|
514
|
-
},
|
|
400
|
+
properties: { element_name: element.name, method: 'nodeId', nodeId, iterations: iteration },
|
|
515
401
|
});
|
|
516
402
|
return {
|
|
517
|
-
element,
|
|
518
|
-
|
|
519
|
-
buffer: finalBuffer,
|
|
520
|
-
assessment: verifyReason || reasoning,
|
|
521
|
-
capturedSelector: selector,
|
|
522
|
-
validation,
|
|
523
|
-
confidence,
|
|
403
|
+
element, success: true, buffer: finalBuffer,
|
|
404
|
+
assessment: verifyReason || `Captured ${entry.label}`,
|
|
524
405
|
usage: usageLog,
|
|
525
406
|
};
|
|
526
407
|
}
|
|
527
408
|
catch (err) {
|
|
528
|
-
if (isAbortError(err))
|
|
409
|
+
if (isAbortError(err))
|
|
529
410
|
throw err;
|
|
530
|
-
}
|
|
531
|
-
|
|
532
|
-
const selectorErr = err;
|
|
533
|
-
if (selectorErr.error && selectorErr.errorMessage) {
|
|
534
|
-
if (selectorErr.error === 'no_match' && isTransientSearchSelector(selector)) {
|
|
535
|
-
lastFailedTransientSelector = selector;
|
|
536
|
-
}
|
|
537
|
-
else {
|
|
538
|
-
lastFailedTransientSelector = null;
|
|
539
|
-
}
|
|
540
|
-
const selectorSuggestions = {
|
|
541
|
-
no_match: 'The selector matched nothing. Try a broader selector, remove nth-child constraints, or use search_text to locate the element first.',
|
|
542
|
-
ambiguous: 'Multiple elements matched. Add parent context, a unique ID, data-testid, or aria-label to narrow down to one element.',
|
|
543
|
-
invisible: 'Element is hidden. Use scroll_to_element or scroll to bring it into view, or check if a parent element must be expanded first.',
|
|
544
|
-
zero_size: 'Element has zero rendered size. It may be dynamically rendered or conditionally shown. Try scrolling the page or waiting.',
|
|
545
|
-
};
|
|
546
|
-
const suggestion = selectorErr.error === 'no_match' && isTransientSearchSelector(selector)
|
|
547
|
-
? 'This looks like a stale temporary selector from an earlier search_text result. Re-run search_text to refresh the sel= value, then use the new selector immediately.'
|
|
548
|
-
: selectorSuggestions[selectorErr.error] ?? 'Refine the selector and retry.';
|
|
549
|
-
logger.error(`Selector validation failed: ${selectorErr.errorMessage}`);
|
|
550
|
-
actionHistory.push(`Iteration ${iteration}: capture_by_selector("${selector}") — ${selectorErr.errorMessage}. ${suggestion}`);
|
|
551
|
-
}
|
|
552
|
-
else {
|
|
553
|
-
const errorMsg = err.message;
|
|
554
|
-
logger.error(`Selector capture failed: ${errorMsg}`);
|
|
555
|
-
actionHistory.push(`Iteration ${iteration}: capture_by_selector("${selector}") failed — ${errorMsg}`);
|
|
556
|
-
}
|
|
411
|
+
logger.error(`Element "${element.name}": capture("${nodeId}") failed — ${err.message}`);
|
|
412
|
+
actionHistory.push(`Iteration ${iteration}: capture("${nodeId}") error — ${err.message}`);
|
|
557
413
|
continue;
|
|
558
414
|
}
|
|
559
415
|
}
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
const
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
416
|
+
// ── focus(query) ──
|
|
417
|
+
if (name === 'focus') {
|
|
418
|
+
const query = args;
|
|
419
|
+
try {
|
|
420
|
+
const focusResult = await browser.focusTree(query);
|
|
421
|
+
logger.info(`Element "${element.name}": focus → ${focusResult.matches.length} match(es)`);
|
|
422
|
+
actionHistory.push(`Iteration ${iteration}: focus(${JSON.stringify(args)}) → ${focusResult.matches.length} match(es)\n${focusResult.serialized.slice(0, 800)}`);
|
|
423
|
+
}
|
|
424
|
+
catch (err) {
|
|
425
|
+
logger.error(`Element "${element.name}": focus failed — ${err.message}`);
|
|
426
|
+
actionHistory.push(`Iteration ${iteration}: focus error — ${err.message}`);
|
|
427
|
+
}
|
|
567
428
|
continue;
|
|
568
429
|
}
|
|
430
|
+
// ── scroll ──
|
|
569
431
|
if (name === 'scroll') {
|
|
570
|
-
const
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
432
|
+
const centerOn = typeof args.centerOn === 'string' ? args.centerOn : undefined;
|
|
433
|
+
const target = typeof args.target === 'string' ? args.target : undefined;
|
|
434
|
+
const direction = args.direction ?? 'down';
|
|
435
|
+
const offset = typeof args.offset === 'number' ? args.offset : undefined;
|
|
436
|
+
if (centerOn) {
|
|
437
|
+
logger.info(`Element "${element.name}": scroll centerOn="${centerOn}"`);
|
|
438
|
+
try {
|
|
439
|
+
await browser.centerNodeInView(centerOn, { containerNodeId: target, offset });
|
|
440
|
+
await browser.wait(300);
|
|
441
|
+
actionHistory.push(`Iteration ${iteration}: scroll(centerOn="${centerOn}")`);
|
|
442
|
+
}
|
|
443
|
+
catch (err) {
|
|
444
|
+
actionHistory.push(`Iteration ${iteration}: scroll centerOn error — ${err.message}`);
|
|
445
|
+
}
|
|
578
446
|
}
|
|
579
447
|
else {
|
|
580
|
-
const
|
|
581
|
-
|
|
582
|
-
logger.info(`Element "${element.name}": scrolling ${direction} ${amount}px`);
|
|
448
|
+
const amount = offset ?? 500;
|
|
449
|
+
logger.info(`Element "${element.name}": scroll ${direction} ${amount}px`);
|
|
583
450
|
await browser.scroll(direction, amount);
|
|
584
451
|
await browser.wait(500);
|
|
585
452
|
actionHistory.push(`Iteration ${iteration}: scroll(${direction}, ${amount}px)`);
|
|
586
453
|
}
|
|
587
454
|
continue;
|
|
588
455
|
}
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
const
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
}
|
|
608
|
-
const
|
|
609
|
-
|
|
610
|
-
const candidateHint = cached.candidateLines.length > 0
|
|
611
|
-
? `\nTop candidates from that search (use their sel= in capture_by_selector):\n${cached.candidateLines.join('\n')}`
|
|
612
|
-
: '';
|
|
613
|
-
actionHistory.push(`Iteration ${iteration}: search_text("${query}") BLOCKED — DOM unchanged, results identical.${candidateHint}\nYou MUST either: (a) call capture_by_selector with a sel= from above, OR (b) search with a DIFFERENT query (unique text from the target: subtitle, price, feature text).`);
|
|
614
|
-
continue;
|
|
615
|
-
}
|
|
616
|
-
logger.info(`Element "${element.name}": searching for "${query}"`);
|
|
617
|
-
const results = await browser.searchText(query);
|
|
618
|
-
for (const result of results) {
|
|
619
|
-
getOrCreateSelectorEvidence(selectorEvidence, result.selector).directQueries.add(queryKey);
|
|
620
|
-
if (result.container?.selector) {
|
|
621
|
-
getOrCreateSelectorEvidence(selectorEvidence, result.container.selector).containerQueries.add(queryKey);
|
|
622
|
-
}
|
|
623
|
-
}
|
|
624
|
-
const MAX_SEARCH_RESULTS_IN_HISTORY = 5;
|
|
625
|
-
const resultLines = results.map((r, i) => {
|
|
626
|
-
const cx = Math.round(r.boundingBox.x + r.boundingBox.width / 2);
|
|
627
|
-
const cy = Math.round(r.boundingBox.y + r.boundingBox.height / 2);
|
|
628
|
-
const visibility = r.visibilityState === 'full'
|
|
629
|
-
? 'fully-visible'
|
|
630
|
-
: r.visibilityState === 'partial'
|
|
631
|
-
? 'partially-visible'
|
|
632
|
-
: 'off-screen';
|
|
633
|
-
const text = r.text.slice(0, 80).replace(/\s+/g, ' ');
|
|
634
|
-
let line = ` ${i}. <${r.tag}> "${text}" @${cx},${cy} ${r.boundingBox.width}x${r.boundingBox.height} (${visibility}) sel="${r.selector}"`;
|
|
635
|
-
if (r.container) {
|
|
636
|
-
line += `\n ↳ container: <${r.container.tag}> ${r.container.boundingBox.width}x${r.container.boundingBox.height} (${r.container.reason}) sel="${r.container.selector}"`;
|
|
637
|
-
}
|
|
638
|
-
return line;
|
|
639
|
-
});
|
|
640
|
-
const resultText = results.length > 0
|
|
641
|
-
? resultLines.slice(0, MAX_SEARCH_RESULTS_IN_HISTORY).join('\n')
|
|
642
|
-
+ (results.length > MAX_SEARCH_RESULTS_IN_HISTORY ? `\n (${results.length - MAX_SEARCH_RESULTS_IN_HISTORY} more results not shown)` : '')
|
|
643
|
-
: ' (no matches found)';
|
|
644
|
-
// Cache the top 3 most promising candidates (prefer off-screen small elements — likely specific items)
|
|
645
|
-
const topCandidates = resultLines
|
|
646
|
-
.filter(l => !l.includes('<body>'))
|
|
647
|
-
.slice(0, 3);
|
|
648
|
-
// Pick the best bounding box for coordinate-based fallback:
|
|
649
|
-
// prefer a result whose container has a reasonable card-like size (>= 100x100, < 70% viewport).
|
|
650
|
-
const viewportArea = 1440 * 900; // approximate
|
|
651
|
-
let bestRegion = null;
|
|
652
|
-
for (const result of results) {
|
|
653
|
-
const container = result.container;
|
|
654
|
-
if (container) {
|
|
655
|
-
const cb = container.boundingBox;
|
|
656
|
-
if (cb.width >= 100 && cb.height >= 100 && cb.width * cb.height < viewportArea * 0.7) {
|
|
657
|
-
bestRegion = cb;
|
|
658
|
-
break;
|
|
659
|
-
}
|
|
660
|
-
}
|
|
456
|
+
// ── analyze_screenshot ──
|
|
457
|
+
if (name === 'analyze_screenshot') {
|
|
458
|
+
const question = args.question || 'Describe what you see';
|
|
459
|
+
logger.info(`Element "${element.name}": analyze_screenshot — "${question}"`);
|
|
460
|
+
try {
|
|
461
|
+
const analysisResponse = await client.chat.completions.create({
|
|
462
|
+
model,
|
|
463
|
+
messages: [
|
|
464
|
+
{
|
|
465
|
+
role: 'user',
|
|
466
|
+
content: [
|
|
467
|
+
{ type: 'image_url', image_url: { url: screenshotUrl } },
|
|
468
|
+
{ type: 'text', text: question },
|
|
469
|
+
],
|
|
470
|
+
},
|
|
471
|
+
],
|
|
472
|
+
max_tokens: 256,
|
|
473
|
+
provider: { ...zdrParam() },
|
|
474
|
+
}, { signal: abortSignal });
|
|
475
|
+
const answer = analysisResponse.choices?.[0]?.message?.content || '(no answer)';
|
|
476
|
+
actionHistory.push(`Iteration ${iteration}: analyze_screenshot("${question}") → ${answer.slice(0, 300)}`);
|
|
661
477
|
}
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
const container = bestResult.container;
|
|
667
|
-
if (container && container.boundingBox.width * container.boundingBox.height < viewportArea * 0.7) {
|
|
668
|
-
bestRegion = container.boundingBox;
|
|
669
|
-
}
|
|
670
|
-
else {
|
|
671
|
-
bestRegion = bestResult.boundingBox;
|
|
672
|
-
}
|
|
673
|
-
}
|
|
478
|
+
catch (err) {
|
|
479
|
+
if (isAbortError(err))
|
|
480
|
+
throw err;
|
|
481
|
+
actionHistory.push(`Iteration ${iteration}: analyze_screenshot error — ${err.message}`);
|
|
674
482
|
}
|
|
675
|
-
usedSearchQueries.set(queryKey, {
|
|
676
|
-
candidateLines: topCandidates,
|
|
677
|
-
domSignature,
|
|
678
|
-
selectors: results.slice(0, 5).flatMap((result) => [
|
|
679
|
-
result.selector,
|
|
680
|
-
...(result.container?.selector ? [result.container.selector] : []),
|
|
681
|
-
]),
|
|
682
|
-
hasTransientSelectors: results.some((result) => isTransientSearchSelector(result.selector)
|
|
683
|
-
|| (result.container?.selector ? isTransientSearchSelector(result.container.selector) : false)),
|
|
684
|
-
bestRegion,
|
|
685
|
-
});
|
|
686
|
-
lastFailedTransientSelector = null;
|
|
687
|
-
logger.info(`Search results:\n${resultLines.join('\n')}`);
|
|
688
|
-
actionHistory.push(`Iteration ${iteration}: search_text("${query}") → ${results.length} match(es)\n${resultText}`);
|
|
689
483
|
continue;
|
|
690
484
|
}
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
getPostHog().capture({
|
|
695
|
-
distinctId: distinctId ?? DISTINCT_ID,
|
|
696
|
-
event: 'element_capture_failed',
|
|
697
|
-
properties: {
|
|
698
|
-
element_name: element.name,
|
|
699
|
-
reason,
|
|
700
|
-
iterations: iteration,
|
|
701
|
-
failure_type: 'gave_up',
|
|
702
|
-
},
|
|
703
|
-
});
|
|
704
|
-
return {
|
|
705
|
-
element,
|
|
706
|
-
success: false,
|
|
707
|
-
buffer: Buffer.alloc(0),
|
|
708
|
-
assessment: reason,
|
|
709
|
-
usage: usageLog,
|
|
710
|
-
};
|
|
711
|
-
}
|
|
712
|
-
}
|
|
713
|
-
// Coordinate-based fallback: if the LLM couldn't find a valid CSS selector but
|
|
714
|
-
// search_text found the element with a bounding box, capture by region as a last resort.
|
|
715
|
-
const allRegions = Array.from(usedSearchQueries.values())
|
|
716
|
-
.map(entry => entry.bestRegion)
|
|
717
|
-
.filter((r) => r != null && r.width > 0 && r.height > 0);
|
|
718
|
-
if (allRegions.length > 0) {
|
|
719
|
-
// Prefer regions with reasonable card-like dimensions over tiny text bboxes
|
|
720
|
-
const region = [...allRegions].sort((a, b) => {
|
|
721
|
-
const aOk = a.width >= 100 && a.height >= 100 ? 1 : 0;
|
|
722
|
-
const bOk = b.width >= 100 && b.height >= 100 ? 1 : 0;
|
|
723
|
-
if (aOk !== bOk)
|
|
724
|
-
return bOk - aOk;
|
|
725
|
-
return (b.width * b.height) - (a.width * a.height);
|
|
726
|
-
})[0];
|
|
727
|
-
try {
|
|
728
|
-
logger.info(`Element "${element.name}": falling back to coordinate-based capture at ${region.x},${region.y} ${region.width}x${region.height}`);
|
|
729
|
-
const buffer = await browser.screenshotByRegion(region, element.outscale ?? element.padding ?? 0);
|
|
730
|
-
return {
|
|
731
|
-
element,
|
|
732
|
-
success: true,
|
|
733
|
-
buffer,
|
|
734
|
-
assessment: `Captured by coordinate fallback at (${region.x},${region.y}) ${region.width}x${region.height}`,
|
|
735
|
-
capturedRegion: region,
|
|
736
|
-
usage: usageLog,
|
|
737
|
-
};
|
|
738
|
-
}
|
|
739
|
-
catch (err) {
|
|
740
|
-
logger.error(`Element "${element.name}": coordinate fallback failed: ${err.message}`);
|
|
741
|
-
}
|
|
485
|
+
// Unknown tool
|
|
486
|
+
logger.warn(`Element "${element.name}": unknown tool "${name}"`);
|
|
487
|
+
actionHistory.push(`Iteration ${iteration}: unknown tool "${name}" — use focus, scroll, analyze_screenshot, or capture.`);
|
|
742
488
|
}
|
|
489
|
+
// Max iterations reached — no coordinate fallback needed since AKTree provides bounds directly
|
|
743
490
|
logger.error(`Element "${element.name}": max iterations reached`);
|
|
744
491
|
getPostHog().capture({
|
|
745
492
|
distinctId: distinctId ?? DISTINCT_ID,
|
|
@@ -759,8 +506,17 @@ export async function captureIsolatedElement(browser, element, apiKey, model, op
|
|
|
759
506
|
usage: usageLog,
|
|
760
507
|
};
|
|
761
508
|
}
|
|
762
|
-
|
|
763
|
-
|
|
509
|
+
catch (err) {
|
|
510
|
+
if (isAbortError(err))
|
|
511
|
+
throw err;
|
|
512
|
+
logger.error(`Element capture unexpected error: ${err.message}`);
|
|
513
|
+
return {
|
|
514
|
+
element,
|
|
515
|
+
success: false,
|
|
516
|
+
buffer: Buffer.alloc(0),
|
|
517
|
+
assessment: `Unexpected error: ${err.message}`,
|
|
518
|
+
usage: usageLog,
|
|
519
|
+
};
|
|
764
520
|
}
|
|
765
521
|
}
|
|
766
522
|
//# sourceMappingURL=element-capture.js.map
|