autokap 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli-config.d.ts +13 -0
- package/dist/cli-config.js +42 -0
- package/dist/cli-utils.d.ts +0 -19
- package/dist/cli-utils.js +2 -65
- package/dist/cli.d.ts +0 -1
- package/dist/cli.js +266 -305
- package/package.json +26 -19
- package/assets/chrome/ios-statusbar-comparison-reference.jpg +0 -0
- package/assets/chrome/ios-statusbar-dark-reference.jpg +0 -0
- package/assets/chrome/ios-statusbar-light-reference.jpg +0 -0
- package/assets/devices/ipad-pro-11-m4.json +0 -52
- package/assets/devices/iphone-16-pro.json +0 -53
- package/assets/devices/macbook-air-13.json +0 -45
- package/assets/frames/MacBook Air 13.svg +0 -242
- package/assets/frames/Status bar - iPhone.png +0 -0
- package/assets/frames/Status bar and Menu bar- iPad.png +0 -0
- package/assets/frames/iPad Pro M4 11_.png +0 -0
- package/assets/frames/iPhone 16 Pro.png +0 -0
- package/assets/icons/Cellular Connection.svg +0 -3
- package/assets/icons/Union.svg +0 -6
- package/assets/icons/Wifi.svg +0 -3
- package/assets/icons/battery.svg +0 -5
- package/assets/icons/battery_charging.svg +0 -8
- package/dist/abort.d.ts +0 -5
- package/dist/abort.js +0 -44
- package/dist/agent.d.ts +0 -142
- package/dist/agent.js +0 -4504
- package/dist/browser-bar.d.ts +0 -40
- package/dist/browser-bar.js +0 -147
- package/dist/clip-orchestrator.d.ts +0 -148
- package/dist/clip-orchestrator.js +0 -950
- package/dist/clip-postprocess.d.ts +0 -42
- package/dist/clip-postprocess.js +0 -192
- package/dist/credential-templates.d.ts +0 -5
- package/dist/credential-templates.js +0 -60
- package/dist/element-capture.d.ts +0 -53
- package/dist/element-capture.js +0 -766
- package/dist/hybrid-navigator.d.ts +0 -138
- package/dist/hybrid-navigator.js +0 -468
- package/dist/index.d.ts +0 -15
- package/dist/index.js +0 -11
- package/dist/llm-usage.d.ts +0 -17
- package/dist/llm-usage.js +0 -45
- package/dist/mockup-html.d.ts +0 -119
- package/dist/mockup-html.js +0 -253
- package/dist/mockup.d.ts +0 -94
- package/dist/mockup.js +0 -604
- package/dist/mouse-animation.d.ts +0 -46
- package/dist/mouse-animation.js +0 -100
- package/dist/overlay-utils.d.ts +0 -14
- package/dist/overlay-utils.js +0 -13
- package/dist/posthog.d.ts +0 -4
- package/dist/posthog.js +0 -26
- package/dist/prompt-cache.d.ts +0 -10
- package/dist/prompt-cache.js +0 -24
- package/dist/prompts.d.ts +0 -167
- package/dist/prompts.js +0 -1165
- package/dist/security.d.ts +0 -20
- package/dist/security.js +0 -569
- package/dist/session-profile.d.ts +0 -86
- package/dist/session-profile.js +0 -1471
- package/dist/sf-pro-fonts.d.ts +0 -4
- package/dist/sf-pro-fonts.js +0 -7
- package/dist/status-bar-l10n.d.ts +0 -14
- package/dist/status-bar-l10n.js +0 -177
- package/dist/status-bar.d.ts +0 -44
- package/dist/status-bar.js +0 -336
- package/dist/tools.d.ts +0 -4
- package/dist/tools.js +0 -578
- package/dist/video-agent.d.ts +0 -143
- package/dist/video-agent.js +0 -4783
- package/dist/video-observation.d.ts +0 -36
- package/dist/video-observation.js +0 -192
- package/dist/video-planner.d.ts +0 -12
- package/dist/video-planner.js +0 -500
- package/dist/video-prompts.d.ts +0 -37
- package/dist/video-prompts.js +0 -554
- package/dist/video-tools.d.ts +0 -3
- package/dist/video-tools.js +0 -59
- package/dist/video-variant-state.d.ts +0 -29
- package/dist/video-variant-state.js +0 -80
- package/dist/vision-model.d.ts +0 -17
- package/dist/vision-model.js +0 -74
package/dist/element-capture.js
DELETED
|
@@ -1,766 +0,0 @@
|
|
|
1
|
-
import OpenAI from 'openai';
|
|
2
|
-
import { elementCaptureTools } from './tools.js';
|
|
3
|
-
import { buildElementSystemPrompt, buildElementIterationMessage } from './prompts.js';
|
|
4
|
-
import { logger } from './logger.js';
|
|
5
|
-
import { getPostHog, DISTINCT_ID } from './posthog.js';
|
|
6
|
-
import { isAbortError, throwIfAborted } from './abort.js';
|
|
7
|
-
import { callVisionCapableModel } from './vision-model.js';
|
|
8
|
-
// Upper bound on the number of capture-loop iterations attempted per element
// in captureIsolatedElement before the loop ends.
const MAX_ELEMENT_ITERATIONS = 8;
// Sampling temperature sent with every element-capture and verification
// model request in this module.
const ELEMENT_CAPTURE_TEMPERATURE = 0;
|
|
10
|
-
/**
 * Tells whether a selector relies on a temporary `data-ak-search-index` or
 * `data-ak-container-index` attribute.
 *
 * @param {string} selector - CSS selector to inspect.
 * @returns {boolean} `true` when the selector contains one of those attribute tests.
 */
function isTransientSearchSelector(selector) {
    const transientAttributePattern = /\[data-ak-(search|container)-index=/;
    const referencesTransientAttribute = transientAttributePattern.test(selector);
    return referencesTransientAttribute;
}
|
|
13
|
-
/**
 * Classifies a verifier rejection reason as a "framing too loose" complaint.
 *
 * The match is a case-insensitive search for a fixed list of phrases such as
 * "too loose", "too broad" or "adjacent elements".
 *
 * @param {string | null | undefined} reason - Rejection text from the verifier.
 * @returns {boolean} `false` for an empty/missing reason, otherwise whether a
 *   loose-framing phrase occurs in it.
 */
export function isLooseElementCaptureRejectionReason(reason) {
    const looseFramingPhrases = /(too loose|too broad|tighten the frame|tighten the crop|significant unrelated surrounding content|large amount of unrelated page|minimal surrounding context|surrounding context|adjacent elements|wrong framing)/i;
    return reason ? looseFramingPhrases.test(reason) : false;
}
|
|
19
|
-
/**
 * Detects selectors that are purely a structural path of tag names, e.g.
 * `div > ul > li:nth-child(2)`, with no id, class, attribute or `:has()` test
 * to anchor them.
 *
 * A selector qualifies when it:
 *  - uses a `>`, `+` or `~` combinator or a first/last/nth child/of-type
 *    pseudo-class,
 *  - contains no `#`, `[`, `.class` or `:has(`, and
 *  - consists only of tag names, each optionally followed by one such
 *    positional pseudo-class.
 *
 * Combinators are only recognised outside parentheses, so the `+` inside an
 * An+B argument such as `:nth-child(2n+1)` does not split the selector.
 * Segments joined by a descendant (whitespace) combinator are not treated as
 * tag-only.
 *
 * @param {unknown} selector - Candidate CSS selector; non-strings yield `false`.
 * @returns {boolean} `true` when the selector is a tag-only structural path.
 */
export function isTagOnlyStructuralSelector(selector) {
    // The selector originates from model-generated tool arguments and may be
    // missing or of the wrong type; treat that as "not a tag-only path".
    if (typeof selector !== 'string')
        return false;
    const normalized = selector.trim();
    if (!normalized)
        return false;
    const usesStructuralPath = /[>+~]/.test(normalized)
        || /:(first|last|nth)-(child|of-type)/i.test(normalized);
    if (!usesStructuralPath)
        return false;
    const hasStableAnchor = /[#[]/.test(normalized)
        || /\.[A-Za-z_][A-Za-z0-9_-]*/.test(normalized)
        || /:has\(/i.test(normalized);
    if (hasStableAnchor)
        return false;
    // Split on combinators at parenthesis depth 0 only, so that the "+" of
    // ":nth-child(2n+1)" stays inside its segment.
    const rawSegments = [];
    let depth = 0;
    let current = '';
    for (const char of normalized) {
        if (char === '(') {
            depth += 1;
        }
        else if (char === ')') {
            depth = Math.max(0, depth - 1);
        }
        const isCombinator = depth === 0 && (char === '>' || char === '+' || char === '~');
        if (isCombinator) {
            rawSegments.push(current);
            current = '';
        }
        else {
            current += char;
        }
    }
    rawSegments.push(current);
    const segments = rawSegments
        .map((segment) => segment.trim())
        .filter(Boolean);
    if (segments.length === 0)
        return false;
    const tagOnlySegment = /^[a-z][a-z0-9-]*(?::(first|last|nth)-(child|of-type)(\([^)]+\))?)?$/i;
    return segments.every((segment) => tagOnlySegment.test(segment));
}
|
|
41
|
-
/**
 * Decides whether a proposed selector must be blocked.
 *
 * A selector is blocked only when the previous verifier rejection was for
 * loose framing, the selector is a tag-only structural path, and it is not
 * among the grounded selectors.
 *
 * @param {object} params
 * @param {string} params.selector - Selector proposed for capture.
 * @param {Iterable<string>} params.groundedSelectors - Selectors considered grounded.
 * @param {boolean} params.verifierRejectedAsTooLoose - Whether the last
 *   verifier rejection was a loose-framing rejection.
 * @returns {boolean} `true` when the selector should be blocked.
 */
export function shouldBlockUngroundedStructuralSelector(params) {
    const followsLooseRejection = Boolean(params.verifierRejectedAsTooLoose);
    if (!followsLooseRejection || !isTagOnlyStructuralSelector(params.selector)) {
        return false;
    }
    const groundedLookup = new Set(params.groundedSelectors);
    const isGrounded = groundedLookup.has(params.selector);
    return !isGrounded;
}
|
|
49
|
-
/**
 * Reports whether an outscale configuration requests any positive padding.
 *
 * @param {object | null | undefined} outscale - Outscale options; may be absent.
 * @returns {boolean} `true` when at least one of `padding`, `paddingTop`,
 *   `paddingRight`, `paddingBottom`, `paddingLeft` or `paddingPercent` is a
 *   number greater than zero.
 */
export function outscaleAddsPadding(outscale) {
    if (!outscale) {
        return false;
    }
    const paddingFields = ['padding', 'paddingTop', 'paddingRight', 'paddingBottom', 'paddingLeft', 'paddingPercent'];
    for (const field of paddingFields) {
        const amount = outscale[field];
        if (typeof amount === 'number' && amount > 0) {
            return true;
        }
    }
    return false;
}
|
|
61
|
-
/**
 * Returns a copy of the outscale options with every padding field forced to 0.
 * All other fields are carried over unchanged; the input is not mutated.
 *
 * @param {object} outscale - Outscale options to copy.
 * @returns {object} New object with `padding`, `paddingTop`, `paddingRight`,
 *   `paddingBottom`, `paddingLeft` and `paddingPercent` set to 0.
 */
function buildTightOutscale(outscale) {
    const paddingFields = ['padding', 'paddingTop', 'paddingRight', 'paddingBottom', 'paddingLeft', 'paddingPercent'];
    const zeroedPadding = Object.fromEntries(paddingFields.map((field) => [field, 0]));
    return Object.assign({}, outscale, zeroedPadding);
}
|
|
72
|
-
/**
 * Builds the outscale options used for the verification screenshot: the
 * requested options (or an empty object when none are given) passed through
 * buildTightOutscale.
 *
 * @param {object | null | undefined} outscale - Requested outscale options.
 * @returns {object} Result of buildTightOutscale for those options.
 */
export function buildVerificationOutscale(outscale) {
    const requested = outscale ?? {};
    return buildTightOutscale(requested);
}
|
|
75
|
-
/**
 * Looks up the evidence record for a selector, creating and storing an empty
 * one on first use.
 *
 * @param {Map<string, object>} selectorEvidence - Evidence records keyed by selector.
 * @param {string} selector - Selector whose record is wanted.
 * @returns {{ observedAsInteractive: boolean, directQueries: Set, containerQueries: Set }}
 *   The stored record (the same object on every call for a given selector).
 */
function getOrCreateSelectorEvidence(selectorEvidence, selector) {
    let record = selectorEvidence.get(selector);
    if (!record) {
        record = {
            observedAsInteractive: false,
            directQueries: new Set(),
            containerQueries: new Set(),
        };
        selectorEvidence.set(selector, record);
    }
    return record;
}
|
|
87
|
-
/**
 * Decides whether a selector may be accepted despite repeated "too loose"
 * verifier rejections, based on DOM evidence.
 *
 * Acceptance requires all of:
 *  - the rejection was a loose-framing rejection and the loose failure count
 *    is not below 2,
 *  - the validation has a bounding box with positive width and height,
 *  - the selector was observed as interactive,
 *  - when a viewport with positive area is given, the box covers at most 25%
 *    of it,
 *  - at least one container query or at least two direct queries.
 *
 * @param {object} params
 * @param {boolean} params.verifierRejectedAsTooLoose
 * @param {number} params.looseFailureCount
 * @param {{ boundingBox?: { width: number, height: number } }} params.validation
 * @param {boolean} params.observedAsInteractive
 * @param {{ width: number, height: number } | null | undefined} params.viewport
 * @param {number} params.containerQueryCount
 * @param {number} params.directQueryCount
 * @returns {boolean} `true` when the selector should be accepted.
 */
export function shouldAcceptDomCorroboratedSelector(params) {
    const repeatedlyRejectedAsLoose = params.verifierRejectedAsTooLoose && !(params.looseFailureCount < 2);
    if (!repeatedlyRejectedAsLoose) {
        return false;
    }
    const box = params.validation.boundingBox;
    if (!box || !params.observedAsInteractive) {
        return false;
    }
    const boxWidth = box.width;
    const boxHeight = box.height;
    if (boxWidth <= 0 || boxHeight <= 0) {
        return false;
    }
    const screen = params.viewport;
    if (screen) {
        const screenArea = screen.width * screen.height;
        const coversTooMuch = screenArea > 0 && boxWidth * boxHeight > screenArea * 0.25;
        if (coversTooMuch) {
            return false;
        }
    }
    const corroboratedByContainer = params.containerQueryCount >= 1;
    return corroboratedByContainer || params.directQueryCount >= 2;
}
|
|
108
|
-
/**
 * Builds a string signature of the current page state from its URL and
 * interactive elements.
 *
 * The signature is `<url>#<element count>#<sample>`, where the sample joins
 * the first 40 elements with `|`, each rendered as
 * `index:tag:role:text:selector:visibilityState` (text limited to its first
 * 32 characters with whitespace runs collapsed to a single space).
 *
 * @param {object} params
 * @param {string} params.currentUrl - URL of the current page.
 * @param {Array<object>} params.interactiveElements - Interactive element descriptors.
 * @returns {string} The signature string.
 */
export function computeElementCaptureDomSignature(params) {
    const describeElement = (el) => {
        const label = (el.text || '').slice(0, 32).replace(/\s+/g, ' ');
        const fields = [el.index, el.tag, el.role, label, el.selector, el.visibilityState];
        return fields.join(':');
    };
    const descriptions = [];
    for (const el of params.interactiveElements.slice(0, 40)) {
        descriptions.push(describeElement(el));
    }
    const sample = descriptions.join('|');
    const elementCount = params.interactiveElements.length;
    return `${params.currentUrl}#${elementCount}#${sample}`;
}
|
|
122
|
-
/**
 * Decides whether a search may be re-run instead of being served from cache.
 *
 * A refresh is allowed when there is no cached entry, when the cached entry
 * was recorded under a different DOM signature, or when the last failed
 * transient selector is one of the cached selectors and the cached entry is
 * flagged as having transient selectors.
 *
 * @param {object} params
 * @param {{ domSignature: string, selectors: string[], hasTransientSelectors: boolean } | null | undefined} params.cached
 * @param {string} params.domSignature - Signature of the current page state.
 * @param {string | null | undefined} params.lastFailedTransientSelector
 * @returns {boolean} `true` when the search may be refreshed.
 */
export function shouldAllowSearchRefresh(params) {
    const entry = params.cached;
    if (!entry || entry.domSignature !== params.domSignature) {
        return true;
    }
    const staleSelector = params.lastFailedTransientSelector;
    const cachedResultWentStale = Boolean(staleSelector
        && entry.selectors.includes(staleSelector)
        && entry.hasTransientSelectors);
    return cachedResultWentStale;
}
|
|
134
|
-
// Tool schemas offered to the verification model: it must either approve the
// crop (accept_capture) or reject it (retry_capture), always giving a reason.
// Both tools share one shape, so they are generated from a
// [name, tool description, reason description] table.
const elementVerificationTools = [
    [
        'accept_capture',
        'Approve the isolated element screenshot when the requested element/component is fully present and well framed.',
        'Short explanation of why the crop is good.',
    ],
    [
        'retry_capture',
        'Reject the isolated element screenshot when the crop is wrong, clipped, too loose, or shows the wrong target.',
        'What is wrong and what needs to be fixed before capturing again.',
    ],
].map(([name, description, reasonDescription]) => ({
    type: 'function',
    function: {
        name,
        description,
        parameters: {
            type: 'object',
            properties: {
                reason: {
                    type: 'string',
                    description: reasonDescription,
                },
            },
            required: ['reason'],
            additionalProperties: false,
        },
    },
}));
|
|
172
|
-
/**
 * Asks a vision-capable model whether an isolated element screenshot shows
 * the requested component, via the accept_capture / retry_capture tools.
 *
 * The verdict is taken from the NAME of the tool the model calls; the tool
 * arguments only supply the human-readable reason. Malformed or truncated
 * argument JSON therefore does not change the verdict and does not discard
 * the usage record of the completed request.
 *
 * @param {object} client - OpenAI-compatible client (uses `chat.completions.create`).
 * @param {string} model - Primary model identifier.
 * @param {{ name: string, description: string }} element - Requested element.
 * @param {Buffer} screenshot - PNG bytes of the candidate crop.
 * @param {string} assessment - The capture model's own reasoning for the candidate.
 * @param {number} stepNumber - Step number recorded in the usage entry.
 * @param {AbortSignal | undefined} signal - Cancellation signal.
 * @param {string | undefined} fallbackModel - Fallback model passed to callVisionCapableModel.
 * @param {((buffer: Buffer, mime: string) => Promise<string>) | undefined} uploadImage -
 *   Optional uploader returning an image URL; on rejection an inline data URL is used.
 * @returns {Promise<{ verified: boolean, reason: string | undefined, usage: object | null }>}
 *   `usage` is `null` only when the request itself failed.
 * @throws Re-throws abort errors (as identified by isAbortError); other
 *   request failures are logged and reported as `verified: true`.
 */
async function verifyElementCapture(client, model, element, screenshot, assessment, stepNumber, signal, fallbackModel, uploadImage) {
    throwIfAborted(signal, 'Element capture verification cancelled.');
    const toInlineDataUrl = () => `data:image/png;base64,${screenshot.toString('base64')}`;
    const imgUrl = uploadImage
        ? await uploadImage(screenshot, 'image/png').catch(() => toInlineDataUrl())
        : toInlineDataUrl();
    // Tool arguments come from the model and may be empty, truncated by the
    // token limit, or not an object at all. Fall back to "no arguments"
    // instead of throwing, so the tool name still decides the verdict.
    const parseToolArguments = (rawArguments) => {
        if (!rawArguments) {
            return {};
        }
        try {
            const parsed = JSON.parse(rawArguments);
            return parsed !== null && typeof parsed === 'object' ? parsed : {};
        }
        catch {
            logger.info('Element verification returned malformed tool arguments; using the tool name only.');
            return {};
        }
    };
    const messages = [
        {
            role: 'system',
            content: 'You verify isolated UI element screenshots. The screenshot shows EXACTLY the pixel region captured by the element\'s CSS selector — every pixel visible is INSIDE that element\'s bounding box. Nothing outside the element is included.\n\nApprove if the screenshot shows the correct requested component, fully visible, and not clipped at the edges. Reject only for these reasons:\n1. The element is clipped — a side is cut off by the image edges\n2. The wrong element was captured — the content clearly does not match the description at all\n3. Only a tiny fragment is captured — e.g., just a heading when a full card was requested\n4. An overlay or modal is obscuring the element\n\nDo NOT reject for any of the following — these are correct captures:\n- The screenshot contains what looks like multiple cards, sections, or UI components side by side — they are INTERNAL layout of the captured element (e.g., a preview carousel, a thumbnail grid, embedded mockups)\n- The screenshot shows pricing cards, phone mockups, app screenshots, or promotional content — these are screenshot previews INSIDE the component\n- Content appears "to the left", "to the right", "above", or "below" other content — spatial arrangement is the element\'s own layout, not neighboring page elements\n- The element has a complex internal structure with multiple visual sections\n\nCRITICAL: Since the screenshot IS the element\'s exact bounding box, there is no such thing as "adjacent cards" or "neighboring elements" visible in this image. What looks like a neighboring card is an internal preview thumbnail. What looks like surrounding context is internal structure. Only reject if the image edges clip the component or the wrong component is shown entirely.',
        },
        {
            role: 'user',
            content: [
                {
                    type: 'image_url',
                    image_url: { url: imgUrl },
                },
                {
                    type: 'text',
                    text: `## Requested isolated capture
Name: "${element.name}"
Description: "${element.description}"
Candidate assessment: "${assessment}"

This verification image is the raw element crop before any user-requested outscale or extra margin is applied programmatically after validation.

The screenshot IS the captured element — every pixel is inside its bounding box.

Approve if: the correct component is shown, fully visible, not clipped at image edges.

Reject ONLY if:
- a side of the component is clipped by the image boundary
- the content clearly does not match the description at all (completely wrong element)
- only a tiny fragment is shown when a full component (card, panel, form) was requested
- an overlay or sticky header is covering it

Do NOT reject because:
- the element contains screenshot previews, thumbnails, or mockups inside it
- the internal layout has multiple visual sections that look like separate cards
- content appears "above", "beside", or "around" other content within the image — this is the element's own internal layout, not neighboring page elements`,
                },
            ],
        },
    ];
    try {
        const visionResult = await callVisionCapableModel({
            primaryModel: model,
            fallbackModel,
            callModel: (m) => client.chat.completions.create({
                model: m,
                messages,
                tools: elementVerificationTools,
                tool_choice: 'required',
                temperature: ELEMENT_CAPTURE_TEMPERATURE,
                max_tokens: 256,
                provider: { zdr: true },
            }, { signal }),
        });
        const response = visionResult.result;
        const usage = {
            stepNumber,
            stepType: 'element_capture',
            generationId: response.id ?? null,
            modelRequested: model,
            // Prefer the model reported by the API, then the one the vision
            // helper says it used (relevant when the fallback was activated).
            modelUsed: response.model ?? visionResult.model ?? null,
            promptTokens: response.usage?.prompt_tokens ?? null,
            completionTokens: response.usage?.completion_tokens ?? null,
            totalTokens: response.usage?.total_tokens ?? null,
            imagesInPrompt: 1,
        };
        const toolCall = response.choices?.[0]?.message?.tool_calls?.[0];
        if (!toolCall || !('function' in toolCall)) {
            return { verified: false, reason: 'Element verification returned no actionable result', usage };
        }
        const args = parseToolArguments(toolCall.function.arguments);
        if (toolCall.function.name === 'accept_capture') {
            logger.success(`Element capture verification passed: ${args.reason || 'approved'}`);
            return { verified: true, reason: args.reason, usage };
        }
        // Any tool other than accept_capture counts as a rejection.
        const reason = args.reason || 'Element capture was rejected';
        logger.ai(`Element capture verification failed: ${reason}`);
        return { verified: false, reason, usage };
    }
    catch (err) {
        if (isAbortError(err)) {
            throw err;
        }
        logger.error(`Element capture verification failed: ${err?.message ?? String(err)}`);
        // Deliberate fail-open: when the verifier cannot be reached, the
        // capture is not blocked on it.
        return { verified: true, reason: 'Verification unavailable', usage: null };
    }
}
|
|
263
|
-
export async function captureIsolatedElement(browser, element, apiKey, model, options = {}) {
|
|
264
|
-
const { abortSignal, distinctId, fallbackModel, uploadImage } = options;
|
|
265
|
-
const client = new OpenAI({
|
|
266
|
-
baseURL: 'https://openrouter.ai/api/v1',
|
|
267
|
-
apiKey,
|
|
268
|
-
defaultHeaders: {
|
|
269
|
-
'HTTP-Referer': 'https://github.com/screenshot-agent',
|
|
270
|
-
'X-Title': 'Screenshot Agent',
|
|
271
|
-
},
|
|
272
|
-
});
|
|
273
|
-
logger.info(`Identifying element: "${element.name}" — "${element.description}"`);
|
|
274
|
-
const actionHistory = [];
|
|
275
|
-
const usageLog = [];
|
|
276
|
-
let stepCounter = 0;
|
|
277
|
-
// Map from query key → top candidate lines (shown again when a duplicate is blocked)
|
|
278
|
-
const usedSearchQueries = new Map();
|
|
279
|
-
const selectorEvidence = new Map();
|
|
280
|
-
const looseFailureCountsBySelector = new Map();
|
|
281
|
-
let lastFailedTransientSelector = null;
|
|
282
|
-
let lastVerifierRejectedAsTooLoose = false;
|
|
283
|
-
// Save original viewport to restore after element capture (agent may resize it)
|
|
284
|
-
const originalViewport = browser.currentPage.viewportSize();
|
|
285
|
-
const restoreViewport = async () => {
|
|
286
|
-
const current = browser.currentPage.viewportSize();
|
|
287
|
-
if (originalViewport && current &&
|
|
288
|
-
(current.width !== originalViewport.width || current.height !== originalViewport.height)) {
|
|
289
|
-
await browser.resizeViewport(originalViewport.width, originalViewport.height);
|
|
290
|
-
}
|
|
291
|
-
};
|
|
292
|
-
try {
|
|
293
|
-
for (let iteration = 1; iteration <= MAX_ELEMENT_ITERATIONS; iteration++) {
|
|
294
|
-
throwIfAborted(abortSignal, `Element capture cancelled for "${element.name}".`);
|
|
295
|
-
// 1. Capture page state (including a screenshot for visual disambiguation)
|
|
296
|
-
const [accessibilityTree, interactiveElements, simplifiedDOM, screenshotBuf] = await Promise.all([
|
|
297
|
-
browser.getAccessibilityTree(),
|
|
298
|
-
browser.getInteractiveElements(),
|
|
299
|
-
browser.getSimplifiedDOM(),
|
|
300
|
-
browser.takeScreenshotForAI(),
|
|
301
|
-
]);
|
|
302
|
-
const screenshotUrl = uploadImage
|
|
303
|
-
? await uploadImage(screenshotBuf, 'image/png').catch(() => `data:image/png;base64,${screenshotBuf.toString('base64')}`)
|
|
304
|
-
: `data:image/png;base64,${screenshotBuf.toString('base64')}`;
|
|
305
|
-
const domSignature = computeElementCaptureDomSignature({
|
|
306
|
-
currentUrl: browser.currentPage.url(),
|
|
307
|
-
interactiveElements,
|
|
308
|
-
});
|
|
309
|
-
for (const interactiveElement of interactiveElements) {
|
|
310
|
-
if (!interactiveElement.selector)
|
|
311
|
-
continue;
|
|
312
|
-
getOrCreateSelectorEvidence(selectorEvidence, interactiveElement.selector).observedAsInteractive = true;
|
|
313
|
-
}
|
|
314
|
-
// 2. Build messages
|
|
315
|
-
const messages = [
|
|
316
|
-
{ role: 'system', content: buildElementSystemPrompt(element.description) },
|
|
317
|
-
{
|
|
318
|
-
role: 'user',
|
|
319
|
-
content: buildElementIterationMessage({
|
|
320
|
-
elementName: element.name,
|
|
321
|
-
elementDescription: element.description,
|
|
322
|
-
accessibilityTree,
|
|
323
|
-
interactiveElements,
|
|
324
|
-
simplifiedDOM,
|
|
325
|
-
currentUrl: browser.currentPage.url(),
|
|
326
|
-
iteration,
|
|
327
|
-
maxIterations: MAX_ELEMENT_ITERATIONS,
|
|
328
|
-
actionHistory: actionHistory.length > 0 ? actionHistory : undefined,
|
|
329
|
-
viewport: browser.currentPage.viewportSize() ?? undefined,
|
|
330
|
-
forbiddenSearchQueries: usedSearchQueries.size > 0 ? [...usedSearchQueries.keys()] : undefined,
|
|
331
|
-
screenshotUrl,
|
|
332
|
-
}),
|
|
333
|
-
},
|
|
334
|
-
];
|
|
335
|
-
// 3. Call LLM (with vision fallback + coercion retry when the model ignores tool_choice: 'required')
|
|
336
|
-
let response;
|
|
337
|
-
let usedModel = model;
|
|
338
|
-
let callMessages = messages;
|
|
339
|
-
const MAX_COERCION_RETRIES = 2;
|
|
340
|
-
try {
|
|
341
|
-
for (let coercionAttempt = 1; coercionAttempt <= MAX_COERCION_RETRIES + 1; coercionAttempt++) {
|
|
342
|
-
throwIfAborted(abortSignal, `Element capture cancelled for "${element.name}".`);
|
|
343
|
-
const visionResult = await callVisionCapableModel({
|
|
344
|
-
primaryModel: model,
|
|
345
|
-
fallbackModel,
|
|
346
|
-
callModel: (m) => client.chat.completions.create({
|
|
347
|
-
model: m,
|
|
348
|
-
messages: callMessages,
|
|
349
|
-
tools: elementCaptureTools,
|
|
350
|
-
tool_choice: 'required',
|
|
351
|
-
temperature: ELEMENT_CAPTURE_TEMPERATURE,
|
|
352
|
-
max_tokens: 1024,
|
|
353
|
-
provider: { zdr: true },
|
|
354
|
-
}, { signal: abortSignal }),
|
|
355
|
-
onFallbackActivated: (m, reason) => logger.info(`Element capture vision fallback activated: ${m} (reason: ${reason})`),
|
|
356
|
-
});
|
|
357
|
-
response = visionResult.result;
|
|
358
|
-
usedModel = visionResult.model;
|
|
359
|
-
const msg = response.choices?.[0]?.message;
|
|
360
|
-
const hasTool = !!msg?.tool_calls?.[0] && 'function' in msg.tool_calls[0];
|
|
361
|
-
if (hasTool || coercionAttempt > MAX_COERCION_RETRIES)
|
|
362
|
-
break;
|
|
363
|
-
const assistantContent = msg?.content;
|
|
364
|
-
if (!assistantContent)
|
|
365
|
-
break;
|
|
366
|
-
logger.info(`Model returned text without tool call; coercing (attempt ${coercionAttempt}/${MAX_COERCION_RETRIES})...`);
|
|
367
|
-
logger.ai(assistantContent.slice(0, 200));
|
|
368
|
-
callMessages = [
|
|
369
|
-
...callMessages,
|
|
370
|
-
{ role: 'assistant', content: assistantContent },
|
|
371
|
-
{
|
|
372
|
-
role: 'user',
|
|
373
|
-
content: 'You MUST call one of the available tools. Do not respond with text — select the most appropriate tool and call it now.',
|
|
374
|
-
},
|
|
375
|
-
];
|
|
376
|
-
}
|
|
377
|
-
usageLog.push({
|
|
378
|
-
stepNumber: ++stepCounter,
|
|
379
|
-
stepType: 'element_capture',
|
|
380
|
-
generationId: response.id ?? null,
|
|
381
|
-
modelRequested: model,
|
|
382
|
-
modelUsed: response.model ?? usedModel,
|
|
383
|
-
promptTokens: response.usage?.prompt_tokens ?? null,
|
|
384
|
-
completionTokens: response.usage?.completion_tokens ?? null,
|
|
385
|
-
totalTokens: response.usage?.total_tokens ?? null,
|
|
386
|
-
imagesInPrompt: 1,
|
|
387
|
-
});
|
|
388
|
-
}
|
|
389
|
-
catch (err) {
|
|
390
|
-
if (isAbortError(err)) {
|
|
391
|
-
throw err;
|
|
392
|
-
}
|
|
393
|
-
logger.error(`Element capture API call failed: ${err.message}`);
|
|
394
|
-
actionHistory.push(`Iteration ${iteration}: API error — ${err.message}`);
|
|
395
|
-
continue;
|
|
396
|
-
}
|
|
397
|
-
const message = response.choices?.[0]?.message;
|
|
398
|
-
const toolCall = message?.tool_calls?.[0];
|
|
399
|
-
if (message?.content && !toolCall) {
|
|
400
|
-
logger.ai(message.content.slice(0, 200));
|
|
401
|
-
}
|
|
402
|
-
if (!toolCall || !('function' in toolCall)) {
|
|
403
|
-
logger.error(`No tool call at element iteration ${iteration}`);
|
|
404
|
-
continue;
|
|
405
|
-
}
|
|
406
|
-
const name = toolCall.function.name;
|
|
407
|
-
let args;
|
|
408
|
-
try {
|
|
409
|
-
args = JSON.parse(toolCall.function.arguments);
|
|
410
|
-
}
|
|
411
|
-
catch {
|
|
412
|
-
logger.error(`Invalid JSON in element tool arguments: ${toolCall.function.arguments}`);
|
|
413
|
-
callMessages = [
|
|
414
|
-
...callMessages,
|
|
415
|
-
{ role: 'assistant', content: null, tool_calls: [toolCall] },
|
|
416
|
-
{ role: 'tool', tool_call_id: toolCall.id, content: 'ERROR: Invalid JSON in tool arguments. Please retry with correctly formatted JSON.' },
|
|
417
|
-
];
|
|
418
|
-
continue;
|
|
419
|
-
}
|
|
420
|
-
// 4. Handle tool calls
|
|
421
|
-
if (name === 'capture_by_selector') {
|
|
422
|
-
const selector = args.selector;
|
|
423
|
-
const confidence = args.confidence;
|
|
424
|
-
const reasoning = args.reasoning;
|
|
425
|
-
const baseOutscale = element.outscale ?? { padding: element.padding ?? 0 };
|
|
426
|
-
const groundedSelectors = new Set([
|
|
427
|
-
...interactiveElements.map((entry) => entry.selector).filter((entry) => !!entry),
|
|
428
|
-
...Array.from(usedSearchQueries.values()).flatMap((entry) => entry.selectors),
|
|
429
|
-
]);
|
|
430
|
-
if (shouldBlockUngroundedStructuralSelector({
|
|
431
|
-
selector,
|
|
432
|
-
groundedSelectors,
|
|
433
|
-
verifierRejectedAsTooLoose: lastVerifierRejectedAsTooLoose,
|
|
434
|
-
})) {
|
|
435
|
-
const latestSearchEntries = Array.from(usedSearchQueries.values());
|
|
436
|
-
const latestSearch = latestSearchEntries[latestSearchEntries.length - 1];
|
|
437
|
-
const candidateHint = latestSearch?.candidateLines.length
|
|
438
|
-
? `\nGrounded candidates from search_text (use one of their sel= values directly):\n${latestSearch.candidateLines.join('\n')}`
|
|
439
|
-
: '';
|
|
440
|
-
logger.info(`Element "${element.name}": blocking ungrounded structural selector "${selector}" after a loose-crop rejection`);
|
|
441
|
-
actionHistory.push(`Iteration ${iteration}: capture_by_selector("${selector}") BLOCKED — after a verifier rejection for loose framing, do NOT invent a tag-only DOM path from <page_dom>. Re-run search_text with distinctive in-card text and use a grounded sel= directly, preferring any ↳ container selector.${candidateHint}`);
|
|
442
|
-
continue;
|
|
443
|
-
}
|
|
444
|
-
logger.info(`Element "${element.name}": capture_by_selector("${selector}") (confidence: ${confidence.toFixed(2)})`);
|
|
445
|
-
logger.ai(reasoning);
|
|
446
|
-
try {
|
|
447
|
-
throwIfAborted(abortSignal, `Element capture cancelled for "${element.name}".`);
|
|
448
|
-
const verificationOutscale = buildVerificationOutscale(baseOutscale);
|
|
449
|
-
const { buffer, validation } = await browser.screenshotBySelector(selector, verificationOutscale);
|
|
450
|
-
// Reject captures that are too large (>70% of viewport area) — likely a wrapper, not the target element.
|
|
451
|
-
const viewport = browser.currentPage.viewportSize();
|
|
452
|
-
if (viewport && validation.boundingBox) {
|
|
453
|
-
const bb = validation.boundingBox;
|
|
454
|
-
const captureArea = bb.width * bb.height;
|
|
455
|
-
const viewportArea = viewport.width * viewport.height;
|
|
456
|
-
if (captureArea > viewportArea * 0.7) {
|
|
457
|
-
logger.info(`Element "${element.name}": selector "${selector}" covers ${Math.round(captureArea / viewportArea * 100)}% of viewport — too large, rejecting.`);
|
|
458
|
-
actionHistory.push(`Iteration ${iteration}: capture_by_selector("${selector}") rejected — element covers >70% of viewport, likely a wrapper not the target component. Use a more specific selector.`);
|
|
459
|
-
continue;
|
|
460
|
-
}
|
|
461
|
-
}
|
|
462
|
-
const { verified, reason: verifyReason, usage } = await verifyElementCapture(client, model, element, buffer, reasoning, ++stepCounter, abortSignal, fallbackModel, uploadImage);
|
|
463
|
-
if (usage)
|
|
464
|
-
usageLog.push(usage);
|
|
465
|
-
if (!verified) {
|
|
466
|
-
lastVerifierRejectedAsTooLoose = isLooseElementCaptureRejectionReason(verifyReason);
|
|
467
|
-
if (lastVerifierRejectedAsTooLoose) {
|
|
468
|
-
const looseFailureCount = (looseFailureCountsBySelector.get(selector) ?? 0) + 1;
|
|
469
|
-
looseFailureCountsBySelector.set(selector, looseFailureCount);
|
|
470
|
-
const selectorEvidenceEntry = selectorEvidence.get(selector);
|
|
471
|
-
const viewport = browser.currentPage.viewportSize();
|
|
472
|
-
if (selectorEvidenceEntry && shouldAcceptDomCorroboratedSelector({
|
|
473
|
-
looseFailureCount,
|
|
474
|
-
verifierRejectedAsTooLoose: true,
|
|
475
|
-
validation,
|
|
476
|
-
viewport,
|
|
477
|
-
observedAsInteractive: selectorEvidenceEntry.observedAsInteractive,
|
|
478
|
-
directQueryCount: selectorEvidenceEntry.directQueries.size,
|
|
479
|
-
containerQueryCount: selectorEvidenceEntry.containerQueries.size,
|
|
480
|
-
})) {
|
|
481
|
-
logger.info(`Element "${element.name}": accepting selector "${selector}" after repeated loose-only verifier rejections because DOM evidence consistently corroborates the same component`);
|
|
482
|
-
return {
|
|
483
|
-
element,
|
|
484
|
-
success: true,
|
|
485
|
-
buffer,
|
|
486
|
-
assessment: 'Accepted after repeated loose-only verifier rejections; DOM evidence consistently confirmed the same component and the verifier likely misread embedded preview content as surrounding page context.',
|
|
487
|
-
capturedSelector: selector,
|
|
488
|
-
validation,
|
|
489
|
-
confidence,
|
|
490
|
-
usage: usageLog,
|
|
491
|
-
};
|
|
492
|
-
}
|
|
493
|
-
}
|
|
494
|
-
lastFailedTransientSelector = isTransientSearchSelector(selector) ? selector : null;
|
|
495
|
-
actionHistory.push(lastVerifierRejectedAsTooLoose
|
|
496
|
-
? `Iteration ${iteration}: capture_by_selector("${selector}") rejected by verifier — ${verifyReason}. The selector matched a real element, but the framing was too loose. Stay grounded on sel= values returned by the tools, search for a more distinctive in-card string if needed, and prefer any ↳ container selector instead of inventing a tag-only DOM path.`
|
|
497
|
-
: `Iteration ${iteration}: capture_by_selector("${selector}") rejected by verifier — ${verifyReason}. Try a different or more specific selector.`);
|
|
498
|
-
continue;
|
|
499
|
-
}
|
|
500
|
-
lastVerifierRejectedAsTooLoose = false;
|
|
501
|
-
const finalBuffer = outscaleAddsPadding(baseOutscale)
|
|
502
|
-
? (await browser.screenshotBySelector(selector, baseOutscale)).buffer
|
|
503
|
-
: buffer;
|
|
504
|
-
logger.success(`Element "${element.name}" captured via selector "${selector}"`);
|
|
505
|
-
getPostHog().capture({
|
|
506
|
-
distinctId: distinctId ?? DISTINCT_ID,
|
|
507
|
-
event: 'element_capture_succeeded',
|
|
508
|
-
properties: {
|
|
509
|
-
element_name: element.name,
|
|
510
|
-
method: 'selector',
|
|
511
|
-
selector,
|
|
512
|
-
confidence,
|
|
513
|
-
iterations: iteration,
|
|
514
|
-
},
|
|
515
|
-
});
|
|
516
|
-
return {
|
|
517
|
-
element,
|
|
518
|
-
success: true,
|
|
519
|
-
buffer: finalBuffer,
|
|
520
|
-
assessment: verifyReason || reasoning,
|
|
521
|
-
capturedSelector: selector,
|
|
522
|
-
validation,
|
|
523
|
-
confidence,
|
|
524
|
-
usage: usageLog,
|
|
525
|
-
};
|
|
526
|
-
}
|
|
527
|
-
catch (err) {
|
|
528
|
-
if (isAbortError(err)) {
|
|
529
|
-
throw err;
|
|
530
|
-
}
|
|
531
|
-
// Discriminate structured selector validation errors from generic errors
|
|
532
|
-
const selectorErr = err;
|
|
533
|
-
if (selectorErr.error && selectorErr.errorMessage) {
|
|
534
|
-
if (selectorErr.error === 'no_match' && isTransientSearchSelector(selector)) {
|
|
535
|
-
lastFailedTransientSelector = selector;
|
|
536
|
-
}
|
|
537
|
-
else {
|
|
538
|
-
lastFailedTransientSelector = null;
|
|
539
|
-
}
|
|
540
|
-
const selectorSuggestions = {
|
|
541
|
-
no_match: 'The selector matched nothing. Try a broader selector, remove nth-child constraints, or use search_text to locate the element first.',
|
|
542
|
-
ambiguous: 'Multiple elements matched. Add parent context, a unique ID, data-testid, or aria-label to narrow down to one element.',
|
|
543
|
-
invisible: 'Element is hidden. Use scroll_to_element or scroll to bring it into view, or check if a parent element must be expanded first.',
|
|
544
|
-
zero_size: 'Element has zero rendered size. It may be dynamically rendered or conditionally shown. Try scrolling the page or waiting.',
|
|
545
|
-
};
|
|
546
|
-
const suggestion = selectorErr.error === 'no_match' && isTransientSearchSelector(selector)
|
|
547
|
-
? 'This looks like a stale temporary selector from an earlier search_text result. Re-run search_text to refresh the sel= value, then use the new selector immediately.'
|
|
548
|
-
: selectorSuggestions[selectorErr.error] ?? 'Refine the selector and retry.';
|
|
549
|
-
logger.error(`Selector validation failed: ${selectorErr.errorMessage}`);
|
|
550
|
-
actionHistory.push(`Iteration ${iteration}: capture_by_selector("${selector}") — ${selectorErr.errorMessage}. ${suggestion}`);
|
|
551
|
-
}
|
|
552
|
-
else {
|
|
553
|
-
const errorMsg = err.message;
|
|
554
|
-
logger.error(`Selector capture failed: ${errorMsg}`);
|
|
555
|
-
actionHistory.push(`Iteration ${iteration}: capture_by_selector("${selector}") failed — ${errorMsg}`);
|
|
556
|
-
}
|
|
557
|
-
continue;
|
|
558
|
-
}
|
|
559
|
-
}
|
|
560
|
-
if (name === 'resize_viewport') {
|
|
561
|
-
const w = args.width;
|
|
562
|
-
const h = args.height;
|
|
563
|
-
logger.info(`Element "${element.name}": resizing viewport to ${w}x${h}`);
|
|
564
|
-
await browser.resizeViewport(w, h);
|
|
565
|
-
await browser.wait(500);
|
|
566
|
-
actionHistory.push(`Iteration ${iteration}: resized viewport to ${w}x${h}`);
|
|
567
|
-
continue;
|
|
568
|
-
}
|
|
569
|
-
if (name === 'scroll') {
|
|
570
|
-
const index = args.index;
|
|
571
|
-
if (index !== undefined) {
|
|
572
|
-
const align = args.align;
|
|
573
|
-
const margin = args.margin;
|
|
574
|
-
logger.info(`Element "${element.name}": scrolling to index ${index}${align ? ` (${align})` : ''}`);
|
|
575
|
-
await browser.scrollElementIntoView(index, { align, margin });
|
|
576
|
-
await browser.wait(300);
|
|
577
|
-
actionHistory.push(`Iteration ${iteration}: scroll(index=${index}, align=${align || 'center'})`);
|
|
578
|
-
}
|
|
579
|
-
else {
|
|
580
|
-
const direction = args.direction ?? 'down';
|
|
581
|
-
const amount = args.amount ?? 500;
|
|
582
|
-
logger.info(`Element "${element.name}": scrolling ${direction} ${amount}px`);
|
|
583
|
-
await browser.scroll(direction, amount);
|
|
584
|
-
await browser.wait(500);
|
|
585
|
-
actionHistory.push(`Iteration ${iteration}: scroll(${direction}, ${amount}px)`);
|
|
586
|
-
}
|
|
587
|
-
continue;
|
|
588
|
-
}
|
|
589
|
-
if (name === 'dismiss_overlays') {
|
|
590
|
-
logger.info(`Element "${element.name}": dismissing overlays`);
|
|
591
|
-
const result = await browser.dismissOverlays();
|
|
592
|
-
await browser.wait(300);
|
|
593
|
-
actionHistory.push(`Iteration ${iteration}: dismiss_overlays() → ${result.dismissed ? `dismissed via ${result.method}` : 'no visible overlay dismissed'}`);
|
|
594
|
-
continue;
|
|
595
|
-
}
|
|
596
|
-
if (name === 'search_text') {
|
|
597
|
-
const query = args.query;
|
|
598
|
-
const queryKey = query.toLowerCase().trim();
|
|
599
|
-
// Block duplicate searches — the DOM hasn't changed, results would be identical.
|
|
600
|
-
// Re-surface the top candidates so the model can act on them instead of searching again.
|
|
601
|
-
const cachedSearch = usedSearchQueries.get(queryKey);
|
|
602
|
-
if (cachedSearch
|
|
603
|
-
&& !shouldAllowSearchRefresh({
|
|
604
|
-
cached: cachedSearch,
|
|
605
|
-
domSignature,
|
|
606
|
-
lastFailedTransientSelector,
|
|
607
|
-
})) {
|
|
608
|
-
const cached = cachedSearch;
|
|
609
|
-
logger.info(`Element "${element.name}": duplicate search_text("${query}") blocked`);
|
|
610
|
-
const candidateHint = cached.candidateLines.length > 0
|
|
611
|
-
? `\nTop candidates from that search (use their sel= in capture_by_selector):\n${cached.candidateLines.join('\n')}`
|
|
612
|
-
: '';
|
|
613
|
-
actionHistory.push(`Iteration ${iteration}: search_text("${query}") BLOCKED — DOM unchanged, results identical.${candidateHint}\nYou MUST either: (a) call capture_by_selector with a sel= from above, OR (b) search with a DIFFERENT query (unique text from the target: subtitle, price, feature text).`);
|
|
614
|
-
continue;
|
|
615
|
-
}
|
|
616
|
-
logger.info(`Element "${element.name}": searching for "${query}"`);
|
|
617
|
-
const results = await browser.searchText(query);
|
|
618
|
-
for (const result of results) {
|
|
619
|
-
getOrCreateSelectorEvidence(selectorEvidence, result.selector).directQueries.add(queryKey);
|
|
620
|
-
if (result.container?.selector) {
|
|
621
|
-
getOrCreateSelectorEvidence(selectorEvidence, result.container.selector).containerQueries.add(queryKey);
|
|
622
|
-
}
|
|
623
|
-
}
|
|
624
|
-
const MAX_SEARCH_RESULTS_IN_HISTORY = 5;
|
|
625
|
-
const resultLines = results.map((r, i) => {
|
|
626
|
-
const cx = Math.round(r.boundingBox.x + r.boundingBox.width / 2);
|
|
627
|
-
const cy = Math.round(r.boundingBox.y + r.boundingBox.height / 2);
|
|
628
|
-
const visibility = r.visibilityState === 'full'
|
|
629
|
-
? 'fully-visible'
|
|
630
|
-
: r.visibilityState === 'partial'
|
|
631
|
-
? 'partially-visible'
|
|
632
|
-
: 'off-screen';
|
|
633
|
-
const text = r.text.slice(0, 80).replace(/\s+/g, ' ');
|
|
634
|
-
let line = ` ${i}. <${r.tag}> "${text}" @${cx},${cy} ${r.boundingBox.width}x${r.boundingBox.height} (${visibility}) sel="${r.selector}"`;
|
|
635
|
-
if (r.container) {
|
|
636
|
-
line += `\n ↳ container: <${r.container.tag}> ${r.container.boundingBox.width}x${r.container.boundingBox.height} (${r.container.reason}) sel="${r.container.selector}"`;
|
|
637
|
-
}
|
|
638
|
-
return line;
|
|
639
|
-
});
|
|
640
|
-
const resultText = results.length > 0
|
|
641
|
-
? resultLines.slice(0, MAX_SEARCH_RESULTS_IN_HISTORY).join('\n')
|
|
642
|
-
+ (results.length > MAX_SEARCH_RESULTS_IN_HISTORY ? `\n (${results.length - MAX_SEARCH_RESULTS_IN_HISTORY} more results not shown)` : '')
|
|
643
|
-
: ' (no matches found)';
|
|
644
|
-
// Cache the top 3 most promising candidates (prefer off-screen small elements — likely specific items)
|
|
645
|
-
const topCandidates = resultLines
|
|
646
|
-
.filter(l => !l.includes('<body>'))
|
|
647
|
-
.slice(0, 3);
|
|
648
|
-
// Pick the best bounding box for coordinate-based fallback:
|
|
649
|
-
// prefer a result whose container has a reasonable card-like size (>= 100x100, < 70% viewport).
|
|
650
|
-
const viewportArea = 1440 * 900; // approximate
|
|
651
|
-
let bestRegion = null;
|
|
652
|
-
for (const result of results) {
|
|
653
|
-
const container = result.container;
|
|
654
|
-
if (container) {
|
|
655
|
-
const cb = container.boundingBox;
|
|
656
|
-
if (cb.width >= 100 && cb.height >= 100 && cb.width * cb.height < viewportArea * 0.7) {
|
|
657
|
-
bestRegion = cb;
|
|
658
|
-
break;
|
|
659
|
-
}
|
|
660
|
-
}
|
|
661
|
-
}
|
|
662
|
-
// Fallback: if no result had a suitably-sized container, use existing logic
|
|
663
|
-
if (!bestRegion) {
|
|
664
|
-
const bestResult = results[0] ?? null;
|
|
665
|
-
if (bestResult) {
|
|
666
|
-
const container = bestResult.container;
|
|
667
|
-
if (container && container.boundingBox.width * container.boundingBox.height < viewportArea * 0.7) {
|
|
668
|
-
bestRegion = container.boundingBox;
|
|
669
|
-
}
|
|
670
|
-
else {
|
|
671
|
-
bestRegion = bestResult.boundingBox;
|
|
672
|
-
}
|
|
673
|
-
}
|
|
674
|
-
}
|
|
675
|
-
usedSearchQueries.set(queryKey, {
|
|
676
|
-
candidateLines: topCandidates,
|
|
677
|
-
domSignature,
|
|
678
|
-
selectors: results.slice(0, 5).flatMap((result) => [
|
|
679
|
-
result.selector,
|
|
680
|
-
...(result.container?.selector ? [result.container.selector] : []),
|
|
681
|
-
]),
|
|
682
|
-
hasTransientSelectors: results.some((result) => isTransientSearchSelector(result.selector)
|
|
683
|
-
|| (result.container?.selector ? isTransientSearchSelector(result.container.selector) : false)),
|
|
684
|
-
bestRegion,
|
|
685
|
-
});
|
|
686
|
-
lastFailedTransientSelector = null;
|
|
687
|
-
logger.info(`Search results:\n${resultLines.join('\n')}`);
|
|
688
|
-
actionHistory.push(`Iteration ${iteration}: search_text("${query}") → ${results.length} match(es)\n${resultText}`);
|
|
689
|
-
continue;
|
|
690
|
-
}
|
|
691
|
-
if (name === 'give_up') {
|
|
692
|
-
const reason = args.reason || 'Unknown reason';
|
|
693
|
-
logger.error(`Element "${element.name}" not found: ${reason}`);
|
|
694
|
-
getPostHog().capture({
|
|
695
|
-
distinctId: distinctId ?? DISTINCT_ID,
|
|
696
|
-
event: 'element_capture_failed',
|
|
697
|
-
properties: {
|
|
698
|
-
element_name: element.name,
|
|
699
|
-
reason,
|
|
700
|
-
iterations: iteration,
|
|
701
|
-
failure_type: 'gave_up',
|
|
702
|
-
},
|
|
703
|
-
});
|
|
704
|
-
return {
|
|
705
|
-
element,
|
|
706
|
-
success: false,
|
|
707
|
-
buffer: Buffer.alloc(0),
|
|
708
|
-
assessment: reason,
|
|
709
|
-
usage: usageLog,
|
|
710
|
-
};
|
|
711
|
-
}
|
|
712
|
-
}
|
|
713
|
-
// Coordinate-based fallback: if the LLM couldn't find a valid CSS selector but
|
|
714
|
-
// search_text found the element with a bounding box, capture by region as a last resort.
|
|
715
|
-
const allRegions = Array.from(usedSearchQueries.values())
|
|
716
|
-
.map(entry => entry.bestRegion)
|
|
717
|
-
.filter((r) => r != null && r.width > 0 && r.height > 0);
|
|
718
|
-
if (allRegions.length > 0) {
|
|
719
|
-
// Prefer regions with reasonable card-like dimensions over tiny text bboxes
|
|
720
|
-
const region = [...allRegions].sort((a, b) => {
|
|
721
|
-
const aOk = a.width >= 100 && a.height >= 100 ? 1 : 0;
|
|
722
|
-
const bOk = b.width >= 100 && b.height >= 100 ? 1 : 0;
|
|
723
|
-
if (aOk !== bOk)
|
|
724
|
-
return bOk - aOk;
|
|
725
|
-
return (b.width * b.height) - (a.width * a.height);
|
|
726
|
-
})[0];
|
|
727
|
-
try {
|
|
728
|
-
logger.info(`Element "${element.name}": falling back to coordinate-based capture at ${region.x},${region.y} ${region.width}x${region.height}`);
|
|
729
|
-
const buffer = await browser.screenshotByRegion(region, element.outscale ?? element.padding ?? 0);
|
|
730
|
-
return {
|
|
731
|
-
element,
|
|
732
|
-
success: true,
|
|
733
|
-
buffer,
|
|
734
|
-
assessment: `Captured by coordinate fallback at (${region.x},${region.y}) ${region.width}x${region.height}`,
|
|
735
|
-
capturedRegion: region,
|
|
736
|
-
usage: usageLog,
|
|
737
|
-
};
|
|
738
|
-
}
|
|
739
|
-
catch (err) {
|
|
740
|
-
logger.error(`Element "${element.name}": coordinate fallback failed: ${err.message}`);
|
|
741
|
-
}
|
|
742
|
-
}
|
|
743
|
-
logger.error(`Element "${element.name}": max iterations reached`);
|
|
744
|
-
getPostHog().capture({
|
|
745
|
-
distinctId: distinctId ?? DISTINCT_ID,
|
|
746
|
-
event: 'element_capture_failed',
|
|
747
|
-
properties: {
|
|
748
|
-
element_name: element.name,
|
|
749
|
-
reason: 'Max iterations reached',
|
|
750
|
-
iterations: MAX_ELEMENT_ITERATIONS,
|
|
751
|
-
failure_type: 'max_iterations',
|
|
752
|
-
},
|
|
753
|
-
});
|
|
754
|
-
return {
|
|
755
|
-
element,
|
|
756
|
-
success: false,
|
|
757
|
-
buffer: Buffer.alloc(0),
|
|
758
|
-
assessment: 'Max iterations reached for element identification',
|
|
759
|
-
usage: usageLog,
|
|
760
|
-
};
|
|
761
|
-
}
|
|
762
|
-
finally {
|
|
763
|
-
await restoreViewport();
|
|
764
|
-
}
|
|
765
|
-
}
|
|
766
|
-
//# sourceMappingURL=element-capture.js.map
|