autokap 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli-config.d.ts +13 -0
- package/dist/cli-config.js +42 -0
- package/dist/cli-utils.d.ts +0 -19
- package/dist/cli-utils.js +2 -65
- package/dist/cli.d.ts +0 -1
- package/dist/cli.js +266 -305
- package/package.json +26 -19
- package/assets/chrome/ios-statusbar-comparison-reference.jpg +0 -0
- package/assets/chrome/ios-statusbar-dark-reference.jpg +0 -0
- package/assets/chrome/ios-statusbar-light-reference.jpg +0 -0
- package/assets/devices/ipad-pro-11-m4.json +0 -52
- package/assets/devices/iphone-16-pro.json +0 -53
- package/assets/devices/macbook-air-13.json +0 -45
- package/assets/frames/MacBook Air 13.svg +0 -242
- package/assets/frames/Status bar - iPhone.png +0 -0
- package/assets/frames/Status bar and Menu bar- iPad.png +0 -0
- package/assets/frames/iPad Pro M4 11_.png +0 -0
- package/assets/frames/iPhone 16 Pro.png +0 -0
- package/assets/icons/Cellular Connection.svg +0 -3
- package/assets/icons/Union.svg +0 -6
- package/assets/icons/Wifi.svg +0 -3
- package/assets/icons/battery.svg +0 -5
- package/assets/icons/battery_charging.svg +0 -8
- package/dist/abort.d.ts +0 -5
- package/dist/abort.js +0 -44
- package/dist/agent.d.ts +0 -142
- package/dist/agent.js +0 -4504
- package/dist/browser-bar.d.ts +0 -40
- package/dist/browser-bar.js +0 -147
- package/dist/clip-orchestrator.d.ts +0 -148
- package/dist/clip-orchestrator.js +0 -950
- package/dist/clip-postprocess.d.ts +0 -42
- package/dist/clip-postprocess.js +0 -192
- package/dist/credential-templates.d.ts +0 -5
- package/dist/credential-templates.js +0 -60
- package/dist/element-capture.d.ts +0 -53
- package/dist/element-capture.js +0 -766
- package/dist/hybrid-navigator.d.ts +0 -138
- package/dist/hybrid-navigator.js +0 -468
- package/dist/index.d.ts +0 -15
- package/dist/index.js +0 -11
- package/dist/llm-usage.d.ts +0 -17
- package/dist/llm-usage.js +0 -45
- package/dist/mockup-html.d.ts +0 -119
- package/dist/mockup-html.js +0 -253
- package/dist/mockup.d.ts +0 -94
- package/dist/mockup.js +0 -604
- package/dist/mouse-animation.d.ts +0 -46
- package/dist/mouse-animation.js +0 -100
- package/dist/overlay-utils.d.ts +0 -14
- package/dist/overlay-utils.js +0 -13
- package/dist/posthog.d.ts +0 -4
- package/dist/posthog.js +0 -26
- package/dist/prompt-cache.d.ts +0 -10
- package/dist/prompt-cache.js +0 -24
- package/dist/prompts.d.ts +0 -167
- package/dist/prompts.js +0 -1165
- package/dist/security.d.ts +0 -20
- package/dist/security.js +0 -569
- package/dist/session-profile.d.ts +0 -86
- package/dist/session-profile.js +0 -1471
- package/dist/sf-pro-fonts.d.ts +0 -4
- package/dist/sf-pro-fonts.js +0 -7
- package/dist/status-bar-l10n.d.ts +0 -14
- package/dist/status-bar-l10n.js +0 -177
- package/dist/status-bar.d.ts +0 -44
- package/dist/status-bar.js +0 -336
- package/dist/tools.d.ts +0 -4
- package/dist/tools.js +0 -578
- package/dist/video-agent.d.ts +0 -143
- package/dist/video-agent.js +0 -4783
- package/dist/video-observation.d.ts +0 -36
- package/dist/video-observation.js +0 -192
- package/dist/video-planner.d.ts +0 -12
- package/dist/video-planner.js +0 -500
- package/dist/video-prompts.d.ts +0 -37
- package/dist/video-prompts.js +0 -554
- package/dist/video-tools.d.ts +0 -3
- package/dist/video-tools.js +0 -59
- package/dist/video-variant-state.d.ts +0 -29
- package/dist/video-variant-state.js +0 -80
- package/dist/vision-model.d.ts +0 -17
- package/dist/vision-model.js +0 -74
package/dist/video-planner.js
DELETED
|
@@ -1,500 +0,0 @@
|
|
|
1
|
-
import OpenAI from 'openai';
|
|
2
|
-
import { buildVideoPlannerSystemPrompt, buildVideoPlannerUserMessage, } from './video-prompts.js';
|
|
3
|
-
export { observePlanningContext } from './video-observation.js';
|
|
4
|
-
import { throwIfAborted } from './abort.js';
|
|
5
|
-
import { extractStepUsage } from './llm-usage.js';
|
|
6
|
-
/**
 * Builds an OpenAI SDK client that targets the OpenRouter base URL.
 *
 * @param apiKey - API key handed to the SDK unchanged.
 * @returns A new `OpenAI` instance carrying the attribution headers below.
 */
function createClient(apiKey) {
    const attributionHeaders = {
        'HTTP-Referer': 'https://github.com/screenshot-agent',
        'X-Title': 'Screenshot Agent',
    };
    const clientOptions = {
        baseURL: 'https://openrouter.ai/api/v1',
        apiKey,
        defaultHeaders: attributionHeaders,
    };
    return new OpenAI(clientOptions);
}
|
|
16
|
-
/**
 * Tells whether `value` is an object whose `x` and `y` properties are both
 * of type number.
 *
 * @param value - Anything, typically a field taken from parsed JSON.
 * @returns `true` only for a truthy object with numeric `x` and `y`.
 */
function isCoordinates(value) {
    const isObjectLike = Boolean(value) && typeof value === 'object';
    if (!isObjectLike) {
        return false;
    }
    const { x, y } = value;
    return typeof x === 'number' && typeof y === 'number';
}
|
|
22
|
-
/**
 * Detects selectors that mention the `data-ak-*` attributes, either as a
 * bracketed attribute selector or as the literal `data-ak-interactive-index`.
 *
 * @param selector - Candidate selector; non-strings never match.
 * @returns `true` when the selector text matches the pattern (case-insensitive).
 */
function containsInternalAutomationSelector(selector) {
    if (typeof selector !== 'string') {
        return false;
    }
    const internalAttributePattern = /\[data-ak-[^\]]+\]|data-ak-interactive-index/i;
    return internalAttributePattern.test(selector);
}
|
|
25
|
-
/**
 * Sanitizes a structured step target.
 *
 * - Non-object input (including arrays and strings) yields `undefined`, so a
 *   malformed target is never spread into an index-keyed object.
 * - `selectorAlternates` keeps only non-blank strings that do not reference
 *   internal `data-ak-*` attributes; each kept entry is trimmed so it follows
 *   the same rule as the primary selector.
 * - `selector` is trimmed and dropped when blank or internal; when it is
 *   dropped, the first surviving alternate is promoted in its place.
 * - `coordinates` and `index` are kept only when well-typed.
 *
 * @param target - Raw target object taken from a plan step.
 * @returns A sanitized copy of the target, or `undefined` when there is none.
 */
function normalizeTarget(target) {
    if (!target || typeof target !== 'object' || Array.isArray(target))
        return undefined;
    const isUsableSelector = (candidate) => candidate.length > 0 && !containsInternalAutomationSelector(candidate);
    const selectorAlternates = Array.isArray(target.selectorAlternates)
        ? target.selectorAlternates
            .filter((candidate) => typeof candidate === 'string')
            .map((candidate) => candidate.trim())
            .filter(isUsableSelector)
        : [];
    const trimmedSelector = typeof target.selector === 'string' ? target.selector.trim() : '';
    const primarySelector = isUsableSelector(trimmedSelector) ? trimmedSelector : undefined;
    return {
        ...target,
        // Fall back to the first usable alternate when the primary is unusable.
        selector: primarySelector ?? selectorAlternates[0],
        selectorAlternates,
        coordinates: isCoordinates(target.coordinates) ? target.coordinates : undefined,
        index: typeof target.index === 'number' ? target.index : undefined,
    };
}
|
|
48
|
-
/**
 * Validates the shape of a page expectation object.
 *
 * An expectation is accepted when it carries at least one piece of evidence:
 * a non-empty pattern list, a string `locale`, or a `theme` of `'light'` or
 * `'dark'`.
 *
 * Each list field that is present must be an array of strings. These values
 * are later spread and passed through string normalization, so a non-array
 * or a non-string entry is rejected here rather than failing downstream.
 * Expectations whose `selectors` reference internal `data-ak-*` attributes
 * are rejected.
 *
 * @param value - Candidate expectation, typically from parsed model output.
 * @returns `true` when the expectation is well-formed and non-empty.
 */
function isPageExpectation(value) {
    if (!value || typeof value !== 'object')
        return false;
    const expectation = value;
    const listFieldNames = ['urlPatterns', 'titlePatterns', 'textPatterns', 'navPatterns', 'breadcrumbPatterns', 'selectors'];
    let hasListEvidence = false;
    for (const field of listFieldNames) {
        const entries = expectation[field];
        if (entries === undefined || entries === null)
            continue;
        if (!Array.isArray(entries) || entries.some((entry) => typeof entry !== 'string'))
            return false;
        if (entries.length > 0)
            hasListEvidence = true;
    }
    if (Array.isArray(expectation.selectors)
        && expectation.selectors.some((selector) => containsInternalAutomationSelector(selector))) {
        return false;
    }
    const hasScalarEvidence = typeof expectation.locale === 'string'
        || expectation.theme === 'light'
        || expectation.theme === 'dark';
    return hasListEvidence || hasScalarEvidence;
}
|
|
60
|
-
/**
 * Structural validation of a single plan step.
 *
 * Every step needs a non-empty string `id`, a string `type`, a non-empty
 * string `description`, and, when `expectedPageAfter` is present, a valid
 * page expectation. The remaining requirements depend on `type`; an unknown
 * type is invalid.
 *
 * @param step - Candidate step object.
 * @returns `true` when the step satisfies the rules for its type.
 */
function validateStep(step) {
    if (!step || typeof step !== 'object') {
        return false;
    }
    const draft = step;
    const isFilledString = (value) => typeof value === 'string' && !!value;
    const isTrimmedString = (value) => typeof value === 'string' && !!value.trim();
    const isPublicSelector = (value) => isTrimmedString(value) && !containsInternalAutomationSelector(value);
    const resolves = (target) => !!(target?.selector || target?.coordinates);

    if (!isFilledString(draft.id) || typeof draft.type !== 'string' || !isFilledString(draft.description)) {
        return false;
    }
    if (draft.expectedPageAfter !== undefined && !isPageExpectation(draft.expectedPageAfter)) {
        return false;
    }

    const bySelector = isPublicSelector(draft.selector);
    const byCoordinates = isCoordinates(draft.coordinates);
    const byTarget = resolves(normalizeTarget(draft.target));
    // "element reference" = something addressable without raw coordinates.
    const hasElementRef = bySelector || byTarget;
    const hasAnyLocator = bySelector || byCoordinates || byTarget;

    const rules = {
        navigate: () => isFilledString(draft.url),
        dismiss_overlays: () => true,
        select_option: () => hasElementRef && (isTrimmedString(draft.optionLabel)
            || isTrimmedString(draft.optionValue)
            || typeof draft.optionIndex === 'number'),
        click: () => hasAnyLocator,
        hover: () => hasAnyLocator,
        highlight: () => hasAnyLocator,
        type: () => hasAnyLocator && typeof draft.text === 'string',
        drag: () => {
            const destinationBySelector = isPublicSelector(draft.toSelector);
            const destinationByCoordinates = isCoordinates(draft.toCoordinates);
            const destinationByTarget = resolves(normalizeTarget(draft.toTarget));
            return hasAnyLocator && (destinationBySelector || destinationByCoordinates || destinationByTarget);
        },
        scroll: () => {
            if (hasElementRef) {
                return true;
            }
            const directionOk = draft.direction === 'up'
                || draft.direction === 'down'
                || draft.direction === 'left'
                || draft.direction === 'right';
            return directionOk && typeof draft.amount === 'number';
        },
        wait: () => typeof draft.waitMs === 'number' && draft.waitMs >= 0,
        key: () => isFilledString(draft.key),
        assert_url: () => isFilledString(draft.urlPattern),
        assert_text: () => isFilledString(draft.text),
        assert_element: () => hasElementRef,
        assert_page: () => isPageExpectation(draft.pageExpectation),
    };
    // Own-property check so that names such as "constructor" are not treated as rules.
    return Object.prototype.hasOwnProperty.call(rules, draft.type) ? rules[draft.type]() : false;
}
|
|
120
|
-
/**
 * Returns a copy of the expectation with `minConfidence` defaulted to 0.65
 * when it is null or undefined.
 *
 * @param expectation - Page expectation, or a falsy value.
 * @returns The defaulted copy, or `undefined` for falsy input.
 */
function normalizePageExpectation(expectation) {
    if (expectation) {
        const minConfidence = expectation.minConfidence ?? 0.65;
        return { ...expectation, minConfidence };
    }
    return undefined;
}
|
|
128
|
-
/**
 * Produces the canonical form of a plan step.
 *
 * - Fills `id` (`step-<n>`) and `recordingIntent` (`'visible'`) when missing.
 * - Sanitizes `target` and `toTarget`.
 * - Resolves the flat `selector` / `coordinates` / `toSelector` /
 *   `toCoordinates` fields. The step's own value wins only when it is usable:
 *   a blank or internal (`data-ak-*`) selector, or malformed coordinates, no
 *   longer shadow the valid value carried by the structured target.
 * - Applies defaults to assertion steps (`matchMode`, `state`, `timeoutMs`,
 *   normalized `pageExpectation`).
 *
 * A non-object input is returned unchanged so that callers which validate
 * after normalizing can filter it out instead of hitting a TypeError.
 *
 * @param step - Raw step.
 * @param index - Zero-based position, used for the generated id.
 * @returns The normalized step.
 */
function normalizeStep(step, index) {
    if (!step || typeof step !== 'object')
        return step;
    const DEFAULT_ASSERT_TIMEOUT_MS = 6000;
    const firstUsableSelector = (...candidates) => candidates.find((candidate) => typeof candidate === 'string'
        && candidate.trim().length > 0
        && !containsInternalAutomationSelector(candidate));
    const target = normalizeTarget(step.target);
    const toTarget = normalizeTarget(step.toTarget);
    const selector = firstUsableSelector(step.selector, target?.selector, target?.selectorAlternates?.join(', '));
    const toSelector = firstUsableSelector(step.toSelector, toTarget?.selector, toTarget?.selectorAlternates?.join(', '));
    const coordinates = isCoordinates(step.coordinates) ? step.coordinates : target?.coordinates;
    const toCoordinates = isCoordinates(step.toCoordinates) ? step.toCoordinates : toTarget?.coordinates;
    const baseStep = {
        ...step,
        id: step.id || `step-${index + 1}`,
        recordingIntent: step.recordingIntent ?? 'visible',
        target,
        selector,
        coordinates,
        toTarget,
        toSelector,
        toCoordinates,
        expectedPageAfter: normalizePageExpectation(step.expectedPageAfter),
    };
    const timeoutMs = step.timeoutMs ?? DEFAULT_ASSERT_TIMEOUT_MS;
    switch (step.type) {
        case 'assert_url':
        case 'assert_text':
            return { ...baseStep, matchMode: step.matchMode ?? 'contains', timeoutMs };
        case 'assert_element':
            return { ...baseStep, state: step.state ?? 'visible', timeoutMs };
        case 'assert_page':
            return {
                ...baseStep,
                timeoutMs,
                pageExpectation: normalizePageExpectation(step.pageExpectation),
            };
        default:
            return baseStep;
    }
}
|
|
177
|
-
/**
 * Canonicalizes text for loose comparison: strips combining diacritics,
 * collapses whitespace runs to a single space, trims and lower-cases.
 *
 * Non-string input (nullish, numbers, objects) is treated as empty text
 * instead of throwing. Values reaching this function can originate from
 * parsed model output, which is not guaranteed to be a string.
 *
 * @param value - Text to normalize.
 * @returns The normalized string, `''` for non-string input.
 */
function normalizeSemanticToken(value) {
    const text = typeof value === 'string' ? value : '';
    return text
        .normalize('NFD')
        .replace(/[\u0300-\u036f]/g, '')
        .replace(/\s+/g, ' ')
        .trim()
        .toLowerCase();
}
|
|
185
|
-
/**
 * Converts text to a hyphen-separated slug: normalizes it, replaces every run
 * of characters outside `[a-z0-9]` with a single `-`, and strips leading and
 * trailing hyphens.
 *
 * @param value - Text to slugify.
 * @returns The slug (possibly empty).
 */
function slugifySemanticToken(value) {
    const canonical = normalizeSemanticToken(value);
    const hyphenated = canonical.replace(/[^a-z0-9]+/g, '-');
    return hyphenated.replace(/^-+|-+$/g, '');
}
|
|
190
|
-
/**
 * Pulls entity phrases out of a step description: up to five words sitting
 * between an action verb (open, click, highlight, hover, verify, "scroll … to",
 * go to, reveal) and a UI noun (page, section, link, button, tab, card).
 *
 * @param description - Free-text step description; falsy yields `[]`.
 * @returns Trimmed phrases of at least three characters, in match order.
 */
function extractDescriptionEntityPhrases(description) {
    if (!description) {
        return [];
    }
    const entityPattern = /\b(?:open|click|highlight|hover|verify|scroll(?:\s+\w+){0,2}\s+to|go to|reveal)\s+(?:the\s+)?([a-z0-9][a-z0-9+.\-/]*(?:\s+[a-z0-9][a-z0-9+.\-/]*){0,4})\s+(?:page|section|link|button|tab|card)\b/gi;
    const phrases = [];
    for (const hit of description.matchAll(entityPattern)) {
        const phrase = hit[1]?.trim() ?? '';
        if (phrase.length >= 3) {
            phrases.push(phrase);
        }
    }
    return phrases;
}
|
|
197
|
-
/**
 * Decides whether a phrase is specific enough to constrain selector matching.
 * After normalization, a phrase qualifies when it contains a digit, contains a
 * hyphen, or has at least two words.
 *
 * @param value - Candidate phrase.
 * @returns `true` for specific phrases, `false` for empty or generic ones.
 */
function isSpecificIntentPhrase(value) {
    const canonical = normalizeSemanticToken(value);
    if (!canonical) {
        return false;
    }
    if (/\d/.test(canonical) || canonical.includes('-')) {
        return true;
    }
    const wordCount = canonical.split(/\s+/).filter(Boolean).length;
    return wordCount >= 2;
}
|
|
204
|
-
/**
 * Flattens every pattern list of a page expectation into one array, in the
 * order: URL, title, text, nav, breadcrumb, selectors.
 *
 * @param expectation - Page expectation, or a falsy value.
 * @returns The concatenated values, `[]` for falsy input.
 */
function collectExpectationValues(expectation) {
    const collected = [];
    if (!expectation) {
        return collected;
    }
    const orderedFields = ['urlPatterns', 'titlePatterns', 'textPatterns', 'navPatterns', 'breadcrumbPatterns', 'selectors'];
    for (const field of orderedFields) {
        collected.push(...(expectation[field] ?? []));
    }
    return collected;
}
|
|
216
|
-
/**
 * Gathers the specific names a step refers to, for later comparison against
 * its selector or expectation.
 *
 * - `phrases`: de-duplicated quoted strings and entity phrases from the
 *   description plus all expectation values, kept only when specific.
 * - `slugs`: hyphenated tokens found in the target label/href/selectors, the
 *   step selector and the expectation values, followed by the slugified
 *   phrases (de-duplicated, empty entries removed).
 * - `strict`: whether any phrase or any directly-found slug exists.
 *
 * @param step - Normalized plan step.
 * @returns `{ phrases, slugs, strict }`.
 */
function collectStrictIntentTargets(step) {
    const afterValues = collectExpectationValues(step.expectedPageAfter);
    const pageValues = collectExpectationValues(step.pageExpectation);

    const quotedPhrases = [];
    if (step.description) {
        for (const hit of step.description.matchAll(/['"]([^'"]{3,})['"]/g)) {
            quotedPhrases.push(hit[1]);
        }
    }
    const candidatePhrases = new Set([
        ...quotedPhrases,
        ...extractDescriptionEntityPhrases(step.description),
        ...afterValues,
        ...pageValues,
    ]);
    const phrases = Array.from(candidatePhrases).filter(isSpecificIntentPhrase);

    const structuredTarget = step.target;
    const slugHaystack = [
        structuredTarget?.label ?? '',
        structuredTarget?.href ?? '',
        structuredTarget?.selector ?? '',
        ...(structuredTarget?.selectorAlternates ?? []),
        step.selector ?? '',
        ...afterValues,
        ...pageValues,
    ].join(' ');
    const rawSlugs = slugHaystack.match(/[a-z0-9]+(?:-[a-z0-9]+)+/gi)?.map((value) => value.toLowerCase()) ?? [];
    const slugs = Array.from(new Set(rawSlugs))
        .filter((value) => value.length >= 3 && (value.includes('-') || /\d/.test(value)));

    const phraseSlugs = phrases.map(slugifySemanticToken);
    const allSlugs = Array.from(new Set([...slugs, ...phraseSlugs].filter(Boolean)));
    return {
        phrases,
        slugs: allSlugs,
        strict: phrases.length > 0 || slugs.length > 0,
    };
}
|
|
243
|
-
/**
 * Checks that a selector mentions at least one of the step's specific
 * phrases or slugs.
 *
 * @param selector - Selector text; falsy is never supportive.
 * @param step - Step whose intent targets are collected.
 * @returns `true` when the step has no strict targets, or when the normalized
 *   selector contains one of them.
 */
function selectorSupportsTargets(selector, step) {
    if (!selector) {
        return false;
    }
    const { strict, phrases, slugs } = collectStrictIntentTargets(step);
    if (!strict) {
        return true;
    }
    const haystack = normalizeSemanticToken(selector);
    const mentionsPhrase = phrases.some((phrase) => haystack.includes(normalizeSemanticToken(phrase)));
    return mentionsPhrase || slugs.some((slug) => haystack.includes(slug));
}
|
|
253
|
-
/**
 * Checks that a page expectation mentions at least one of the step's specific
 * phrases or slugs.
 *
 * @param expectation - Page expectation; falsy is never supportive.
 * @param step - Step whose intent targets are collected.
 * @returns `true` when the step has no strict targets, or when the joined,
 *   normalized expectation values contain one of them.
 */
function expectationSupportsTargets(expectation, step) {
    if (!expectation) {
        return false;
    }
    const { strict, phrases, slugs } = collectStrictIntentTargets(step);
    if (!strict) {
        return true;
    }
    const joinedValues = collectExpectationValues(expectation).join(' ');
    const haystack = normalizeSemanticToken(joinedValues);
    const mentionsPhrase = phrases.some((phrase) => haystack.includes(normalizeSemanticToken(phrase)));
    return mentionsPhrase || slugs.some((slug) => haystack.includes(slug));
}
|
|
263
|
-
/**
 * Tells whether a step is of a type that may receive a derived
 * `expectedPageAfter`: navigate, click, select_option, type or key.
 *
 * @param step - Plan step.
 * @returns `true` for the listed types.
 */
function isNavigationContractCandidate(step) {
    const candidateTypes = ['navigate', 'click', 'select_option', 'type', 'key'];
    return candidateTypes.includes(step.type);
}
|
|
270
|
-
/**
 * Looks at up to three steps following `index` for an assertion that can act
 * as the destination contract of the step at `index`.
 *
 * `wait` and `dismiss_overlays` steps are skipped. The first other step
 * decides the outcome: an `assert_page` contributes its page expectation; an
 * `assert_url` with a pattern contributes a URL expectation (confidence 1 for
 * `equals`, otherwise 0.6); anything else ends the search.
 *
 * @param steps - All normalized steps.
 * @param index - Index of the step needing a contract.
 * @returns The derived expectation, or `undefined`.
 */
function deriveExpectedPageAfter(steps, index) {
    const lookaheadEnd = Math.min(steps.length, index + 4);
    let cursor = index + 1;
    while (cursor < lookaheadEnd) {
        const follower = steps[cursor];
        cursor += 1;
        const isPassive = follower.type === 'wait' || follower.type === 'dismiss_overlays';
        if (isPassive) {
            continue;
        }
        if (follower.type === 'assert_page') {
            return normalizePageExpectation(follower.pageExpectation);
        }
        if (follower.type === 'assert_url' && follower.urlPattern) {
            const minConfidence = follower.matchMode === 'equals' ? 1 : 0.6;
            return normalizePageExpectation({ urlPatterns: [follower.urlPattern], minConfidence });
        }
        // First non-passive follower is not an assertion: nothing to derive.
        return undefined;
    }
    return undefined;
}
|
|
289
|
-
/**
 * Attaches a derived `expectedPageAfter` to navigation-like steps that lack
 * one, using the assertions that follow them. Steps that are not candidates,
 * already have a contract, or yield no derivation are returned as-is.
 *
 * @param steps - Normalized steps.
 * @returns A new array; enriched steps are shallow copies.
 */
function enrichPlanRuntimeContracts(steps) {
    const enrichOne = (step, index) => {
        const needsContract = isNavigationContractCandidate(step) && !step.expectedPageAfter;
        if (!needsContract) {
            return step;
        }
        const derived = deriveExpectedPageAfter(steps, index);
        return derived ? { ...step, expectedPageAfter: derived } : step;
    };
    return steps.map(enrichOne);
}
|
|
303
|
-
/**
 * Extracts the name of the thing a scroll description refers to: the first
 * quoted string of three or more characters, otherwise the words in a
 * "to (the) <name> section" phrase.
 *
 * @param description - Step description; falsy yields `null`.
 * @returns The name, or `null` when none is found.
 */
function extractNamedScrollTarget(description) {
    if (!description) {
        return null;
    }
    const quotedHit = /['"]([^'"]{3,})['"]/.exec(description);
    if (quotedHit) {
        return quotedHit[1];
    }
    const sectionHit = /\bto (?:the\s+)?([a-z0-9][a-z0-9\s+.\-/'’]{2,50}) section\b/i.exec(description);
    return sectionHit?.[1]?.trim() ?? null;
}
|
|
312
|
-
/**
 * Semantic lint over a normalized plan. Returns the first problem found as a
 * human-readable message, or `null` when the plan passes.
 *
 * Checks, in order:
 * 1. Among steps that are not `prepare_only`, only the first may be a
 *    `navigate`.
 * 2. Per step:
 *    - a `scroll` that names a section needs a selector, and that selector
 *      must mention the named target;
 *    - `click` / `hover` / `highlight` / `assert_element` steps with specific
 *      targets need a matching selector, coordinates, or (clicks only) a
 *      matching `expectedPageAfter`;
 *    - `click` / `navigate` / `select_option` / `key` steps whose description
 *      announces a page change need `expectedPageAfter`, unless `prepare_only`.
 * 3. When `options.variant.lang` is set and `options.mode` is `'base'`,
 *    `assert_text` steps must not contain common English UI words.
 *
 * The per-step checks are independent. A click without strict targets is
 * still subject to the destination-contract check; the element-specificity
 * branch no longer uses `continue`, which used to skip it.
 *
 * @param plan - Plan whose `steps` are already normalized.
 * @param options - Planner options (`variant`, `mode`).
 * @returns An error message, or `null`.
 */
function lintPlanSemantics(plan, options = {}) {
    const PAGE_CHANGE_VERB = /\b(open|go to|navigate to|land on|arrive on)\b/i;
    const PAGE_CHANGE_NOUN = /\b(page|pricing|dashboard|section|screen|tab)\b/i;
    const LIKELY_ENGLISH_TEXT = /\b(learn more|buy|hello|pricing|features)\b/i;
    const ELEMENT_STEP_TYPES = ['click', 'hover', 'highlight', 'assert_element'];
    const PAGE_CHANGING_TYPES = ['click', 'navigate', 'select_option', 'key'];
    const steps = plan.steps;

    const visibleSteps = steps.filter((step) => step.recordingIntent !== 'prepare_only');
    const midFlowNavigate = visibleSteps.find((step, position) => position > 0 && step.type === 'navigate');
    if (midFlowNavigate) {
        return `Plan is invalid: visible step "${midFlowNavigate.description}" uses direct navigate mid-flow. Recorded demos must navigate through the UI after the initial landing.`;
    }

    for (const step of steps) {
        if (step.type === 'scroll') {
            const namedTarget = extractNamedScrollTarget(step.description);
            if (namedTarget && !step.selector) {
                return `Plan is invalid: scroll step "${step.description}" targets a named section but has no selector. Use a selector-based scroll for named sections.`;
            }
            if (namedTarget && !selectorSupportsTargets(step.selector, step)) {
                return `Plan is invalid: scroll step "${step.description}" targets a specific named section but its selector is too generic. Use a selector containing the target text or section slug.`;
            }
        }
        if (ELEMENT_STEP_TYPES.includes(step.type) && collectStrictIntentTargets(step).strict) {
            const selectorOk = selectorSupportsTargets(step.selector, step);
            const contractOk = step.type === 'click' && expectationSupportsTargets(step.expectedPageAfter, step);
            if (!selectorOk && !contractOk && !step.coordinates) {
                return `Plan is invalid: step "${step.description}" targets a specific page/element but its selector is too generic. Use exact product labels/routes or attach expectedPageAfter.`;
            }
        }
        if (PAGE_CHANGING_TYPES.includes(step.type)
            && PAGE_CHANGE_VERB.test(step.description)
            && PAGE_CHANGE_NOUN.test(step.description)
            && !step.expectedPageAfter
            && step.recordingIntent !== 'prepare_only') {
            return `Plan is invalid: step "${step.description}" changes page/state but has no runtime destination contract. Add expectedPageAfter or a following assert_page/assert_url.`;
        }
    }

    if (options.variant?.lang && options.mode === 'base') {
        for (const step of steps) {
            if (step.type === 'assert_text' && LIKELY_ENGLISH_TEXT.test(step.text ?? '')) {
                return `Plan is invalid: localized base plan still contains a likely English assert_text "${step.text}". Regenerate the plan in the prepared variant context.`;
            }
        }
    }
    return null;
}
|
|
358
|
-
/**
 * Structural validation of a parsed plan: non-empty string `title`, numeric
 * `estimatedDurationSec`, non-empty string `startUrl`, and a non-empty
 * `steps` array in which every step passes `validateStep`.
 *
 * @param parsed - Value obtained from parsing the model response.
 * @returns `true` when the plan has the required shape.
 */
function validatePlan(parsed) {
    if (!parsed || typeof parsed !== 'object') {
        return false;
    }
    const plan = parsed;
    const hasTitle = typeof plan.title === 'string' && Boolean(plan.title);
    if (!hasTitle) {
        return false;
    }
    if (typeof plan.estimatedDurationSec !== 'number') {
        return false;
    }
    const hasStartUrl = typeof plan.startUrl === 'string' && Boolean(plan.startUrl);
    if (!hasStartUrl) {
        return false;
    }
    if (!Array.isArray(plan.steps) || plan.steps.length === 0) {
        return false;
    }
    return plan.steps.every((step) => validateStep(step));
}
|
|
376
|
-
/**
 * Normalizes every step of a structurally valid plan, derives missing runtime
 * contracts, then runs the semantic lint.
 *
 * @param parsed - Structurally valid plan.
 * @param options - Planner options forwarded to the lint.
 * @returns `{ plan }` on success, `{ error }` with the lint message otherwise.
 */
function normalizeAndValidatePlan(parsed, options = {}) {
    const normalizedSteps = parsed.steps.map((step, position) => normalizeStep(step, position));
    const normalizedPlan = {
        ...parsed,
        steps: enrichPlanRuntimeContracts(normalizedSteps),
    };
    const lintMessage = lintPlanSemantics(normalizedPlan, options);
    return lintMessage ? { error: lintMessage } : { plan: normalizedPlan };
}
|
|
387
|
-
/**
 * Phase 1 of the video pipeline: convert a natural-language script into a
 * structured `VideoPlan` using a single LLM call in JSON mode.
 *
 * The call is attempted up to `maxRetries + 1` times. After a rejected
 * response, the next attempt appends a user message describing the failure.
 * If every attempt fails, the last response is salvaged when possible: its
 * individually valid steps are kept (or a single `navigate` step to `url`
 * when none survive) and missing top-level fields are defaulted. Only if the
 * salvaged plan also fails does the function throw.
 *
 * @param script - Natural-language description of the demo.
 * @param url - Start URL; also used by the salvage fallback.
 * @param model - Model identifier passed to the completion call.
 * @param apiKey - API key used to create the client.
 * @param maxRetries - Additional attempts after the first (default 2).
 * @param options - Planner options; `options.screenshot`, when set, is sent as
 *   a base64 JPEG data URL alongside the text prompt.
 * @param abortSignal - Optional signal; checked before each attempt and
 *   forwarded to the HTTP request.
 * @returns `{ plan, usage }`, where `usage` describes the response the plan
 *   was built from.
 * @throws Error when no valid plan could be produced, plus whatever
 *   `throwIfAborted` or the client throws.
 */
export async function planFromScript(script, url, model, apiKey, maxRetries = 2, options = {}, abortSignal) {
    const client = createClient(apiKey);
    const systemPrompt = buildVideoPlannerSystemPrompt(options);
    const userMessage = buildVideoPlannerUserMessage(script, url, options);

    // The prompt does not change between attempts, so build it once.
    const userContent = options.screenshot
        ? [
            { type: 'text', text: userMessage },
            {
                type: 'image_url',
                image_url: {
                    url: `data:image/jpeg;base64,${options.screenshot.toString('base64')}`,
                    detail: 'low',
                },
            },
        ]
        : userMessage;
    const baseMessages = [
        { role: 'system', content: systemPrompt },
        { role: 'user', content: userContent },
    ];
    const buildUsage = (response) => ({
        ...extractStepUsage(response, {
            stepNumber: 1,
            stepType: 'video_planning',
            modelRequested: model,
            imagesInPrompt: options.screenshot ? 1 : 0,
        }),
    });

    let lastError = '';
    let lastResponse = null;
    for (let attempt = 1; attempt <= maxRetries + 1; attempt++) {
        throwIfAborted(abortSignal, 'Video planning cancelled.');
        const messages = [...baseMessages];
        if (attempt > 1 && lastError) {
            messages.push({
                role: 'user',
                content: `The previous response was invalid: ${lastError}. Please try again with a valid JSON plan.`,
            });
        }
        const response = await client.chat.completions.create({
            model,
            messages,
            max_tokens: 4000,
            response_format: { type: 'json_object' },
            provider: { zdr: true },
        }, { signal: abortSignal });
        lastResponse = response;
        const content = response.choices?.[0]?.message?.content ?? '';
        let parsed;
        try {
            parsed = JSON.parse(content);
        }
        catch (e) {
            lastError = `JSON parse error: ${e instanceof Error ? e.message : String(e)}`;
            continue;
        }
        if (!validatePlan(parsed)) {
            lastError = 'Plan structure is invalid or missing required fields.';
            continue;
        }
        const normalized = normalizeAndValidatePlan(parsed, options);
        if (normalized.plan) {
            return { plan: normalized.plan, usage: buildUsage(response) };
        }
        lastError = normalized.error ?? 'Plan failed semantic validation.';
    }

    // Best-effort salvage of the final response.
    const finalContent = lastResponse?.choices?.[0]?.message?.content ?? '';
    if (finalContent) {
        try {
            const parsed = JSON.parse(finalContent);
            const rawSteps = Array.isArray(parsed.steps) ? parsed.steps : [];
            const usableSteps = rawSteps
                .map((s, i) => normalizeStep(s, i))
                .filter((s) => validateStep(s));
            const fallbackStep = {
                id: 'step-1',
                type: 'navigate',
                description: 'Navigate to start URL',
                url,
            };
            const plan = {
                title: typeof parsed.title === 'string' ? parsed.title : 'Video demo',
                estimatedDurationSec: typeof parsed.estimatedDurationSec === 'number' ? parsed.estimatedDurationSec : 30,
                startUrl: typeof parsed.startUrl === 'string' ? parsed.startUrl : url,
                steps: usableSteps.length > 0 ? usableSteps : [fallbackStep],
            };
            const normalized = normalizeAndValidatePlan(plan, options);
            if (normalized.plan) {
                return { plan: normalized.plan, usage: buildUsage(lastResponse) };
            }
        }
        catch {
            // Salvage is optional: unparsable or malformed content falls
            // through to the hard error below, which reports `lastError`.
        }
    }
    throw new Error(`Failed to generate a valid video plan after ${maxRetries + 1} attempts. Last error: ${lastError}`);
}
|
|
500
|
-
//# sourceMappingURL=video-planner.js.map
|
package/dist/video-prompts.d.ts
DELETED
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
import type { ChatCompletionContentPart } from 'openai/resources/chat/completions';
|
|
2
|
-
import type { LoginCredentials, VideoCursorTheme, VideoObservationSnapshot, VideoStep } from './types.js';
|
|
3
|
-
/** Planning modes accepted by the planner prompt builders. */
export type VideoPlannerMode = 'full' | 'base' | 'variant_prefix' | 'clip';
/**
 * Language/theme variant a plan is generated for.
 * NOTE(review): the expected format of `lang` and how the instruction strings
 * are used are not visible in this declaration file; confirm in the implementation.
 */
export interface VideoPlannerPromptVariant {
    lang?: string;
    theme?: 'light' | 'dark';
    langInstructions?: string;
    themeInstructions?: string;
}
/** Options accepted by the planner prompt builders. All fields are optional. */
export interface VideoPlannerPromptOptions {
    mode?: VideoPlannerMode;
    variant?: VideoPlannerPromptVariant;
    credentials?: LoginCredentials;
    observationSummary?: string;
    observationSnapshot?: VideoObservationSnapshot;
    /** Screenshot of the page for visual grounding. Sent as image to vision-capable models. */
    screenshot?: Buffer;
}
/**
 * Builds chat-completion content parts from a text and an optional image URL.
 * NOTE(review): the effect of `cacheLayoutV2` is not visible here; confirm in the implementation.
 */
export declare function buildVideoPromptContentParts(params: {
    text: string;
    imageUrl?: string;
    cacheLayoutV2?: boolean;
}): ChatCompletionContentPart[];
/** Returns the system prompt for the step-fixer, given the video script. */
export declare function buildStepFixerSystemPrompt(videoScript: string): string;
/** Returns the step-fixer user message for a failed step, its failure reason and a suggestion. */
export declare function buildStepFixerUserMessage(step: VideoStep, failureReason: string, suggestion: string, observationSummary?: string, observationSnapshot?: VideoObservationSnapshot): string;
/**
 * JavaScript injected via `context.addInitScript()` to show a visible animated
 * cursor in Playwright video recordings (the native OS cursor is invisible).
 */
export declare function buildCursorOverlayScript(theme?: VideoCursorTheme): string;
/** Returns the planner system prompt for the given options. */
export declare function buildVideoPlannerSystemPrompt(options?: VideoPlannerPromptOptions): string;
/** Returns the planner user message for a script and start URL. */
export declare function buildVideoPlannerUserMessage(script: string, url: string, options?: VideoPlannerPromptOptions): string;
/** Returns the system prompt for step verification, given the video script. */
export declare function buildVideoVerificationSystemPrompt(videoScript: string): string;
/**
 * Returns the user message for verifying one step.
 * NOTE(review): whether `stepIndex` is zero- or one-based is not visible here; confirm against callers.
 */
export declare function buildVideoStepVerificationUserMessage(step: VideoStep, stepIndex: number, totalSteps: number, pageContext?: {
    currentUrl?: string;
    pageTitle?: string;
}, observationSummary?: string, observationSnapshot?: VideoObservationSnapshot): string;
|