autokap 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. package/dist/cli-config.d.ts +13 -0
  2. package/dist/cli-config.js +42 -0
  3. package/dist/cli-utils.d.ts +0 -19
  4. package/dist/cli-utils.js +2 -65
  5. package/dist/cli.d.ts +0 -1
  6. package/dist/cli.js +266 -305
  7. package/package.json +23 -16
  8. package/assets/chrome/ios-statusbar-comparison-reference.jpg +0 -0
  9. package/assets/chrome/ios-statusbar-dark-reference.jpg +0 -0
  10. package/assets/chrome/ios-statusbar-light-reference.jpg +0 -0
  11. package/assets/devices/ipad-pro-11-m4.json +0 -52
  12. package/assets/devices/iphone-16-pro.json +0 -53
  13. package/assets/devices/macbook-air-13.json +0 -45
  14. package/assets/frames/MacBook Air 13.svg +0 -242
  15. package/assets/frames/Status bar - iPhone.png +0 -0
  16. package/assets/frames/Status bar and Menu bar- iPad.png +0 -0
  17. package/assets/frames/iPad Pro M4 11_.png +0 -0
  18. package/assets/frames/iPhone 16 Pro.png +0 -0
  19. package/assets/icons/Cellular Connection.svg +0 -3
  20. package/assets/icons/Union.svg +0 -6
  21. package/assets/icons/Wifi.svg +0 -3
  22. package/assets/icons/battery.svg +0 -5
  23. package/assets/icons/battery_charging.svg +0 -8
  24. package/dist/abort.d.ts +0 -5
  25. package/dist/abort.js +0 -44
  26. package/dist/agent.d.ts +0 -142
  27. package/dist/agent.js +0 -4504
  28. package/dist/browser-bar.d.ts +0 -40
  29. package/dist/browser-bar.js +0 -147
  30. package/dist/clip-orchestrator.d.ts +0 -148
  31. package/dist/clip-orchestrator.js +0 -950
  32. package/dist/clip-postprocess.d.ts +0 -42
  33. package/dist/clip-postprocess.js +0 -192
  34. package/dist/credential-templates.d.ts +0 -5
  35. package/dist/credential-templates.js +0 -60
  36. package/dist/element-capture.d.ts +0 -53
  37. package/dist/element-capture.js +0 -766
  38. package/dist/hybrid-navigator.d.ts +0 -138
  39. package/dist/hybrid-navigator.js +0 -468
  40. package/dist/index.d.ts +0 -15
  41. package/dist/index.js +0 -11
  42. package/dist/llm-usage.d.ts +0 -17
  43. package/dist/llm-usage.js +0 -45
  44. package/dist/mockup-html.d.ts +0 -119
  45. package/dist/mockup-html.js +0 -253
  46. package/dist/mockup.d.ts +0 -94
  47. package/dist/mockup.js +0 -604
  48. package/dist/mouse-animation.d.ts +0 -46
  49. package/dist/mouse-animation.js +0 -100
  50. package/dist/overlay-utils.d.ts +0 -14
  51. package/dist/overlay-utils.js +0 -13
  52. package/dist/posthog.d.ts +0 -4
  53. package/dist/posthog.js +0 -26
  54. package/dist/prompt-cache.d.ts +0 -10
  55. package/dist/prompt-cache.js +0 -24
  56. package/dist/prompts.d.ts +0 -167
  57. package/dist/prompts.js +0 -1165
  58. package/dist/security.d.ts +0 -20
  59. package/dist/security.js +0 -569
  60. package/dist/session-profile.d.ts +0 -86
  61. package/dist/session-profile.js +0 -1471
  62. package/dist/sf-pro-fonts.d.ts +0 -4
  63. package/dist/sf-pro-fonts.js +0 -7
  64. package/dist/status-bar-l10n.d.ts +0 -14
  65. package/dist/status-bar-l10n.js +0 -177
  66. package/dist/status-bar.d.ts +0 -44
  67. package/dist/status-bar.js +0 -336
  68. package/dist/tools.d.ts +0 -4
  69. package/dist/tools.js +0 -578
  70. package/dist/video-agent.d.ts +0 -143
  71. package/dist/video-agent.js +0 -4783
  72. package/dist/video-observation.d.ts +0 -36
  73. package/dist/video-observation.js +0 -192
  74. package/dist/video-planner.d.ts +0 -12
  75. package/dist/video-planner.js +0 -500
  76. package/dist/video-prompts.d.ts +0 -37
  77. package/dist/video-prompts.js +0 -554
  78. package/dist/video-tools.d.ts +0 -3
  79. package/dist/video-tools.js +0 -59
  80. package/dist/video-variant-state.d.ts +0 -29
  81. package/dist/video-variant-state.js +0 -80
  82. package/dist/vision-model.d.ts +0 -17
  83. package/dist/vision-model.js +0 -74
package/dist/tools.js DELETED
@@ -1,578 +0,0 @@
1
- // Note: evaluation_previous_action and memory fields are no longer injected into
2
- // tool schemas. They are documented in the system prompt instead (see prompts.ts),
3
- // which saves ~1,268 tokens/iteration by avoiding duplication across all tools.
4
- export const agentTools = [
5
- {
6
- type: 'function',
7
- function: {
8
- name: 'click',
9
- description: 'Click or hover an element. Prefer index ([N] badge). Use hover_only=true for menus, tooltips, popovers.',
10
- parameters: {
11
- type: 'object',
12
- properties: {
13
- reason: {
14
- type: 'string',
15
- description: 'Brief reason.',
16
- },
17
- index: {
18
- type: 'number',
19
- description: 'Element badge index [N].',
20
- },
21
- selector: {
22
- type: 'string',
23
- description: 'CSS selector. Use only when index is unavailable.',
24
- },
25
- x: {
26
- type: 'number',
27
- description: 'X coordinate (viewport pixels). Last resort.',
28
- },
29
- y: {
30
- type: 'number',
31
- description: 'Y coordinate (viewport pixels). Last resort.',
32
- },
33
- hover_only: {
34
- type: 'boolean',
35
- description: 'If true, hover without clicking. Use to reveal dropdown menus, tooltips, mega-navs, or account popovers.',
36
- },
37
- },
38
- required: ['reason'],
39
- // NOTE: additionalProperties omitted — strict schema validation can cause tool parsing failures on some models
40
- },
41
- },
42
- },
43
- {
44
- type: 'function',
45
- function: {
46
- name: 'type_text',
47
- description: 'Type text into an input field.',
48
- parameters: {
49
- type: 'object',
50
- properties: {
51
- reason: {
52
- type: 'string',
53
- description: 'Brief reason.',
54
- },
55
- text: {
56
- type: 'string',
57
- description: 'Text to type. When credentials are available, use placeholders like {{credential.email}} or {{credential.password}} instead of guessing the value.',
58
- },
59
- index: {
60
- type: 'number',
61
- description: 'Index of the input element to type into.',
62
- },
63
- selector: {
64
- type: 'string',
65
- description: 'CSS selector of the input element.',
66
- },
67
- clearFirst: {
68
- type: 'boolean',
69
- description: 'Clear the input before typing. Default false.',
70
- },
71
- },
72
- required: ['reason', 'text'],
73
- // NOTE: additionalProperties omitted — strict schema validation can cause tool parsing failures on some models
74
- },
75
- },
76
- },
77
- {
78
- type: 'function',
79
- function: {
80
- name: 'select_option',
81
- description: 'Change a native select/dropdown value (sort, filter, language, theme).',
82
- parameters: {
83
- type: 'object',
84
- properties: {
85
- reason: {
86
- type: 'string',
87
- description: 'Brief reason.',
88
- },
89
- index: {
90
- type: 'number',
91
- description: 'Element badge index [N].',
92
- },
93
- selector: {
94
- type: 'string',
95
- description: 'CSS selector. Use only when index is unavailable.',
96
- },
97
- optionLabel: {
98
- type: 'string',
99
- description: 'Visible label of the option to choose.',
100
- },
101
- optionValue: {
102
- type: 'string',
103
- description: 'Underlying option value to choose.',
104
- },
105
- optionIndex: {
106
- type: 'number',
107
- description: 'Zero-based option index to choose if label/value are unavailable.',
108
- },
109
- },
110
- required: ['reason'],
111
- // NOTE: additionalProperties omitted — strict schema validation can cause tool parsing failures on some models
112
- },
113
- },
114
- },
115
- {
116
- type: 'function',
117
- function: {
118
- name: 'scroll',
119
- description: 'Scroll the page or bring an element into view. Use index to scroll element into view (preferred); direction+amount for general scrolling.',
120
- parameters: {
121
- type: 'object',
122
- properties: {
123
- reason: {
124
- type: 'string',
125
- description: 'Brief reason.',
126
- },
127
- direction: {
128
- type: 'string',
129
- enum: ['up', 'down', 'left', 'right'],
130
- description: 'Scroll direction. Required unless using index.',
131
- },
132
- amount: {
133
- type: 'number',
134
- description: 'Pixels to scroll. Default 500.',
135
- },
136
- selector: {
137
- type: 'string',
138
- description: 'CSS selector of a scrollable container. Omit for page scroll.',
139
- },
140
- index: {
141
- type: 'number',
142
- description: 'Element index to scroll into view. Ignores direction/amount.',
143
- },
144
- align: {
145
- type: 'string',
146
- enum: ['start', 'center', 'end'],
147
- description: 'Where to place the element when using index. Default: center.',
148
- },
149
- },
150
- required: ['reason'],
151
- // NOTE: additionalProperties omitted — strict schema validation can cause tool parsing failures on some models
152
- },
153
- },
154
- },
155
- {
156
- type: 'function',
157
- function: {
158
- name: 'press_key',
159
- description: 'Press a key or combination. Examples: "Escape", "Enter", "Tab", "ArrowDown".',
160
- parameters: {
161
- type: 'object',
162
- properties: {
163
- reason: {
164
- type: 'string',
165
- description: 'Brief reason.',
166
- },
167
- key: {
168
- type: 'string',
169
- description: 'Key to press (Playwright key names).',
170
- },
171
- },
172
- required: ['reason', 'key'],
173
- // NOTE: additionalProperties omitted — strict schema validation can cause tool parsing failures on some models
174
- },
175
- },
176
- },
177
- {
178
- type: 'function',
179
- function: {
180
- name: 'dismiss_overlays',
181
- description: 'Dismiss cookie banners and chat/feedback widgets. Use after navigation, after resize, or when the screenshot is obstructed.',
182
- parameters: {
183
- type: 'object',
184
- properties: {
185
- reason: {
186
- type: 'string',
187
- description: 'Brief reason.',
188
- },
189
- },
190
- required: ['reason'],
191
- // NOTE: additionalProperties omitted — strict schema validation can cause tool parsing failures on some models
192
- },
193
- },
194
- },
195
- {
196
- type: 'function',
197
- function: {
198
- name: 'wait',
199
- description: 'Wait for a duration. Use when the page is loading or animating.',
200
- parameters: {
201
- type: 'object',
202
- properties: {
203
- reason: {
204
- type: 'string',
205
- description: 'Brief reason.',
206
- },
207
- milliseconds: {
208
- type: 'number',
209
- description: 'Wait duration in ms. Default 1000, max 5000.',
210
- },
211
- },
212
- required: ['reason'],
213
- // NOTE: additionalProperties omitted — strict schema validation can cause tool parsing failures on some models
214
- },
215
- },
216
- },
217
- {
218
- type: 'function',
219
- function: {
220
- name: 'search_text',
221
- description: 'Search the DOM for elements containing text — use when an element is visible in the screenshot but absent from the elements list. Returns tag, text, position and size for clicking by coordinates.',
222
- parameters: {
223
- type: 'object',
224
- properties: {
225
- reason: {
226
- type: 'string',
227
- description: 'Brief reason.',
228
- },
229
- query: {
230
- type: 'string',
231
- description: 'Text to search (case-insensitive partial match). Use a short phrase visible on the page.',
232
- },
233
- },
234
- required: ['reason', 'query'],
235
- // NOTE: additionalProperties omitted — strict schema validation can cause tool parsing failures on some models
236
- },
237
- },
238
- },
239
- {
240
- type: 'function',
241
- function: {
242
- name: 'analyze_screenshot',
243
- description: 'Request a visual analysis of the current page screenshot. Use when the DOM structure alone is insufficient — for visual layout, colors, theme checks, overlay/spinner detection, or rendering verification. Returns a text description of what is visually on screen.',
244
- parameters: {
245
- type: 'object',
246
- properties: {
247
- reason: {
248
- type: 'string',
249
- description: 'Why visual analysis is needed (e.g., "check theme colors", "verify no overlay blocking content").',
250
- },
251
- },
252
- required: ['reason'],
253
- },
254
- },
255
- },
256
- {
257
- type: 'function',
258
- function: {
259
- name: 'navigate_to',
260
- description: 'Navigate to a URL. Page fully loads before continuing.',
261
- parameters: {
262
- type: 'object',
263
- properties: {
264
- reason: {
265
- type: 'string',
266
- description: 'Brief reason.',
267
- },
268
- url: {
269
- type: 'string',
270
- description: 'The full URL to navigate to.',
271
- },
272
- },
273
- required: ['reason', 'url'],
274
- // NOTE: additionalProperties omitted — strict schema validation can cause tool parsing failures on some models
275
- },
276
- },
277
- },
278
- {
279
- type: 'function',
280
- function: {
281
- name: 'resize_viewport',
282
- description: 'Resize viewport (desktop/tablet/mobile). Wait after resizing for layout to settle.',
283
- parameters: {
284
- type: 'object',
285
- properties: {
286
- reason: {
287
- type: 'string',
288
- description: 'Brief reason.',
289
- },
290
- width: {
291
- type: 'number',
292
- description: 'Width in px (e.g. 1440 desktop, 768 tablet, 375 mobile).',
293
- },
294
- height: {
295
- type: 'number',
296
- description: 'Height in px (e.g. 900 desktop, 1024 tablet, 812 mobile).',
297
- },
298
- },
299
- required: ['reason', 'width', 'height'],
300
- // NOTE: additionalProperties omitted — strict schema validation can cause tool parsing failures on some models
301
- },
302
- },
303
- },
304
- {
305
- type: 'function',
306
- function: {
307
- name: 'take_screenshot',
308
- description: 'Capture a screenshot without ending the session — use for intermediate captures in multi-step workflows. Loop continues after this. Use ready_to_capture for the final screenshot.',
309
- parameters: {
310
- type: 'object',
311
- properties: {
312
- label: {
313
- type: 'string',
314
- description: 'Short label, e.g. "Landing page hero section", "Dashboard after login".',
315
- },
316
- assessment: {
317
- type: 'string',
318
- description: 'Why this is a good capture moment (no overlays/spinners, correct content visible).',
319
- },
320
- },
321
- required: ['label', 'assessment'],
322
- // NOTE: additionalProperties omitted — strict schema validation can cause tool parsing failures on some models
323
- },
324
- },
325
- },
326
- {
327
- type: 'function',
328
- function: {
329
- name: 'ready_to_capture',
330
- description: 'Signal the ENTIRE workflow is complete and capture the final screenshot — ends the session. Use take_screenshot for intermediate captures. Set force=true only if the blocking dialog IS the intended capture target.',
331
- parameters: {
332
- type: 'object',
333
- properties: {
334
- assessment: {
335
- type: 'string',
336
- description: 'Why the workflow is complete and the page is ready.',
337
- },
338
- force: {
339
- type: 'boolean',
340
- description: 'Bypass dialog check when the dialog IS the capture target. Only after a "Blocking dialog" failure.',
341
- },
342
- },
343
- required: ['assessment'],
344
- // NOTE: additionalProperties omitted — strict schema validation can cause tool parsing failures on some models
345
- },
346
- },
347
- },
348
- {
349
- type: 'function',
350
- function: {
351
- name: 'give_up',
352
- description: 'Signal that the requested screenshot cannot be achieved. Use when: login required, content missing, or stuck in a loop.',
353
- parameters: {
354
- type: 'object',
355
- properties: {
356
- reason: {
357
- type: 'string',
358
- description: 'Explanation of why capture cannot be completed.',
359
- },
360
- },
361
- required: ['reason'],
362
- // NOTE: additionalProperties omitted — strict schema validation can cause tool parsing failures on some models
363
- },
364
- },
365
- },
366
- {
367
- type: 'function',
368
- function: {
369
- name: 'begin_subgoal',
370
- description: 'Declare a new sub-objective. Archives previous notes and resets working memory. Use at natural workflow boundaries: after login, before language/theme change, before new section, before final capture.',
371
- parameters: {
372
- type: 'object',
373
- properties: {
374
- name: {
375
- type: 'string',
376
- description: 'Snake_case name (max 4 words). Examples: login, set_language, navigate_to_pricing.',
377
- },
378
- progress_summary: {
379
- type: 'string',
380
- description: 'One-line summary of what was accomplished in the PREVIOUS subgoal (max 120 chars). Omit on the very first subgoal.',
381
- },
382
- },
383
- required: ['name'],
384
- // NOTE: additionalProperties omitted — strict schema validation can cause tool parsing failures on some models
385
- },
386
- },
387
- },
388
- {
389
- type: 'function',
390
- function: {
391
- name: 'note',
392
- description: 'Store a key observation or fact to remember across iterations. Use when you discover important information (element locations, page state, multi-step findings) that you will need in later iterations.',
393
- parameters: {
394
- type: 'object',
395
- properties: {
396
- reason: {
397
- type: 'string',
398
- description: 'Short label for this note (snake_case, max 6 tokens).',
399
- },
400
- content: {
401
- type: 'string',
402
- description: 'The observation to store (max 120 chars).',
403
- },
404
- },
405
- required: ['reason', 'content'],
406
- // NOTE: additionalProperties omitted — strict schema validation can cause tool parsing failures on some models
407
- },
408
- },
409
- },
410
- ];
411
- /** Restricted tool set for the element capture mini-agent */
412
- export const elementCaptureTools = [
413
- {
414
- type: 'function',
415
- function: {
416
- name: 'capture_by_selector',
417
- description: 'Capture a screenshot of a page element or component identified by a unique CSS selector. ' +
418
- 'Works for both single interactive elements (buttons, inputs) and full components or sections ' +
419
- '(navigation bars, forms, pricing tables, hero sections). ' +
420
- 'The selector MUST match exactly one visible element — the system validates this before capturing. ' +
421
- 'Resolve the selector using this priority order: ' +
422
- '1) [data-testid="..."] or [data-component="..."] — most stable; ' +
423
- '2) ARIA landmark with label: nav[aria-label="Main"], [role="dialog"]; ' +
424
- '3) Unique ID: #hero-section, #pricing-table; ' +
425
- '4) Semantic class on a semantic element: form.login-form, section.pricing, nav.main-nav; ' +
426
- '5) DOM path as last resort: header > .site-logo. ' +
427
- 'If the system returns a validation error (no match, ambiguous, invisible), read the reason and retry with a refined selector. ' +
428
- 'Use the sel="" values shown in the interactive elements list as starting points for selector construction. ' +
429
- 'If a previous capture was rejected only because the crop was too loose, stay grounded on sel="" values returned by the tools and prefer any search_text container selector instead of inventing a tag-only DOM path from the simplified DOM.',
430
- parameters: {
431
- type: 'object',
432
- properties: {
433
- selector: {
434
- type: 'string',
435
- description: 'A unique CSS selector matching exactly one visible element. ' +
436
- 'Examples: [data-testid="pricing-card"], #hero-section, nav[aria-label="Main navigation"], ' +
437
- '.pricing-table, form.login-form, header > .navbar',
438
- },
439
- confidence: {
440
- type: 'number',
441
- description: 'How confident you are this selector matches the correct element (0.0 to 1.0).',
442
- },
443
- reasoning: {
444
- type: 'string',
445
- description: 'Brief explanation of why this selector was chosen and what it should match.',
446
- },
447
- },
448
- required: ['selector', 'confidence', 'reasoning'],
449
- // NOTE: additionalProperties omitted — strict schema validation can cause tool parsing failures on some models
450
- },
451
- },
452
- },
453
- {
454
- type: 'function',
455
- function: {
456
- name: 'scroll',
457
- description: 'Scroll the page to reveal off-screen elements. Provide index to snap a known interactive element into view (preferred). Provide direction+amount for general scrolling.',
458
- parameters: {
459
- type: 'object',
460
- properties: {
461
- reason: {
462
- type: 'string',
463
- description: 'Brief explanation of why you are scrolling.',
464
- },
465
- direction: {
466
- type: 'string',
467
- enum: ['up', 'down'],
468
- description: 'Scroll direction. Required unless using index.',
469
- },
470
- amount: {
471
- type: 'number',
472
- description: 'Pixels to scroll. Default 500.',
473
- },
474
- index: {
475
- type: 'number',
476
- description: 'Index of an interactive element to scroll into view. When provided, direction/amount are ignored.',
477
- },
478
- align: {
479
- type: 'string',
480
- enum: ['start', 'center', 'end'],
481
- description: 'Alignment when using index. Default: center.',
482
- },
483
- },
484
- required: ['reason'],
485
- // NOTE: additionalProperties omitted — strict schema validation can cause tool parsing failures on some models
486
- },
487
- },
488
- },
489
- {
490
- type: 'function',
491
- function: {
492
- name: 'dismiss_overlays',
493
- description: 'Dismiss cookie banners and hide common sticky overlays/widgets before capturing an element.',
494
- parameters: {
495
- type: 'object',
496
- properties: {
497
- reason: {
498
- type: 'string',
499
- description: 'Brief explanation of why you are dismissing overlays.',
500
- },
501
- },
502
- required: ['reason'],
503
- // NOTE: additionalProperties omitted — strict schema validation can cause tool parsing failures on some models
504
- },
505
- },
506
- },
507
- {
508
- type: 'function',
509
- function: {
510
- name: 'search_text',
511
- description: 'Search the entire page DOM for elements containing specific text. ' +
512
- 'Returns matching elements with tag, text, position, size, and a sel="..." CSS selector. ' +
513
- 'Use the sel= value directly in capture_by_selector — no need to scroll first.',
514
- parameters: {
515
- type: 'object',
516
- properties: {
517
- reason: {
518
- type: 'string',
519
- description: 'Brief explanation of why you are searching for this text.',
520
- },
521
- query: {
522
- type: 'string',
523
- description: 'Text to search for (case-insensitive partial match).',
524
- },
525
- },
526
- required: ['reason', 'query'],
527
- // NOTE: additionalProperties omitted — strict schema validation can cause tool parsing failures on some models
528
- },
529
- },
530
- },
531
- {
532
- type: 'function',
533
- function: {
534
- name: 'resize_viewport',
535
- description: 'Resize the browser viewport. Use this when the target component is taller or wider than the current viewport ' +
536
- 'and you need to see or capture it in full. After resizing, the page re-renders and you get an updated screenshot ' +
537
- 'on the next iteration. The viewport will be restored to its original size after capture.',
538
- parameters: {
539
- type: 'object',
540
- properties: {
541
- reason: {
542
- type: 'string',
543
- description: 'Brief explanation of why you are resizing the viewport.',
544
- },
545
- width: {
546
- type: 'number',
547
- description: 'New viewport width in CSS pixels.',
548
- },
549
- height: {
550
- type: 'number',
551
- description: 'New viewport height in CSS pixels.',
552
- },
553
- },
554
- required: ['reason', 'width', 'height'],
555
- // NOTE: additionalProperties omitted — strict schema validation can cause tool parsing failures on some models
556
- },
557
- },
558
- },
559
- {
560
- type: 'function',
561
- function: {
562
- name: 'give_up',
563
- description: 'Signal that the described element cannot be found on the page.',
564
- parameters: {
565
- type: 'object',
566
- properties: {
567
- reason: {
568
- type: 'string',
569
- description: 'Explanation of why the element cannot be found.',
570
- },
571
- },
572
- required: ['reason'],
573
- // NOTE: additionalProperties omitted — strict schema validation can cause tool parsing failures on some models
574
- },
575
- },
576
- },
577
- ];
578
- //# sourceMappingURL=tools.js.map