autokap 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. package/dist/cli-config.d.ts +13 -0
  2. package/dist/cli-config.js +42 -0
  3. package/dist/cli-utils.d.ts +0 -19
  4. package/dist/cli-utils.js +2 -65
  5. package/dist/cli.d.ts +0 -1
  6. package/dist/cli.js +266 -305
  7. package/package.json +23 -16
  8. package/assets/chrome/ios-statusbar-comparison-reference.jpg +0 -0
  9. package/assets/chrome/ios-statusbar-dark-reference.jpg +0 -0
  10. package/assets/chrome/ios-statusbar-light-reference.jpg +0 -0
  11. package/assets/devices/ipad-pro-11-m4.json +0 -52
  12. package/assets/devices/iphone-16-pro.json +0 -53
  13. package/assets/devices/macbook-air-13.json +0 -45
  14. package/assets/frames/MacBook Air 13.svg +0 -242
  15. package/assets/frames/Status bar - iPhone.png +0 -0
  16. package/assets/frames/Status bar and Menu bar- iPad.png +0 -0
  17. package/assets/frames/iPad Pro M4 11_.png +0 -0
  18. package/assets/frames/iPhone 16 Pro.png +0 -0
  19. package/assets/icons/Cellular Connection.svg +0 -3
  20. package/assets/icons/Union.svg +0 -6
  21. package/assets/icons/Wifi.svg +0 -3
  22. package/assets/icons/battery.svg +0 -5
  23. package/assets/icons/battery_charging.svg +0 -8
  24. package/dist/abort.d.ts +0 -5
  25. package/dist/abort.js +0 -44
  26. package/dist/agent.d.ts +0 -142
  27. package/dist/agent.js +0 -4504
  28. package/dist/browser-bar.d.ts +0 -40
  29. package/dist/browser-bar.js +0 -147
  30. package/dist/clip-orchestrator.d.ts +0 -148
  31. package/dist/clip-orchestrator.js +0 -950
  32. package/dist/clip-postprocess.d.ts +0 -42
  33. package/dist/clip-postprocess.js +0 -192
  34. package/dist/credential-templates.d.ts +0 -5
  35. package/dist/credential-templates.js +0 -60
  36. package/dist/element-capture.d.ts +0 -53
  37. package/dist/element-capture.js +0 -766
  38. package/dist/hybrid-navigator.d.ts +0 -138
  39. package/dist/hybrid-navigator.js +0 -468
  40. package/dist/index.d.ts +0 -15
  41. package/dist/index.js +0 -11
  42. package/dist/llm-usage.d.ts +0 -17
  43. package/dist/llm-usage.js +0 -45
  44. package/dist/mockup-html.d.ts +0 -119
  45. package/dist/mockup-html.js +0 -253
  46. package/dist/mockup.d.ts +0 -94
  47. package/dist/mockup.js +0 -604
  48. package/dist/mouse-animation.d.ts +0 -46
  49. package/dist/mouse-animation.js +0 -100
  50. package/dist/overlay-utils.d.ts +0 -14
  51. package/dist/overlay-utils.js +0 -13
  52. package/dist/posthog.d.ts +0 -4
  53. package/dist/posthog.js +0 -26
  54. package/dist/prompt-cache.d.ts +0 -10
  55. package/dist/prompt-cache.js +0 -24
  56. package/dist/prompts.d.ts +0 -167
  57. package/dist/prompts.js +0 -1165
  58. package/dist/security.d.ts +0 -20
  59. package/dist/security.js +0 -569
  60. package/dist/session-profile.d.ts +0 -86
  61. package/dist/session-profile.js +0 -1471
  62. package/dist/sf-pro-fonts.d.ts +0 -4
  63. package/dist/sf-pro-fonts.js +0 -7
  64. package/dist/status-bar-l10n.d.ts +0 -14
  65. package/dist/status-bar-l10n.js +0 -177
  66. package/dist/status-bar.d.ts +0 -44
  67. package/dist/status-bar.js +0 -336
  68. package/dist/tools.d.ts +0 -4
  69. package/dist/tools.js +0 -578
  70. package/dist/video-agent.d.ts +0 -143
  71. package/dist/video-agent.js +0 -4783
  72. package/dist/video-observation.d.ts +0 -36
  73. package/dist/video-observation.js +0 -192
  74. package/dist/video-planner.d.ts +0 -12
  75. package/dist/video-planner.js +0 -500
  76. package/dist/video-prompts.d.ts +0 -37
  77. package/dist/video-prompts.js +0 -554
  78. package/dist/video-tools.d.ts +0 -3
  79. package/dist/video-tools.js +0 -59
  80. package/dist/video-variant-state.d.ts +0 -29
  81. package/dist/video-variant-state.js +0 -80
  82. package/dist/vision-model.d.ts +0 -17
  83. package/dist/vision-model.js +0 -74
@@ -1,100 +0,0 @@
1
- /** Ease-in-out cubic timing function */
2
- function easeInOut(t) {
3
- return t < 0.5 ? 4 * t * t * t : 1 - Math.pow(-2 * t + 2, 3) / 2;
4
- }
5
- /** Cubic Bezier interpolation */
6
- function cubicBezier(p0, p1, p2, p3, t) {
7
- const mt = 1 - t;
8
- return {
9
- x: mt ** 3 * p0.x + 3 * mt ** 2 * t * p1.x + 3 * mt * t ** 2 * p2.x + t ** 3 * p3.x,
10
- y: mt ** 3 * p0.y + 3 * mt ** 2 * t * p1.y + 3 * mt * t ** 2 * p2.y + t ** 3 * p3.y,
11
- };
12
- }
13
- function randomOffset(scale) {
14
- return (Math.random() - 0.5) * 2 * scale;
15
- }
16
- /**
17
- * Move the mouse from `from` to `to` along a cubic Bezier curve with natural
18
- * human-like motion: ease-in-out timing, randomized control points, micro-jitter.
19
- */
20
- export async function moveMouse(page, from, to, options = {}) {
21
- const dx = to.x - from.x;
22
- const dy = to.y - from.y;
23
- const distance = Math.sqrt(dx * dx + dy * dy);
24
- if (distance < 2)
25
- return; // Already there
26
- const steps = options.steps ?? 30;
27
- const durationMs = options.durationMs ?? Math.min(900, Math.max(350, distance * 1.2));
28
- const msPerStep = durationMs / steps;
29
- // Randomize control points to create a natural curve.
30
- // Enforce a minimum arc so short-distance moves still curve visibly.
31
- const perpScale = Math.max(30, Math.min(distance * 0.3, 80));
32
- const p1 = {
33
- x: from.x + dx * 0.25 + randomOffset(perpScale),
34
- y: from.y + dy * 0.25 + randomOffset(perpScale),
35
- };
36
- const p2 = {
37
- x: from.x + dx * 0.75 + randomOffset(perpScale),
38
- y: from.y + dy * 0.75 + randomOffset(perpScale),
39
- };
40
- for (let i = 1; i <= steps; i++) {
41
- const linearT = i / steps;
42
- const t = easeInOut(linearT);
43
- const point = cubicBezier(from, p1, p2, to, t);
44
- // Add micro-jitter to simulate hand imprecision
45
- const jitter = Math.max(0, 1 - linearT) * 1.5; // jitter decreases near destination
46
- const jx = randomOffset(jitter);
47
- const jy = randomOffset(jitter);
48
- await page.mouse.move(point.x + jx, point.y + jy);
49
- if (msPerStep > 1) {
50
- await page.waitForTimeout(msPerStep);
51
- }
52
- }
53
- // Final move to exact destination (no jitter)
54
- await page.mouse.move(to.x, to.y);
55
- }
56
- /**
57
- * Move the mouse to `target` with Bezier curve animation and click.
58
- *
59
- * @param fromCurrent - Optional current mouse position. If omitted, the click
60
- * is performed at `target` without a preceding Bezier move (first action).
61
- */
62
- export async function animatedClick(page, target, fromCurrent, options = {}) {
63
- if (fromCurrent) {
64
- await moveMouse(page, fromCurrent, target, options);
65
- }
66
- else {
67
- await page.mouse.move(target.x, target.y);
68
- }
69
- // Brief hover pause before clicking (human-like)
70
- await page.waitForTimeout(80 + Math.random() * 80);
71
- await page.mouse.down();
72
- await page.waitForTimeout(70);
73
- await page.mouse.up();
74
- }
75
- /**
76
- * Move the mouse to `target` (for hover/highlight actions) without clicking.
77
- */
78
- export async function animatedHover(page, target, fromCurrent, options = {}) {
79
- if (fromCurrent) {
80
- await moveMouse(page, fromCurrent, target, options);
81
- }
82
- else {
83
- await page.mouse.move(target.x, target.y);
84
- }
85
- // Brief pause at hover position
86
- await page.waitForTimeout(200 + Math.random() * 200);
87
- }
88
- /**
89
- * Type text into the currently focused element at a human-like typing speed.
90
- * Assumes the field is already focused (via a preceding click).
91
- */
92
- export async function humanType(page, text) {
93
- for (const char of text) {
94
- await page.keyboard.type(char);
95
- // 60–120 WPM → ~80–130ms between characters (5 chars per word)
96
- const delay = 60 + Math.random() * 80;
97
- await page.waitForTimeout(delay);
98
- }
99
- }
100
- //# sourceMappingURL=mouse-animation.js.map
@@ -1,14 +0,0 @@
1
- import type { Browser } from './browser.js';
2
- type OverlayLogCallback = (entry: {
3
- level: 'error';
4
- message: string;
5
- timestamp: number;
6
- }) => void;
7
- export declare function dismissOverlaysWithLogging(browser: Browser, params: {
8
- context: string;
9
- onLog?: OverlayLogCallback;
10
- }): Promise<{
11
- dismissed: boolean;
12
- method: string | null;
13
- }>;
14
- export {};
@@ -1,13 +0,0 @@
1
- import { logger } from './logger.js';
2
- export async function dismissOverlaysWithLogging(browser, params) {
3
- try {
4
- return await browser.dismissOverlays();
5
- }
6
- catch (error) {
7
- const message = `Overlay dismissal failed during ${params.context}: ${error.message}`;
8
- logger.error(message);
9
- params.onLog?.({ level: 'error', message, timestamp: Date.now() });
10
- return { dismissed: false, method: null };
11
- }
12
- }
13
- //# sourceMappingURL=overlay-utils.js.map
package/dist/posthog.d.ts DELETED
@@ -1,4 +0,0 @@
1
- import { PostHog } from 'posthog-node';
2
- export declare const DISTINCT_ID: string;
3
- export declare function getPostHog(): PostHog;
4
- export declare function shutdownPostHog(): Promise<void>;
package/dist/posthog.js DELETED
@@ -1,26 +0,0 @@
1
- import { PostHog } from 'posthog-node';
2
- import { createHash } from 'node:crypto';
3
- import { hostname } from 'node:os';
4
- // Stable anonymous ID derived from machine hostname — no PII collected
5
- const machineId = createHash('sha256').update(hostname()).digest('hex').slice(0, 16);
6
- export const DISTINCT_ID = `cli-${machineId}`;
7
- // Short-lived process (CLI): flush immediately on every event
8
- let _client = null;
9
- export function getPostHog() {
10
- if (!_client) {
11
- _client = new PostHog(process.env.POSTHOG_API_KEY ?? '', {
12
- host: process.env.POSTHOG_HOST ?? 'https://eu.i.posthog.com',
13
- flushAt: 1,
14
- flushInterval: 0,
15
- enableExceptionAutocapture: true,
16
- });
17
- }
18
- return _client;
19
- }
20
- export async function shutdownPostHog() {
21
- if (_client) {
22
- await _client.shutdown();
23
- _client = null;
24
- }
25
- }
26
- //# sourceMappingURL=posthog.js.map
@@ -1,10 +0,0 @@
1
- export type PromptCacheStrategy = 'implicit_only' | 'explicit_breakpoints' | 'disabled';
2
- export interface ProviderRoutingPreference {
3
- order?: string[];
4
- require?: string[];
5
- disallow?: string[];
6
- }
7
- export declare function resolvePromptCacheStrategy(model: string, options?: {
8
- enableGeminiExplicitBreakpoints?: boolean;
9
- }): PromptCacheStrategy;
10
- export declare function hasManualMultiProviderOrder(prefs?: ProviderRoutingPreference | null): boolean;
@@ -1,24 +0,0 @@
1
- export function resolvePromptCacheStrategy(model, options = {}) {
2
- const normalized = model.trim().toLowerCase();
3
- if (!normalized)
4
- return 'disabled';
5
- if (normalized.startsWith('anthropic/'))
6
- return 'explicit_breakpoints';
7
- if (normalized.startsWith('google/') || normalized.includes('gemini')) {
8
- return options.enableGeminiExplicitBreakpoints ? 'explicit_breakpoints' : 'implicit_only';
9
- }
10
- if (normalized.startsWith('x-ai/')
11
- || normalized.includes('grok')
12
- || normalized.startsWith('openai/')
13
- || normalized.startsWith('deepseek/')
14
- || normalized.startsWith('moonshot/')
15
- || normalized.startsWith('moonshotai/')) {
16
- return 'implicit_only';
17
- }
18
- return 'disabled';
19
- }
20
- export function hasManualMultiProviderOrder(prefs) {
21
- const providers = prefs?.order?.map((provider) => provider.trim()).filter(Boolean) ?? [];
22
- return providers.length > 1;
23
- }
24
- //# sourceMappingURL=prompt-cache.js.map
package/dist/prompts.d.ts DELETED
@@ -1,167 +0,0 @@
1
- import type { ChatCompletionContentPart } from 'openai/resources/chat/completions';
2
- import type { AgentConfig, AgentRunHint, CaptureCursor, CaptureObjective, CaptureHandoffContext, ExecutedAction, InteractiveElement, LoginCredentials, RepairTicket, VariantCaptureManifest, ValidatedSessionProfile, WorkflowScreenshot } from './types.js';
3
- interface SystemPromptOptions {
4
- reasoningLocale?: string;
5
- }
6
- export declare function buildSystemPrompt(opts?: SystemPromptOptions): string;
7
- interface IterationPromptMetrics {
8
- elementsChars: number;
9
- sessionSummaryChars: number;
10
- selectorMemoryChars: number;
11
- agentContextChars: number;
12
- }
13
- export interface StableAnchorUserMessageParams {
14
- userPrompt: string;
15
- credentials?: LoginCredentials;
16
- currentLang?: string;
17
- currentTheme?: 'light' | 'dark';
18
- langInstructions?: string;
19
- themeInstructions?: string;
20
- viewports?: Array<{
21
- width: number;
22
- height: number;
23
- }>;
24
- runHints?: AgentRunHint[];
25
- selectorMemory?: Record<string, string[]>;
26
- sessionProfile?: ValidatedSessionProfile;
27
- handoffContext?: CaptureHandoffContext;
28
- variantManifest?: VariantCaptureManifest;
29
- }
30
- export interface IterationUserMessageParams {
31
- userPrompt: string;
32
- cleanScreenshotUrl?: string;
33
- screenshotUrl: string;
34
- /** In dual-model mode, the vision model's text observation replaces images. */
35
- visionObservation?: string;
36
- /** Simplified DOM structure — primary page context in DOM-first mode. */
37
- simplifiedDOM?: string;
38
- /** When true, the DOM has not changed since the previous iteration (fingerprint match).
39
- * The prompt emits a compact placeholder instead of the full DOM. */
40
- domUnchanged?: boolean;
41
- accessibilityTree: string;
42
- interactiveElements: InteractiveElement[];
43
- actionHistory?: ExecutedAction[];
44
- screenshotsTaken: WorkflowScreenshot[];
45
- iteration: number;
46
- maxIterations: number;
47
- credentials?: LoginCredentials;
48
- currentLang?: string;
49
- currentTheme?: 'light' | 'dark';
50
- langInstructions?: string;
51
- themeInstructions?: string;
52
- viewports?: Array<{
53
- width: number;
54
- height: number;
55
- }>;
56
- runHints?: AgentRunHint[];
57
- selectorMemory?: Record<string, string[]>;
58
- sessionProfile?: ValidatedSessionProfile;
59
- hasCredentials?: boolean;
60
- salienceCompressionEnabled?: boolean;
61
- viewport?: {
62
- width: number;
63
- height: number;
64
- };
65
- currentUrl?: string;
66
- stuckLoopWarning?: string;
67
- lastVerificationFailure?: string;
68
- userGuidance?: string[];
69
- scrollInfo?: {
70
- scrollY: number;
71
- scrollHeight: number;
72
- viewportHeight: number;
73
- };
74
- expansionLevel?: number;
75
- /** When true, includes the full task context (prompt, session, memory, instructions).
76
- * When false (subsequent iterations), sends only the compact page observation. */
77
- isFirstIteration?: boolean;
78
- /** Pre-generated action plan from the planning call. Included in first iteration and on stuck loops. */
79
- taskPlan?: string;
80
- /** Persistent notes stored by the agent via the `note` tool. Re-injected every iteration. */
81
- agentNotes?: string[];
82
- /** Current active subgoal name (set by `begin_subgoal` tool). */
83
- currentSubgoal?: string;
84
- /** Completed subgoals with compressed summaries (HiAgent hierarchical memory). */
85
- completedSubgoals?: Array<{
86
- name: string;
87
- summary: string;
88
- }>;
89
- /** Compact deterministic log of all actions taken so far (survives conversation trimming). */
90
- trajectoryLog?: string;
91
- /** Live browser state handed off from the previous capture in the same run. */
92
- handoffContext?: CaptureHandoffContext;
93
- /** Explicit multi-page manifest for the current variant/page. */
94
- variantManifest?: VariantCaptureManifest;
95
- currentObjective?: CaptureObjective;
96
- captureCursor?: CaptureCursor;
97
- activeRepairTicket?: RepairTicket | null;
98
- remainingCaptureQueue?: string[];
99
- cacheLayoutV2?: boolean;
100
- /** Reference from variant 1 — what state was achieved for this page in the first variant. */
101
- variantReference?: {
102
- finalUrl: string;
103
- assessment: string;
104
- pageTitle: string;
105
- /** Full action sequence from variant 1 — helps the LLM retrace the path. */
106
- actions?: ExecutedAction[];
107
- };
108
- /** Compact summary of recent failed actions — prevents the agent from repeating dead-end strategies. */
109
- failedAttemptsSummary?: string;
110
- }
111
- export declare function buildStableAnchorUserMessage(params: StableAnchorUserMessageParams): {
112
- content: ChatCompletionContentPart[];
113
- metrics: IterationPromptMetrics;
114
- };
115
- export declare function buildIterationUserMessage(params: IterationUserMessageParams): {
116
- content: ChatCompletionContentPart[];
117
- metrics: IterationPromptMetrics;
118
- };
119
- export declare function buildVerificationMessage(params: {
120
- userPrompt: string;
121
- screenshotUrl: string;
122
- previousAssessment: string;
123
- runMode?: AgentConfig['runMode'];
124
- currentLang?: string;
125
- currentTheme?: 'light' | 'dark';
126
- pageContext?: {
127
- currentUrl?: string;
128
- pageTitle?: string;
129
- };
130
- runHints?: AgentRunHint[];
131
- variantManifest?: VariantCaptureManifest;
132
- verificationDiagnostics?: {
133
- lang?: string;
134
- theme?: string;
135
- };
136
- identityHints?: string[];
137
- }): ChatCompletionContentPart[];
138
- export declare function buildVisionObserverPrompt(params: {
139
- screenshotUrl: string;
140
- currentUrl: string;
141
- interactiveElements: InteractiveElement[];
142
- userGoal: string;
143
- currentLang?: string;
144
- currentTheme?: 'light' | 'dark';
145
- currentPageId?: string;
146
- pageIdentitySummary?: string;
147
- currentObjective?: string;
148
- }): ChatCompletionContentPart[];
149
- export declare function buildElementSystemPrompt(description: string): string;
150
- export declare function buildElementIterationMessage(params: {
151
- elementName: string;
152
- elementDescription: string;
153
- accessibilityTree: string;
154
- interactiveElements: InteractiveElement[];
155
- simplifiedDOM?: string;
156
- currentUrl: string;
157
- iteration: number;
158
- maxIterations: number;
159
- actionHistory?: string[];
160
- viewport?: {
161
- width: number;
162
- height: number;
163
- };
164
- forbiddenSearchQueries?: string[];
165
- screenshotUrl?: string;
166
- }): ChatCompletionContentPart[];
167
- export {};