opensteer 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,8 +1,10 @@
1
+ import {
2
+ BrowserProfileClient
3
+ } from "./chunk-WJI7TGBQ.js";
1
4
  import {
2
5
  ActionWsClient,
3
- CloudCdpClient,
4
- CloudSessionClient,
5
6
  CounterResolutionError,
7
+ CursorController,
6
8
  ElementPathError,
7
9
  LocalSelectorStorage,
8
10
  OPENSTEER_HIDDEN_ATTR,
@@ -22,8 +24,8 @@ import {
22
24
  OpensteerAgentError,
23
25
  OpensteerAgentExecutionError,
24
26
  OpensteerAgentProviderError,
25
- OpensteerCloudError,
26
27
  OpensteerCuaAgentHandler,
28
+ SvgCursorRenderer,
27
29
  buildArrayFieldPathCandidates,
28
30
  buildElementPathFromHandle,
29
31
  buildElementPathFromSelector,
@@ -36,9 +38,7 @@ import {
36
38
  clearCookies,
37
39
  cloneElementPath,
38
40
  closeTab,
39
- cloudNotLaunchedError,
40
41
  cloudSessionContractVersion,
41
- cloudUnsupportedMethodError,
42
42
  collectLocalSelectorCacheEntries,
43
43
  countArrayItemsWithPath,
44
44
  createCuaClient,
@@ -58,13 +58,13 @@ import {
58
58
  importCookies,
59
59
  listTabs,
60
60
  markInteractiveElements,
61
- normalizeNamespace,
62
61
  performClick,
63
62
  performFileUpload,
64
63
  performHover,
65
64
  performInput,
66
65
  performScroll,
67
66
  performSelect,
67
+ planSnappyCursorMotion,
68
68
  prepareSnapshot,
69
69
  pressKey,
70
70
  queryAllByElementPath,
@@ -72,14 +72,22 @@ import {
72
72
  resolveCounterElement,
73
73
  resolveCountersBatch,
74
74
  resolveElementPath,
75
- resolveNamespaceDir,
76
75
  sanitizeElementPath,
77
76
  serializePageHTML,
78
77
  setCookie,
79
78
  switchTab,
80
79
  typeText,
81
80
  waitForVisualStability
82
- } from "./chunk-SGZYTGY3.js";
81
+ } from "./chunk-F2VDVOJO.js";
82
+ import {
83
+ CloudCdpClient,
84
+ CloudSessionClient,
85
+ OpensteerCloudError,
86
+ cloudNotLaunchedError,
87
+ cloudUnsupportedMethodError,
88
+ normalizeNamespace,
89
+ resolveNamespaceDir
90
+ } from "./chunk-WDRMHPWL.js";
83
91
  import {
84
92
  createResolveCallback
85
93
  } from "./chunk-DN3GI5CH.js";
@@ -90,11 +98,196 @@ import "./chunk-3H5RRIMZ.js";
90
98
  import {
91
99
  getModelProvider
92
100
  } from "./chunk-FAHE5DB2.js";
101
+
102
+ // src/cursor/renderers/cdp-overlay.ts
103
+ var PULSE_DELAY_MS = 30;
104
+ var CdpOverlayCursorRenderer = class {
105
+ page = null;
106
+ session = null;
107
+ active = false;
108
+ reason = "disabled";
109
+ lastMessage;
110
+ lastPoint = null;
111
+ async initialize(page) {
112
+ this.page = page;
113
+ if (page.isClosed()) {
114
+ this.markInactive("page_closed");
115
+ return;
116
+ }
117
+ await this.createSession();
118
+ }
119
+ isActive() {
120
+ return this.active;
121
+ }
122
+ status() {
123
+ return {
124
+ enabled: true,
125
+ active: this.active,
126
+ reason: this.reason ? this.lastMessage ? `${this.reason}: ${this.lastMessage}` : this.reason : void 0
127
+ };
128
+ }
129
+ async move(point, style) {
130
+ await this.sendWithRecovery(async (session) => {
131
+ await session.send("Overlay.highlightQuad", {
132
+ quad: buildCursorQuad(point, style.size),
133
+ color: toProtocolRgba(style.fillColor),
134
+ outlineColor: toProtocolRgba(style.outlineColor)
135
+ });
136
+ });
137
+ this.lastPoint = point;
138
+ }
139
+ async pulse(point, style) {
140
+ const pulseSize = style.size * style.pulseScale;
141
+ const pulseFill = {
142
+ ...style.fillColor,
143
+ a: Math.min(1, style.fillColor.a * 0.14)
144
+ };
145
+ const pulseOutline = {
146
+ ...style.haloColor,
147
+ a: Math.min(1, style.haloColor.a * 0.9)
148
+ };
149
+ await this.sendWithRecovery(async (session) => {
150
+ await session.send("Overlay.highlightQuad", {
151
+ quad: buildCursorQuad(point, pulseSize),
152
+ color: toProtocolRgba(pulseFill),
153
+ outlineColor: toProtocolRgba(pulseOutline)
154
+ });
155
+ });
156
+ await sleep(PULSE_DELAY_MS);
157
+ await this.move(point, style);
158
+ }
159
+ async clear() {
160
+ if (!this.session) return;
161
+ try {
162
+ await this.session.send("Overlay.hideHighlight");
163
+ } catch {
164
+ this.markInactive("cdp_detached");
165
+ }
166
+ }
167
+ async dispose() {
168
+ await this.cleanupSession();
169
+ this.active = false;
170
+ this.reason = "disabled";
171
+ this.lastMessage = void 0;
172
+ this.lastPoint = null;
173
+ this.page = null;
174
+ }
175
+ async sendWithRecovery(operation) {
176
+ if (!this.active || !this.session) return;
177
+ try {
178
+ await operation(this.session);
179
+ } catch (error) {
180
+ const message = error instanceof Error ? error.message : String(error);
181
+ this.lastMessage = message;
182
+ if (!isRecoverableProtocolError(message) || !this.page) {
183
+ this.markInactive("renderer_error", message);
184
+ return;
185
+ }
186
+ await this.createSession();
187
+ if (!this.active || !this.session) {
188
+ return;
189
+ }
190
+ try {
191
+ await operation(this.session);
192
+ } catch (retryError) {
193
+ const retryMessage = retryError instanceof Error ? retryError.message : String(retryError);
194
+ this.markInactive("renderer_error", retryMessage);
195
+ }
196
+ }
197
+ }
198
+ async createSession() {
199
+ if (!this.page || this.page.isClosed()) {
200
+ this.markInactive("page_closed");
201
+ return;
202
+ }
203
+ await this.cleanupSession();
204
+ try {
205
+ const session = await this.page.context().newCDPSession(this.page);
206
+ await session.send("DOM.enable");
207
+ await session.send("Overlay.enable");
208
+ this.session = session;
209
+ this.active = true;
210
+ this.reason = void 0;
211
+ this.lastMessage = void 0;
212
+ } catch (error) {
213
+ const message = error instanceof Error ? error.message : String(error);
214
+ this.markInactive(inferSetupReason(message), message);
215
+ await this.cleanupSession();
216
+ }
217
+ }
218
+ async cleanupSession() {
219
+ const session = this.session;
220
+ this.session = null;
221
+ if (!session) return;
222
+ try {
223
+ await session.detach();
224
+ } catch {
225
+ }
226
+ }
227
+ markInactive(reason, message) {
228
+ this.active = false;
229
+ this.reason = reason;
230
+ this.lastMessage = message;
231
+ }
232
+ };
233
+ function buildCursorQuad(point, size) {
234
+ const x = point.x;
235
+ const y = point.y;
236
+ return [
237
+ // Point 0: Tip (the hotspot)
238
+ roundPointValue(x),
239
+ roundPointValue(y),
240
+ // Point 1: Right shoulder — extends right and down
241
+ roundPointValue(x + size * 0.45),
242
+ roundPointValue(y + size * 0.78),
243
+ // Point 2: Tail — bottom of the cursor shaft
244
+ roundPointValue(x + size * 0.12),
245
+ roundPointValue(y + size * 1.3),
246
+ // Point 3: Left edge — stays close to the shaft
247
+ roundPointValue(x - size * 0.04),
248
+ roundPointValue(y + size * 0.62)
249
+ ];
250
+ }
251
+ function inferSetupReason(message) {
252
+ const lowered = message.toLowerCase();
253
+ if (lowered.includes("not supported") || lowered.includes("only supported") || lowered.includes("unknown command")) {
254
+ return "unsupported";
255
+ }
256
+ return "cdp_unavailable";
257
+ }
258
+ function isRecoverableProtocolError(message) {
259
+ const lowered = message.toLowerCase();
260
+ return lowered.includes("session closed") || lowered.includes("target closed") || lowered.includes("has been closed") || lowered.includes("detached");
261
+ }
262
+ function toProtocolRgba(color) {
263
+ return {
264
+ r: clampColor(color.r),
265
+ g: clampColor(color.g),
266
+ b: clampColor(color.b),
267
+ a: clampAlpha(color.a)
268
+ };
269
+ }
270
+ function clampColor(value) {
271
+ return Math.min(255, Math.max(0, Math.round(value)));
272
+ }
273
+ function clampAlpha(value) {
274
+ const normalized = Number.isFinite(value) ? value : 1;
275
+ return Math.min(1, Math.max(0, normalized));
276
+ }
277
+ function roundPointValue(value) {
278
+ return Math.round(value * 100) / 100;
279
+ }
280
+ function sleep(ms) {
281
+ return new Promise((resolve) => setTimeout(resolve, ms));
282
+ }
93
283
  export {
94
284
  ActionWsClient,
285
+ BrowserProfileClient,
286
+ CdpOverlayCursorRenderer,
95
287
  CloudCdpClient,
96
288
  CloudSessionClient,
97
289
  CounterResolutionError,
290
+ CursorController,
98
291
  ElementPathError,
99
292
  LocalSelectorStorage,
100
293
  OPENSTEER_HIDDEN_ATTR,
@@ -116,6 +309,7 @@ export {
116
309
  OpensteerAgentProviderError,
117
310
  OpensteerCloudError,
118
311
  OpensteerCuaAgentHandler,
312
+ SvgCursorRenderer,
119
313
  buildArrayFieldPathCandidates,
120
314
  buildElementPathFromHandle,
121
315
  buildElementPathFromSelector,
@@ -160,6 +354,7 @@ export {
160
354
  performInput,
161
355
  performScroll,
162
356
  performSelect,
357
+ planSnappyCursorMotion,
163
358
  prepareSnapshot,
164
359
  pressKey,
165
360
  queryAllByElementPath,
@@ -0,0 +1,327 @@
1
+ import { BrowserContextOptions } from 'playwright';
2
+
3
+ type MatchOperator = 'exact' | 'startsWith' | 'contains';
4
+ interface AttributeMatchClause {
5
+ kind: 'attr';
6
+ key: string;
7
+ op?: MatchOperator;
8
+ value?: string;
9
+ }
10
+ interface PositionMatchClause {
11
+ kind: 'position';
12
+ axis: 'nthOfType' | 'nthChild';
13
+ }
14
+ type MatchClause = AttributeMatchClause | PositionMatchClause;
15
+ interface PathNodePosition {
16
+ nthChild: number;
17
+ nthOfType: number;
18
+ }
19
+ interface PathNode {
20
+ tag: string;
21
+ attrs: Record<string, string>;
22
+ position: PathNodePosition;
23
+ match: MatchClause[];
24
+ }
25
+ type DomPath = PathNode[];
26
+ interface ContextHop {
27
+ kind: 'iframe' | 'shadow';
28
+ host: DomPath;
29
+ }
30
+ interface ElementPath {
31
+ context: ContextHop[];
32
+ nodes: DomPath;
33
+ }
34
+
35
+ type SnapshotMode = 'action' | 'extraction' | 'clickable' | 'scrollable' | 'full';
36
+ interface SnapshotOptions {
37
+ mode?: SnapshotMode;
38
+ withCounters?: boolean;
39
+ markInteractive?: boolean;
40
+ }
41
+ interface ScreenshotOptions {
42
+ fullPage?: boolean;
43
+ type?: 'png' | 'jpeg';
44
+ /** Ignored for PNG. */
45
+ quality?: number;
46
+ omitBackground?: boolean;
47
+ }
48
+ interface AiResolveArgs {
49
+ html: string;
50
+ action: string;
51
+ description: string;
52
+ url: string | null;
53
+ }
54
+ interface AiResolveResult {
55
+ element?: number;
56
+ selector?: string;
57
+ path?: ElementPath;
58
+ }
59
+ type AiResolveCallbackResult = AiResolveResult | number | string | null | undefined;
60
+ type AiResolveCallback = (args: AiResolveArgs) => Promise<AiResolveCallbackResult>;
61
+ interface AiExtractArgs<TSchema = ExtractSchema> {
62
+ html: string;
63
+ schema: TSchema;
64
+ description?: string;
65
+ prompt?: string;
66
+ url: string | null;
67
+ }
68
+ type AiExtractResult<TData = unknown> = TData | ExtractionPlan | string;
69
+ type AiExtractCallback = <TSchema = ExtractSchema, TData = unknown>(args: AiExtractArgs<TSchema>) => Promise<AiExtractResult<TData>>;
70
+ interface GotoOptions {
71
+ timeout?: number;
72
+ waitUntil?: 'commit' | 'domcontentloaded' | 'load' | 'networkidle';
73
+ settleMs?: number;
74
+ }
75
+ interface LaunchOptions {
76
+ headless?: boolean;
77
+ executablePath?: string;
78
+ slowMo?: number;
79
+ context?: BrowserContextOptions;
80
+ /** Connect to a running browser. Example: "http://localhost:9222" */
81
+ connectUrl?: string;
82
+ /** Browser channel: "chrome", "chrome-beta", or "msedge" */
83
+ channel?: string;
84
+ /** Browser profile directory or Chromium user-data dir. Preserves cookies, extensions, and sessions. */
85
+ profileDir?: string;
86
+ /** Cloud browser profile preference. Applies only when cloud mode is enabled. */
87
+ cloudBrowserProfile?: OpensteerCloudBrowserProfileOptions;
88
+ /** Connection timeout in milliseconds. */
89
+ timeout?: number;
90
+ }
91
+ interface OpensteerBrowserConfig {
92
+ headless?: boolean;
93
+ executablePath?: string;
94
+ slowMo?: number;
95
+ /** Connect to a running browser. Example: "http://localhost:9222" */
96
+ connectUrl?: string;
97
+ /** Browser channel: "chrome", "chrome-beta", or "msedge" */
98
+ channel?: string;
99
+ /** Browser profile directory or Chromium user-data dir. Preserves cookies, extensions, and sessions. */
100
+ profileDir?: string;
101
+ }
102
+ interface OpensteerStorageConfig {
103
+ rootDir?: string;
104
+ }
105
+ interface OpensteerCursorColor {
106
+ r: number;
107
+ g: number;
108
+ b: number;
109
+ a: number;
110
+ }
111
+ interface OpensteerCursorStyle {
112
+ size?: number;
113
+ fillColor?: OpensteerCursorColor;
114
+ outlineColor?: OpensteerCursorColor;
115
+ haloColor?: OpensteerCursorColor;
116
+ pulseScale?: number;
117
+ }
118
+ type OpensteerCursorProfile = 'snappy';
119
+ interface OpensteerCursorConfig {
120
+ enabled?: boolean;
121
+ profile?: OpensteerCursorProfile;
122
+ style?: OpensteerCursorStyle;
123
+ }
124
+ type OpensteerAuthScheme = 'api-key' | 'bearer';
125
+ type OpensteerCloudAnnouncePolicy = 'always' | 'off' | 'tty';
126
+ interface OpensteerCloudBrowserProfileOptions {
127
+ profileId: string;
128
+ reuseIfActive?: boolean;
129
+ }
130
+ interface OpensteerCloudOptions {
131
+ apiKey?: string;
132
+ accessToken?: string;
133
+ baseUrl?: string;
134
+ authScheme?: OpensteerAuthScheme;
135
+ announce?: OpensteerCloudAnnouncePolicy;
136
+ browserProfile?: OpensteerCloudBrowserProfileOptions;
137
+ }
138
+ type OpensteerCloudConfig = boolean | OpensteerCloudOptions;
139
+ interface OpensteerConfig {
140
+ name?: string;
141
+ browser?: OpensteerBrowserConfig;
142
+ storage?: OpensteerStorageConfig;
143
+ cursor?: OpensteerCursorConfig;
144
+ cloud?: OpensteerCloudConfig;
145
+ model?: string;
146
+ debug?: boolean;
147
+ }
148
+ interface ActionWaitOptions {
149
+ enabled?: boolean;
150
+ timeout?: number;
151
+ settleMs?: number;
152
+ networkQuietMs?: number;
153
+ includeNetwork?: boolean;
154
+ }
155
+ interface BaseActionOptions {
156
+ description?: string;
157
+ element?: number;
158
+ selector?: string;
159
+ wait?: false | ActionWaitOptions;
160
+ }
161
+ interface ClickOptions extends BaseActionOptions {
162
+ button?: 'left' | 'right' | 'middle';
163
+ clickCount?: number;
164
+ modifiers?: Array<'Alt' | 'Control' | 'Meta' | 'Shift'>;
165
+ }
166
+ interface HoverOptions extends BaseActionOptions {
167
+ force?: boolean;
168
+ position?: {
169
+ x: number;
170
+ y: number;
171
+ };
172
+ }
173
+ interface InputOptions extends BaseActionOptions {
174
+ text: string;
175
+ clear?: boolean;
176
+ pressEnter?: boolean;
177
+ }
178
+ interface SelectOptions extends BaseActionOptions {
179
+ value?: string;
180
+ label?: string;
181
+ index?: number;
182
+ }
183
+ interface ScrollOptions extends BaseActionOptions {
184
+ direction?: 'up' | 'down' | 'left' | 'right';
185
+ amount?: number;
186
+ }
187
+ interface ExtractSchemaField {
188
+ element?: number;
189
+ selector?: string;
190
+ attribute?: string;
191
+ source?: 'current_url';
192
+ }
193
+ type ExtractSchemaValue = ExtractSchemaField | string | number | boolean | null | ExtractSchema | ExtractSchema[];
194
+ interface ExtractSchema {
195
+ [key: string]: ExtractSchemaValue;
196
+ }
197
+ interface ExtractOptions<TSchema = ExtractSchema> extends BaseActionOptions {
198
+ schema?: TSchema;
199
+ prompt?: string;
200
+ snapshot?: SnapshotOptions;
201
+ }
202
+ interface ExtractionFieldPlan {
203
+ element?: number;
204
+ selector?: string;
205
+ attribute?: string;
206
+ source?: 'current_url';
207
+ }
208
+ interface ExtractionPlan {
209
+ fields?: Record<string, ExtractionFieldPlan>;
210
+ paths?: Record<string, ElementPath>;
211
+ data?: unknown;
212
+ }
213
+ interface ExtractFromPlanOptions<TSchema = ExtractSchema> {
214
+ description?: string;
215
+ schema: TSchema;
216
+ plan: ExtractionPlan;
217
+ }
218
+ interface ActionResult {
219
+ method: string;
220
+ namespace: string;
221
+ persisted: boolean;
222
+ pathFile: string | null;
223
+ selectorUsed?: string | null;
224
+ }
225
+ interface OpensteerCursorState {
226
+ enabled: boolean;
227
+ active: boolean;
228
+ reason?: string;
229
+ }
230
+ interface ExtractionRunResult<T = unknown> {
231
+ namespace: string;
232
+ persisted: boolean;
233
+ pathFile: string | null;
234
+ data: T;
235
+ paths: Record<string, ElementPath>;
236
+ }
237
+ interface StateResult {
238
+ url: string;
239
+ title: string;
240
+ html: string;
241
+ }
242
+ interface TabInfo {
243
+ index: number;
244
+ url: string;
245
+ title: string;
246
+ active: boolean;
247
+ }
248
+ interface CookieParam {
249
+ name: string;
250
+ value: string;
251
+ url?: string;
252
+ domain?: string;
253
+ path?: string;
254
+ expires?: number;
255
+ httpOnly?: boolean;
256
+ secure?: boolean;
257
+ sameSite?: 'Strict' | 'Lax' | 'None';
258
+ }
259
+ interface FileUploadOptions extends BaseActionOptions {
260
+ paths: string[];
261
+ }
262
+ interface BoundingBox {
263
+ x: number;
264
+ y: number;
265
+ width: number;
266
+ height: number;
267
+ }
268
+ type OpensteerAgentMode = 'cua';
269
+ type OpensteerAgentProvider = 'openai' | 'anthropic' | 'google';
270
+ interface OpensteerAgentModelConfig {
271
+ modelName: string;
272
+ apiKey?: string;
273
+ baseUrl?: string;
274
+ organization?: string;
275
+ thinkingBudget?: number;
276
+ environment?: string;
277
+ }
278
+ interface OpensteerAgentConfig {
279
+ mode: OpensteerAgentMode;
280
+ model?: string | OpensteerAgentModelConfig;
281
+ systemPrompt?: string;
282
+ waitBetweenActionsMs?: number;
283
+ }
284
+ interface OpensteerAgentExecuteOptions {
285
+ instruction: string;
286
+ maxSteps?: number;
287
+ highlightCursor?: boolean;
288
+ }
289
+ interface OpensteerAgentUsage {
290
+ inputTokens: number;
291
+ outputTokens: number;
292
+ reasoningTokens?: number;
293
+ inferenceTimeMs: number;
294
+ }
295
+ interface OpensteerAgentAction {
296
+ type: string;
297
+ reasoning?: string;
298
+ button?: string;
299
+ clickCount?: number;
300
+ x?: number;
301
+ y?: number;
302
+ text?: string;
303
+ keys?: string[];
304
+ scrollX?: number;
305
+ scrollY?: number;
306
+ timeMs?: number;
307
+ url?: string;
308
+ path?: Array<{
309
+ x: number;
310
+ y: number;
311
+ }>;
312
+ [key: string]: unknown;
313
+ }
314
+ interface OpensteerAgentResult {
315
+ success: boolean;
316
+ completed: boolean;
317
+ message: string;
318
+ actions: OpensteerAgentAction[];
319
+ usage?: OpensteerAgentUsage;
320
+ provider: OpensteerAgentProvider;
321
+ model: string;
322
+ }
323
+ interface OpensteerAgentInstance {
324
+ execute(instructionOrOptions: string | OpensteerAgentExecuteOptions): Promise<OpensteerAgentResult>;
325
+ }
326
+
327
+ export type { OpensteerBrowserConfig as $, ActionResult as A, BaseActionOptions as B, CookieParam as C, AiResolveArgs as D, ExtractOptions as E, FileUploadOptions as F, GotoOptions as G, HoverOptions as H, InputOptions as I, AiResolveCallbackResult as J, AiResolveResult as K, LaunchOptions as L, AttributeMatchClause as M, ContextHop as N, OpensteerAuthScheme as O, DomPath as P, ExtractSchema as Q, ExtractSchemaField as R, SnapshotOptions as S, TabInfo as T, ExtractSchemaValue as U, ExtractionFieldPlan as V, ExtractionPlan as W, MatchClause as X, MatchOperator as Y, OpensteerAgentMode as Z, OpensteerAgentModelConfig as _, OpensteerConfig as a, OpensteerCloudAnnouncePolicy as a0, OpensteerCloudBrowserProfileOptions as a1, OpensteerCloudConfig as a2, OpensteerCloudOptions as a3, OpensteerCursorColor as a4, OpensteerCursorProfile as a5, OpensteerStorageConfig as a6, PathNode as a7, PathNodePosition as a8, PositionMatchClause as a9, StateResult as b, ScreenshotOptions as c, ClickOptions as d, SelectOptions as e, ScrollOptions as f, BoundingBox as g, ExtractFromPlanOptions as h, ExtractionRunResult as i, OpensteerCursorState as j, OpensteerAgentConfig as k, OpensteerAgentInstance as l, ElementPath as m, SnapshotMode as n, AiResolveCallback as o, AiExtractCallback as p, OpensteerAgentProvider as q, OpensteerAgentAction as r, OpensteerAgentResult as s, OpensteerAgentUsage as t, OpensteerCursorStyle as u, OpensteerCursorConfig as v, OpensteerAgentExecuteOptions as w, ActionWaitOptions as x, AiExtractArgs as y, AiExtractResult as z };