opensteer 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,327 @@
1
+ import { BrowserContextOptions } from 'playwright';
2
+
3
+ type MatchOperator = 'exact' | 'startsWith' | 'contains'; // String comparison modes an attribute clause may name.
4
+ interface AttributeMatchClause { // Match clause tagged kind 'attr': names an attribute key, optionally with an operator and a value.
5
+ kind: 'attr'; // Discriminant within the MatchClause union.
6
+ key: string; // Attribute name; the only required payload field.
7
+ op?: MatchOperator; // NOTE(review): the operator assumed when omitted is not visible here - confirm.
8
+ value?: string;
9
+ }
10
+ interface PositionMatchClause { // Match clause tagged kind 'position': names which positional axis applies.
11
+ kind: 'position'; // Discriminant within the MatchClause union.
12
+ axis: 'nthOfType' | 'nthChild'; // Same names as the two fields of PathNodePosition.
13
+ }
14
+ type MatchClause = AttributeMatchClause | PositionMatchClause; // Union discriminated on `kind`.
15
+ interface PathNodePosition { // Two positional indices recorded for a path node. NOTE(review): 0- vs 1-based is not visible here - confirm.
16
+ nthChild: number;
17
+ nthOfType: number;
18
+ }
19
+ interface PathNode { // One step of a DomPath: tag, attribute map, position indices, and a list of match clauses.
20
+ tag: string;
21
+ attrs: Record<string, string>; // Attribute name -> value.
22
+ position: PathNodePosition;
23
+ match: MatchClause[]; // May mix 'attr' and 'position' clauses.
24
+ }
25
+ type DomPath = PathNode[]; // NOTE(review): presumably ordered ancestor-to-target - confirm.
26
+ interface ContextHop { // Hop tagged 'iframe' or 'shadow'; `host` is a DomPath (presumably locating the iframe element / shadow host - confirm).
27
+ kind: 'iframe' | 'shadow';
28
+ host: DomPath;
29
+ }
30
+ interface ElementPath { // Context hops plus a node path. NOTE(review): presumably hops are crossed in order before `nodes` is resolved - confirm.
31
+ context: ContextHop[];
32
+ nodes: DomPath;
33
+ }
34
+
35
+ type SnapshotMode = 'action' | 'extraction' | 'clickable' | 'scrollable' | 'full'; // Named snapshot variants; the docs run `opensteer snapshot extraction` before `extract`.
36
+ interface SnapshotOptions { // Snapshot settings; every field is optional.
37
+ mode?: SnapshotMode;
38
+ withCounters?: boolean; // NOTE(review): presumably toggles the element counters that extract schemas reference - confirm.
39
+ markInteractive?: boolean;
40
+ }
41
+ interface ScreenshotOptions { // Screenshot settings; every field is optional.
42
+ fullPage?: boolean;
43
+ type?: 'png' | 'jpeg'; // Output image format.
44
+ /** Ignored for PNG. */
45
+ quality?: number; // NOTE(review): valid range is not shown here - confirm.
46
+ omitBackground?: boolean;
47
+ }
48
+ interface AiResolveArgs { // Argument object handed to an AiResolveCallback; all four fields are required.
49
+ html: string;
50
+ action: string;
51
+ description: string;
52
+ url: string | null; // Always present, but may be null.
53
+ }
54
+ interface AiResolveResult { // Object form of a resolve result; each locator field is optional.
55
+ element?: number;
56
+ selector?: string;
57
+ path?: ElementPath;
58
+ }
59
+ type AiResolveCallbackResult = AiResolveResult | number | string | null | undefined; // NOTE(review): presumably number = element counter, string = selector, nullish = nothing resolved - confirm.
60
+ type AiResolveCallback = (args: AiResolveArgs) => Promise<AiResolveCallbackResult>; // Async resolver signature.
61
+ interface AiExtractArgs<TSchema = ExtractSchema> { // Argument object handed to an AiExtractCallback; TSchema defaults to ExtractSchema.
62
+ html: string;
63
+ schema: TSchema; // Required, unlike ExtractOptions.schema.
64
+ description?: string;
65
+ prompt?: string;
66
+ url: string | null; // Always present, but may be null.
67
+ }
68
+ type AiExtractResult<TData = unknown> = TData | ExtractionPlan | string; // NOTE(review): how a plain string result is interpreted is not visible here - confirm.
69
+ type AiExtractCallback = <TSchema = ExtractSchema, TData = unknown>(args: AiExtractArgs<TSchema>) => Promise<AiExtractResult<TData>>; // TData occurs only in the return type, so it is not inferable from `args`.
70
+ interface GotoOptions { // Navigation settings; every field is optional.
71
+ timeout?: number; // NOTE(review): units not shown here; presumably milliseconds - confirm.
72
+ waitUntil?: 'commit' | 'domcontentloaded' | 'load' | 'networkidle'; // NOTE(review): these match Playwright's goto lifecycle names - confirm they are forwarded as-is.
73
+ settleMs?: number; // NOTE(review): presumably an extra settle delay in milliseconds, per the Ms suffix - confirm.
74
+ }
75
+ interface LaunchOptions { // Every field is optional. Repeats the six OpensteerBrowserConfig fields and adds `context`, `cloudBrowserProfile`, and `timeout`.
76
+ headless?: boolean;
77
+ executablePath?: string;
78
+ slowMo?: number;
79
+ context?: BrowserContextOptions; // Type imported from 'playwright'.
80
+ /** Connect to a running browser. Example: "http://localhost:9222" */
81
+ connectUrl?: string;
82
+ /** Browser channel: "chrome", "chrome-beta", or "msedge" */
83
+ channel?: string;
84
+ /** Browser profile directory or Chromium user-data dir. Preserves cookies, extensions, and sessions. */
85
+ profileDir?: string;
86
+ /** Cloud browser profile preference. Applies only when cloud mode is enabled. */
87
+ cloudBrowserProfile?: OpensteerCloudBrowserProfileOptions;
88
+ /** Connection timeout in milliseconds. */
89
+ timeout?: number;
90
+ }
91
+ interface OpensteerBrowserConfig { // Browser section of OpensteerConfig; every field is optional. LaunchOptions repeats these same six fields.
92
+ headless?: boolean;
93
+ executablePath?: string;
94
+ slowMo?: number;
95
+ /** Connect to a running browser. Example: "http://localhost:9222" */
96
+ connectUrl?: string;
97
+ /** Browser channel: "chrome", "chrome-beta", or "msedge" */
98
+ channel?: string;
99
+ /** Browser profile directory or Chromium user-data dir. Preserves cookies, extensions, and sessions. */
100
+ profileDir?: string;
101
+ }
102
+ interface OpensteerStorageConfig { // Storage section of OpensteerConfig.
103
+ rootDir?: string; // NOTE(review): presumably the directory under which cached paths are persisted - confirm.
104
+ }
105
+ interface OpensteerCursorColor { // Four required numeric channels. NOTE(review): channel ranges (0-255 vs 0-1) are not shown here - confirm.
106
+ r: number;
107
+ g: number;
108
+ b: number;
109
+ a: number;
110
+ }
111
+ interface OpensteerCursorStyle { // Visual overrides for the cursor; every field is optional.
112
+ size?: number;
113
+ fillColor?: OpensteerCursorColor;
114
+ outlineColor?: OpensteerCursorColor;
115
+ haloColor?: OpensteerCursorColor;
116
+ pulseScale?: number;
117
+ }
118
+ type OpensteerCursorProfile = 'snappy'; // Only one profile literal is declared.
119
+ interface OpensteerCursorConfig { // Cursor section of OpensteerConfig; every field is optional.
120
+ enabled?: boolean;
121
+ profile?: OpensteerCursorProfile;
122
+ style?: OpensteerCursorStyle;
123
+ }
124
+ type OpensteerAuthScheme = 'api-key' | 'bearer'; // Allowed values for OpensteerCloudOptions.authScheme.
125
+ type OpensteerCloudAnnouncePolicy = 'always' | 'off' | 'tty'; // Allowed values for OpensteerCloudOptions.announce.
126
+ interface OpensteerCloudBrowserProfileOptions { // Cloud browser profile selection; also the type of LaunchOptions.cloudBrowserProfile.
127
+ profileId: string; // The only required field.
128
+ reuseIfActive?: boolean;
129
+ }
130
+ interface OpensteerCloudOptions { // Object form of the cloud setting; every field is optional.
131
+ apiKey?: string;
132
+ accessToken?: string;
133
+ baseUrl?: string;
134
+ authScheme?: OpensteerAuthScheme;
135
+ announce?: OpensteerCloudAnnouncePolicy;
136
+ browserProfile?: OpensteerCloudBrowserProfileOptions;
137
+ }
138
+ type OpensteerCloudConfig = boolean | OpensteerCloudOptions; // NOTE(review): presumably `true` enables cloud mode with defaults and `false` disables it - confirm.
139
+ interface OpensteerConfig { // Top-level configuration object; every field is optional.
140
+ name?: string; // NOTE(review): presumably the `name` the docs say must match the CLI `--name` - confirm.
141
+ browser?: OpensteerBrowserConfig;
142
+ storage?: OpensteerStorageConfig;
143
+ cursor?: OpensteerCursorConfig;
144
+ cloud?: OpensteerCloudConfig; // Accepts a boolean or an options object.
145
+ model?: string;
146
+ debug?: boolean;
147
+ }
148
+ interface ActionWaitOptions { // Object form of BaseActionOptions.wait; every field is optional.
149
+ enabled?: boolean;
150
+ timeout?: number; // NOTE(review): units not shown here; presumably milliseconds - confirm.
151
+ settleMs?: number;
152
+ networkQuietMs?: number;
153
+ includeNetwork?: boolean;
154
+ }
155
+ interface BaseActionOptions { // Fields shared by the action option interfaces that extend this one; every field is optional.
156
+ description?: string; // The docs use description strings to cache an action/extraction for replay.
157
+ element?: number; // Counter number; the docs read these from snapshot output.
158
+ selector?: string;
159
+ wait?: false | ActionWaitOptions; // Literal `false` or an options object. NOTE(review): presumably `false` skips post-action waiting - confirm.
160
+ }
161
+ interface ClickOptions extends BaseActionOptions { // Click settings on top of the shared action fields; every added field is optional.
162
+ button?: 'left' | 'right' | 'middle';
163
+ clickCount?: number;
164
+ modifiers?: Array<'Alt' | 'Control' | 'Meta' | 'Shift'>; // Modifier key names.
165
+ }
166
+ interface HoverOptions extends BaseActionOptions { // Hover settings on top of the shared action fields; every added field is optional.
167
+ force?: boolean;
168
+ position?: { // NOTE(review): coordinate origin (element-relative vs page) is not shown here - confirm.
169
+ x: number;
170
+ y: number;
171
+ };
172
+ }
173
+ interface InputOptions extends BaseActionOptions { // Text-entry settings on top of the shared action fields.
174
+ text: string; // Required.
175
+ clear?: boolean;
176
+ pressEnter?: boolean; // The CLI docs show a matching `--pressEnter` flag on `opensteer input`.
177
+ }
178
+ interface SelectOptions extends BaseActionOptions { // Three optional ways to name an option; the docs example passes `label`.
179
+ value?: string;
180
+ label?: string;
181
+ index?: number;
182
+ }
183
+ interface ScrollOptions extends BaseActionOptions { // Scroll settings on top of the shared action fields; both added fields are optional.
184
+ direction?: 'up' | 'down' | 'left' | 'right';
185
+ amount?: number; // The docs example passes 500. NOTE(review): units (pixels?) are not shown here - confirm.
186
+ }
187
+ interface ExtractSchemaField { // Explicit leaf binding. The docs list the forms { element }, { element, attribute }, { selector }, and { source: "current_url" }.
188
+ element?: number; // Counter number taken from snapshot output.
189
+ selector?: string;
190
+ attribute?: string; // Attribute to read, e.g. "href" or "src" in the docs examples.
191
+ source?: 'current_url'; // Only one source literal is declared.
192
+ }
193
+ type ExtractSchemaValue = ExtractSchemaField | string | number | boolean | null | ExtractSchema | ExtractSchema[]; // Leaf binding, primitive (the docs use "string" as a semantic placeholder), nested schema, or array of schemas.
194
+ interface ExtractSchema { // Recursive, string-keyed description of the output shape.
195
+ [key: string]: ExtractSchemaValue;
196
+ }
197
+ interface ExtractOptions<TSchema = ExtractSchema> extends BaseActionOptions { // Extract settings on top of the shared action fields; every added field is optional.
198
+ schema?: TSchema; // Optional: the docs show `extract({ description })` alone replaying from cache.
199
+ prompt?: string; // The docs use this to describe relationship/fallback rules for semantic extraction.
200
+ snapshot?: SnapshotOptions;
201
+ }
202
+ interface ExtractionFieldPlan { // Declares the same four optional fields as ExtractSchemaField.
203
+ element?: number;
204
+ selector?: string;
205
+ attribute?: string;
206
+ source?: 'current_url';
207
+ }
208
+ interface ExtractionPlan { // One of the shapes an AiExtractCallback may return; every field is optional.
209
+ fields?: Record<string, ExtractionFieldPlan>; // NOTE(review): key format (flat name vs dotted path) is not shown here - confirm.
210
+ paths?: Record<string, ElementPath>;
211
+ data?: unknown;
212
+ }
213
+ interface ExtractFromPlanOptions<TSchema = ExtractSchema> { // Standalone options type (does not extend BaseActionOptions); `schema` and `plan` are required.
214
+ description?: string;
215
+ schema: TSchema;
216
+ plan: ExtractionPlan;
217
+ }
218
+ interface ActionResult { // Result record for an action; only `selectorUsed` is optional.
219
+ method: string;
220
+ namespace: string;
221
+ persisted: boolean;
222
+ pathFile: string | null; // Always present, but may be null.
223
+ selectorUsed?: string | null; // May be absent, null, or a string.
224
+ }
225
+ interface OpensteerCursorState { // Cursor status with separate `enabled` and `active` flags.
226
+ enabled: boolean;
227
+ active: boolean;
228
+ reason?: string; // NOTE(review): presumably explains why the cursor is inactive - confirm.
229
+ }
230
+ interface ExtractionRunResult<T = unknown> { // Extraction result; shares namespace/persisted/pathFile with ActionResult and adds `data` and `paths`.
231
+ namespace: string;
232
+ persisted: boolean;
233
+ pathFile: string | null; // Always present, but may be null.
234
+ data: T; // Typed by the T parameter, which defaults to unknown.
235
+ paths: Record<string, ElementPath>;
236
+ }
237
+ interface StateResult { // Three required strings describing a page: url, title, and html.
238
+ url: string;
239
+ title: string;
240
+ html: string;
241
+ }
242
+ interface TabInfo { // Descriptor for one tab; all fields are required.
243
+ index: number; // NOTE(review): 0- vs 1-based is not shown here - confirm.
244
+ url: string;
245
+ title: string;
246
+ active: boolean;
247
+ }
248
+ interface CookieParam { // Cookie descriptor; only `name` and `value` are required. NOTE(review): the field set resembles Playwright's addCookies input - confirm it is forwarded as-is.
249
+ name: string;
250
+ value: string;
251
+ url?: string;
252
+ domain?: string;
253
+ path?: string;
254
+ expires?: number; // NOTE(review): units (seconds vs milliseconds since epoch) are not shown here - confirm.
255
+ httpOnly?: boolean;
256
+ secure?: boolean;
257
+ sameSite?: 'Strict' | 'Lax' | 'None';
258
+ }
259
+ interface FileUploadOptions extends BaseActionOptions { // Upload settings on top of the shared action fields.
260
+ paths: string[]; // Required list of file paths.
261
+ }
262
+ interface BoundingBox { // Rectangle given as origin plus size. NOTE(review): coordinate space and units are not shown here - confirm.
263
+ x: number;
264
+ y: number;
265
+ width: number;
266
+ height: number;
267
+ }
268
+ type OpensteerAgentMode = 'cua'; // Only one mode literal is declared.
269
+ type OpensteerAgentProvider = 'openai' | 'anthropic' | 'google'; // Provider names; reported back in OpensteerAgentResult.provider.
270
+ interface OpensteerAgentModelConfig { // Object form of OpensteerAgentConfig.model; only `modelName` is required.
271
+ modelName: string;
272
+ apiKey?: string;
273
+ baseUrl?: string;
274
+ organization?: string;
275
+ thinkingBudget?: number;
276
+ environment?: string;
277
+ }
278
+ interface OpensteerAgentConfig { // Agent settings; only `mode` is required.
279
+ mode: OpensteerAgentMode;
280
+ model?: string | OpensteerAgentModelConfig; // Either a plain string or a full model config object.
281
+ systemPrompt?: string;
282
+ waitBetweenActionsMs?: number;
283
+ }
284
+ interface OpensteerAgentExecuteOptions { // Object form of the argument to OpensteerAgentInstance.execute; only `instruction` is required.
285
+ instruction: string;
286
+ maxSteps?: number;
287
+ highlightCursor?: boolean;
288
+ }
289
+ interface OpensteerAgentUsage { // Token and timing counters; only `reasoningTokens` is optional.
290
+ inputTokens: number;
291
+ outputTokens: number;
292
+ reasoningTokens?: number;
293
+ inferenceTimeMs: number;
294
+ }
295
+ interface OpensteerAgentAction { // Loosely typed action record: `type` is required, the named fields are optional, and extra keys are allowed.
296
+ type: string; // Plain string, not a literal union, so action kinds are not enumerated here.
297
+ reasoning?: string;
298
+ button?: string;
299
+ clickCount?: number;
300
+ x?: number;
301
+ y?: number;
302
+ text?: string;
303
+ keys?: string[];
304
+ scrollX?: number;
305
+ scrollY?: number;
306
+ timeMs?: number;
307
+ url?: string;
308
+ path?: Array<{ // List of x/y points.
309
+ x: number;
310
+ y: number;
311
+ }>;
312
+ [key: string]: unknown; // Index signature: any additional property is accepted and typed as unknown.
313
+ }
314
+ interface OpensteerAgentResult { // Value resolved by OpensteerAgentInstance.execute; only `usage` is optional.
315
+ success: boolean; // NOTE(review): how `success` differs from `completed` is not shown here - confirm.
316
+ completed: boolean;
317
+ message: string;
318
+ actions: OpensteerAgentAction[];
319
+ usage?: OpensteerAgentUsage;
320
+ provider: OpensteerAgentProvider;
321
+ model: string;
322
+ }
323
+ interface OpensteerAgentInstance { // Agent handle exposing a single `execute` method.
324
+ execute(instructionOrOptions: string | OpensteerAgentExecuteOptions): Promise<OpensteerAgentResult>; // Accepts a bare instruction string or an options object.
325
+ }
326
+
327
+ export type { OpensteerBrowserConfig as $, ActionResult as A, BaseActionOptions as B, CookieParam as C, AiResolveArgs as D, ExtractOptions as E, FileUploadOptions as F, GotoOptions as G, HoverOptions as H, InputOptions as I, AiResolveCallbackResult as J, AiResolveResult as K, LaunchOptions as L, AttributeMatchClause as M, ContextHop as N, OpensteerAuthScheme as O, DomPath as P, ExtractSchema as Q, ExtractSchemaField as R, SnapshotOptions as S, TabInfo as T, ExtractSchemaValue as U, ExtractionFieldPlan as V, ExtractionPlan as W, MatchClause as X, MatchOperator as Y, OpensteerAgentMode as Z, OpensteerAgentModelConfig as _, OpensteerConfig as a, OpensteerCloudAnnouncePolicy as a0, OpensteerCloudBrowserProfileOptions as a1, OpensteerCloudConfig as a2, OpensteerCloudOptions as a3, OpensteerCursorColor as a4, OpensteerCursorProfile as a5, OpensteerStorageConfig as a6, PathNode as a7, PathNodePosition as a8, PositionMatchClause as a9, StateResult as b, ScreenshotOptions as c, ClickOptions as d, SelectOptions as e, ScrollOptions as f, BoundingBox as g, ExtractFromPlanOptions as h, ExtractionRunResult as i, OpensteerCursorState as j, OpensteerAgentConfig as k, OpensteerAgentInstance as l, ElementPath as m, SnapshotMode as n, AiResolveCallback as o, AiExtractCallback as p, OpensteerAgentProvider as q, OpensteerAgentAction as r, OpensteerAgentResult as s, OpensteerAgentUsage as t, OpensteerCursorStyle as u, OpensteerCursorConfig as v, OpensteerAgentExecuteOptions as w, ActionWaitOptions as x, AiExtractArgs as y, AiExtractResult as z };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opensteer",
3
- "version": "0.6.0",
3
+ "version": "0.6.2",
4
4
  "description": "Open-source browser automation SDK and CLI that lets AI agents build complex scrapers directly in your codebase.",
5
5
  "license": "MIT",
6
6
  "type": "module",
@@ -36,6 +36,7 @@
36
36
  "ai": "^6.0.77",
37
37
  "cheerio": "^1.0.0-rc.12",
38
38
  "dotenv": "^17.2.4",
39
+ "open": "^11.0.0",
39
40
  "openai": "^6.25.0",
40
41
  "playwright": "^1.50.0",
41
42
  "skills": "1.4.3",
@@ -70,7 +71,7 @@
70
71
  "url": "https://github.com/steerlabs/opensteer/issues"
71
72
  },
72
73
  "scripts": {
73
- "build": "tsup src/index.ts src/cli/server.ts src/cli/skills-installer.ts --dts --format esm,cjs --clean --external ai --external zod --external @ai-sdk/openai --external @ai-sdk/anthropic --external @ai-sdk/google --external @ai-sdk/xai --external @ai-sdk/groq --external openai --external @anthropic-ai/sdk --external @google/genai",
74
+ "build": "tsup src/index.ts src/cli/server.ts src/cli/skills-installer.ts src/cli/profile.ts src/cli/auth.ts --dts --format esm,cjs --clean --external ai --external zod --external @ai-sdk/openai --external @ai-sdk/anthropic --external @ai-sdk/google --external @ai-sdk/xai --external @ai-sdk/groq --external openai --external @anthropic-ai/sdk --external @google/genai",
74
75
  "test": "vitest run",
75
76
  "test:live-web": "vitest run --config vitest.live-web.config.ts",
76
77
  "test:unit": "vitest run tests/html tests/element-path tests/config.test.ts tests/storage",
@@ -51,12 +51,22 @@ opensteer click 3 --description "the products link"
51
51
  opensteer input 5 "laptop" --pressEnter --description "the search input"
52
52
  ```
53
53
 
54
- For data: take an extraction snapshot, identify counter numbers for each field, then run `extract` with a schema and `--description`. For arrays, include at least 2 items so Opensteer infers the repeating pattern.
54
+ For data, the agent must define the extraction object from the snapshot.
55
+
56
+ - First run `opensteer snapshot extraction` and inspect the counters.
57
+ - Decide the exact JSON object the task needs.
58
+ - Treat the extraction snapshot as a planning aid only. It is trimmed/filtered, so do not read final values from the snapshot HTML itself.
59
+ - Build the full `extract` schema yourself so every leaf field is explicitly bound with `{ element: N }`, `{ element: N, attribute: "..." }`, or `{ source: "current_url" }`.
60
+ - Always call `extract` to read the actual field values from the live page/runtime DOM.
61
+ - Use `--description` only to cache that extraction for replay. Do not rely on `--description` to tell Opensteer what data to collect.
62
+ - For arrays, include at least 2 representative items so Opensteer infers the repeating pattern.
63
+ - Do not replace `extract` with custom DOM parsing when the desired output can be expressed as a structured object.
55
64
 
56
65
  ```bash
57
66
  opensteer snapshot extraction
58
- opensteer extract '{"products":[{"name":{"element":11},"price":{"element":12}},{"name":{"element":25},"price":{"element":26}}]}' \
59
- --description "product listing"
67
+ # Decide the full output object first, then bind every leaf field explicitly
68
+ opensteer extract '{"images":[{"imageUrl":{"element":11,"attribute":"src"},"alt":{"element":11,"attribute":"alt"},"caption":{"element":14},"credit":{"element":15}},{"imageUrl":{"element":24,"attribute":"src"},"alt":{"element":24,"attribute":"alt"},"caption":{"element":27},"credit":{"element":28}}]}' \
69
+ --description "article images with captions and credits"
60
70
  ```
61
71
 
62
72
  Repeat Step 3 → Step 4 for every distinct page type the scraper will visit.
@@ -71,7 +81,7 @@ opensteer close
71
81
 
72
82
  ## Phase 2 — SDK Scraper Script
73
83
 
74
- Use cached `description` strings (exact match to CLI `--description` values). `name` must match `--name` from Phase 1.
84
+ Use cached `description` strings (exact match to CLI `--description` values) only after Phase 1 has already established the exact extraction schema from `snapshot extraction`. `name` must match `--name` from Phase 1.
75
85
 
76
86
  ```typescript
77
87
  import { Opensteer } from "opensteer";
@@ -115,7 +125,26 @@ await opensteer.select({ description: "...", label: "Option A" });
115
125
  await opensteer.scroll({ direction: "down", amount: 500 });
116
126
 
117
127
  await opensteer.extract({ description: "..." }); // replay from cache
118
- await opensteer.extract({ schema: { title: { element: 3 } }, description: "..." }); // first cache
128
+ await opensteer.extract({ schema: { title: { element: 3 } }, description: "..." }); // explicit first cache
129
+ await opensteer.extract({
130
+ description: "article images with captions and credits",
131
+ schema: {
132
+ images: [
133
+ {
134
+ imageUrl: { element: 11, attribute: "src" },
135
+ alt: { element: 11, attribute: "alt" },
136
+ caption: { element: 14 },
137
+ credit: { element: 15 },
138
+ },
139
+ {
140
+ imageUrl: { element: 24, attribute: "src" },
141
+ alt: { element: 24, attribute: "alt" },
142
+ caption: { element: 27 },
143
+ credit: { element: 28 },
144
+ },
145
+ ],
146
+ },
147
+ }); // first extraction run: agent defines the full object from the snapshot
119
148
 
120
149
  await opensteer.waitForText("literal text");
121
150
  await opensteer.page.waitForSelector("css-selector"); // SPA content guard
@@ -136,19 +136,21 @@ opensteer wait-selector "h1" # Wait for selector to appea
136
136
 
137
137
  ```bash
138
138
  opensteer snapshot extraction
139
- # Read counters from output, then:
139
+ # `schema-json` describes the output shape. It can use explicit bindings or semantic placeholders.
140
+
141
+ # Explicit field bindings from observed counters/attributes:
140
142
  opensteer extract '{"title":{"element":3},"price":{"element":7}}'
141
143
  opensteer extract '{"url":{"element":5,"attribute":"href"}}'
142
144
  opensteer extract '{"pageUrl":{"source":"current_url"},"title":{"element":3}}'
143
145
 
144
- # Arrays: include multiple items to identify the pattern
146
+ # Explicit array bindings: include multiple items to identify the repeating pattern
145
147
  opensteer extract '{"results":[{"title":{"element":11},"url":{"element":10,"attribute":"href"}},{"title":{"element":16},"url":{"element":15,"attribute":"href"}}]}'
146
- ```
147
148
 
148
- ### AI-based (limited and requires LLM API keys)
149
-
150
- ```bash
151
- opensteer extract '{"title":"","price":""}' --description "product details"
149
+ # Semantic extraction: use the output shape plus description/prompt
150
+ opensteer extract '{"title":"string","price":"string"}' --description "product details"
151
+ opensteer extract '{"images":[{"imageUrl":"string","alt":"string","caption":"string","credit":"string"}]}' \
152
+ --description "article images with captions and credits" \
153
+ --prompt "For each image, return the image URL, alt text, caption, and credit. Prefer caption and credit from the same figure. If missing, look at sibling text, then parent/container text, then nearby alt/data-* attributes."
152
154
  ```
153
155
 
154
- Always prefer counter-based. AI extraction requires `@ai-sdk/*` packages and does NOT work from workspace root scripts.
156
+ Use explicit bindings when you need deterministic element-to-field mappings. Use semantic extraction when the fields require relationship inference or fallback rules. `--prompt` is the place to describe those rules.
@@ -64,16 +64,27 @@ const data = await opensteer.extract({
64
64
  description: "product details",
65
65
  });
66
66
 
67
- // Counter-based (during exploration or when no cache exists)
67
+ // Semantic extraction: schema is the output shape
68
+ const images = await opensteer.extract({
69
+ description: "article images with captions and credits",
70
+ prompt: "For each image, return the image URL, alt text, caption, and credit. Prefer caption and credit from the same figure. If missing, look at sibling text, then parent/container text, then nearby alt/data-* attributes.",
71
+ schema: {
72
+ images: [{ imageUrl: "string", alt: "string", caption: "string", credit: "string" }],
73
+ },
74
+ });
75
+
76
+ // Explicit bindings (during exploration or when no cache exists)
68
77
  const data = await opensteer.extract({
69
78
  schema: { title: { element: 3 }, price: { element: 7 } },
70
79
  description: "product details",
71
80
  });
72
81
  ```
73
82
 
74
- Schema field types: `{ element: N }`, `{ element: N, attribute: "href" }`, `{ selector: ".price" }`, `{ source: "current_url" }`.
83
+ `schema` describes the output shape, not just selector config. It can use semantic placeholders like `"string"` and arrays of objects, or explicit field bindings such as `{ element: N }`, `{ element: N, attribute: "href" }`, `{ selector: ".price" }`, and `{ source: "current_url" }`.
84
+
85
+ Use `prompt` to describe relationship/fallback rules, such as matching each image to its caption and credit.
75
86
 
76
- For arrays, include multiple items in the schema. Opensteer caches the structural pattern and expands to all matching items on replay.
87
+ For explicit array bindings, include multiple items in the schema so Opensteer can infer the repeating pattern. For semantic extraction, a single representative object shape is enough.
77
88
 
78
89
  ## Keyboard
79
90