opensteer 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md ADDED
@@ -0,0 +1,27 @@
1
+ # Changelog
2
+
3
+ ## Unreleased
4
+
5
+ - Breaking: removed legacy `ai` config from `OpensteerConfig`; use top-level `model` instead.
6
+ - Breaking: `OPENSTEER_AI_MODEL` is no longer supported; use `OPENSTEER_MODEL`.
7
+ - Breaking: `OPENSTEER_RUNTIME` is no longer supported; use `OPENSTEER_MODE`.
8
+ - Breaking: mode selection now uses `mode: 'local' | 'remote'` and remote credentials use `remote.apiKey`.
9
+ - Opensteer now enables built-in LLM resolve/extract by default with model `gpt-5.1`.
10
+ - Remote mode now falls back to `OPENSTEER_API_KEY` when `remote.apiKey` is omitted.
11
+ - Mutating actions now include a smart, best-effort post-action wait with per-action
12
+ profiles and optional per-call overrides via `wait`.
13
+ - Added structured interaction diagnostics via `OpensteerActionError` for
14
+ descriptor-aware interaction methods (`click`, `dblclick`, `rightclick`,
15
+ `hover`, `input`, `select`, `scroll`, `uploadFile`).
16
+ - Added `ActionFailure` types (`ActionFailureCode`, `retryable`,
17
+ `classificationSource`, optional `details`) to support programmatic handling
18
+ of action failures.
19
+ - Added DOM actionability probe + Playwright call-log classification to report
20
+ reasons like `BLOCKED_BY_INTERCEPTOR`, `NOT_VISIBLE`, `NOT_EDITABLE`, and
21
+ timeout/stale-target cases more accurately.
22
+ - Remote action failures now accept optional structured failure details and map
23
+ them to `OpensteerActionError` when available.
24
+
25
+ ## 0.1.0
26
+
27
+ - Initial open-source release.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 TrendUp AI, Inc.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,156 @@
1
+ # Opensteer
2
+
3
+ Lean browser automation SDK for coding agents and script replay.
4
+
5
+ `opensteer` provides descriptor-aware actions (`click`, `dblclick`,
6
+ `rightclick`, `hover`, `input`, `select`, `scroll`, `extract`,
7
+ `extractFromPlan`, `uploadFile`), observation (`snapshot`, `state`,
8
+ `screenshot`), navigation (`goto`), and convenience methods for tabs, cookies,
9
+ keyboard, element info, and wait.
10
+
11
+ For anything not covered, use raw Playwright via `opensteer.page` and
12
+ `opensteer.context`.
13
+
14
+ ## Install
15
+
16
+ ```bash
17
+ # npm
18
+ npm install opensteer playwright
19
+ # pnpm
20
+ pnpm add opensteer playwright
21
+ ```
22
+
23
+ ## Quickstart
24
+
25
+ ```ts
26
+ import { Opensteer } from "opensteer";
27
+
28
+ const opensteer = new Opensteer({ name: "my-scraper" }); // defaults to model: 'gpt-5.1'
29
+ await opensteer.launch({ headless: false });
30
+
31
+ await opensteer.goto("https://example.com");
32
+ const html = await opensteer.snapshot();
33
+
34
+ await opensteer.click({ description: "login-button" });
35
+ await opensteer.input({ description: "email", text: "user@example.com" });
36
+ await opensteer.page.keyboard.press("Enter");
37
+
38
+ await opensteer.close();
39
+ ```
40
+
41
+ ## Core Model
42
+
43
+ - `opensteer.page`: raw Playwright `Page`
44
+ - `opensteer.context`: raw Playwright `BrowserContext`
45
+ - Opensteer methods: descriptor-aware operations that can persist selectors
46
+ - Selector storage: `.opensteer/selectors/<namespace>`
47
+
48
+ ## Resolution Chain
49
+
50
+ For actions like `click`/`input`/`hover`/`select`/`scroll`:
51
+
52
+ 1. Use persisted path for `description` (if present)
53
+ 2. Use `element` counter from snapshot
54
+ 3. Use explicit CSS `selector`
55
+ 4. Use built-in LLM resolution (`description` required)
56
+ 5. Throw
57
+
58
+ When steps 2-4 resolve and `description` is provided, the path is persisted.
59
+
60
+ ## Smart Post-Action Wait
61
+
62
+ Mutating actions (`click`, `input`, `select`, `scroll`, etc.) include a
63
+ best-effort post-action wait so delayed visual updates are usually settled
64
+ before the method resolves.
65
+
66
+ You can disable or tune this per call:
67
+
68
+ ```ts
69
+ await opensteer.click({ description: "Save button", wait: false });
70
+
71
+ await opensteer.click({
72
+ description: "Save button",
73
+ wait: { timeout: 9000, settleMs: 900, includeNetwork: true, networkQuietMs: 400 },
74
+ });
75
+ ```
76
+
77
+ ## Action Failure Diagnostics
78
+
79
+ Descriptor-aware interaction methods (`click`, `dblclick`, `rightclick`,
80
+ `hover`, `input`, `select`, `scroll`, `uploadFile`) throw
81
+ `OpensteerActionError` when an interaction cannot be completed.
82
+
83
+ The error includes structured failure metadata for agent/tooling decisions:
84
+
85
+ - `error.failure.code` (`ActionFailureCode`)
86
+ - `error.failure.message`
87
+ - `error.failure.retryable`
88
+ - `error.failure.classificationSource`
89
+ - `error.failure.details` (for blocker and observation details when available)
90
+
91
+ ```ts
92
+ import { Opensteer, OpensteerActionError } from "opensteer";
93
+
94
+ try {
95
+ await opensteer.click({ description: "Save button" });
96
+ } catch (err) {
97
+ if (err instanceof OpensteerActionError) {
98
+ console.error(err.failure.code); // e.g. BLOCKED_BY_INTERCEPTOR
99
+ console.error(err.failure.message);
100
+ console.error(err.failure.classificationSource);
101
+ }
102
+ throw err;
103
+ }
104
+ ```
105
+
106
+ ## Snapshot Modes
107
+
108
+ ```ts
109
+ await opensteer.snapshot(); // action mode (default)
110
+ await opensteer.snapshot({ mode: "extraction" });
111
+ await opensteer.snapshot({ mode: "clickable" });
112
+ await opensteer.snapshot({ mode: "scrollable" });
113
+ await opensteer.snapshot({ mode: "full" });
114
+ ```
115
+
116
+ ## Two Usage Patterns
117
+
118
+ ### Explore (coding agent, no API key required)
119
+
120
+ Use `snapshot()` + `element` counters while exploring in real time, then persist
121
+ stable descriptions for replay.
122
+
123
+ ### Run (script replay / built-in LLM)
124
+
125
+ Opensteer uses built-in LLM resolve/extract by default. You can override the
126
+ default model with top-level `model` or `OPENSTEER_MODEL`.
127
+
128
+ ```ts
129
+ const opensteer = new Opensteer({
130
+ name: "run-mode",
131
+ model: "gpt-5-mini",
132
+ });
133
+ ```
134
+
135
+ ## Mode Selection
136
+
137
+ Opensteer defaults to local mode.
138
+
139
+ - `OPENSTEER_MODE=local` runs local Playwright.
140
+ - `OPENSTEER_MODE=remote` runs remote mode (requires an API key: `remote.apiKey` in config, or `OPENSTEER_API_KEY` as the fallback).
141
+ - `mode: "remote"` in constructor config always forces remote mode.
142
+
143
+ Remote mode is fail-fast: it does not automatically fall back to local mode.
144
+
145
+ ## Docs
146
+
147
+ - `docs/getting-started.md`
148
+ - `docs/api-reference.md`
149
+ - `docs/remote-integration.md`
150
+ - `docs/html-cleaning.md`
151
+ - `docs/selectors.md`
152
+ - `docs/live-web-tests.md`
153
+
154
+ ## License
155
+
156
+ MIT
@@ -0,0 +1,388 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { connect } from 'net'
4
+ import { spawn } from 'child_process'
5
+ import { existsSync, readFileSync, unlinkSync, mkdirSync } from 'fs'
6
+ import { join, dirname } from 'path'
7
+ import { homedir } from 'os'
8
+ import { fileURLToPath } from 'url'
9
+
10
// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url))

// Per-user runtime directory holding the server's socket and pid file.
const RUNTIME_DIR = join(homedir(), '.opensteer')
const SOCKET_PATH = join(RUNTIME_DIR, 'opensteer.sock')
const PID_PATH = join(RUNTIME_DIR, 'opensteer.pid')

// Server entry point, resolved relative to this script's directory.
const SERVER_SCRIPT = join(__dirname, '..', 'dist', 'cli', 'server.js')

// All durations are in milliseconds.
const CONNECT_TIMEOUT = 15_000 // max wait for the socket file after spawning the server
const POLL_INTERVAL = 100 // delay between socket-file existence checks
const RESPONSE_TIMEOUT = 120_000 // max wait for a response line from the server
20
+
21
/**
 * Splits the raw process arguments into a command, named flags and positionals.
 *
 * The first two entries of `argv` (runtime and script path) are skipped. With no
 * command, or with `--help` / `-h` as the first argument, the help text is
 * printed and the process exits with code 0.
 *
 * A `--key` token followed by a token that does not start with `--` becomes
 * `flags[key] = parseValue(next)`; otherwise `flags[key] = true`. Flags that the
 * help text documents as value-less switches (`--headless`, `--help`) only
 * consume the next token when it is an explicit `true` / `false`, so that
 * `open --headless https://example.com` keeps the URL as a positional instead
 * of storing it as the value of `headless`.
 *
 * @param {string[]} argv - Typically `process.argv`.
 * @returns {{ command: string, flags: Object, positional: string[] }}
 */
function parseArgs(argv) {
  const switchFlags = new Set(['headless', 'help'])

  const args = argv.slice(2)
  if (args.length === 0 || args[0] === '--help' || args[0] === '-h') {
    printHelp()
    process.exit(0)
  }

  const command = args[0]
  const flags = {}
  const positional = []

  for (let i = 1; i < args.length; i++) {
    const arg = args[i]
    if (!arg.startsWith('--')) {
      positional.push(arg)
      continue
    }

    const key = arg.slice(2)
    const next = args[i + 1]
    const hasValue = next !== undefined && !next.startsWith('--')

    if (switchFlags.has(key)) {
      // A switch never swallows an arbitrary following token (it is most
      // likely a positional); only an explicit boolean literal is accepted.
      if (next === 'true' || next === 'false') {
        flags[key] = next === 'true'
        i++
      } else {
        flags[key] = true
      }
    } else if (hasValue) {
      flags[key] = parseValue(next)
      i++
    } else {
      flags[key] = true
    }
  }

  return { command, flags, positional }
}
50
+
51
/**
 * Coerces a raw flag value into a boolean or number when that is lossless,
 * otherwise returns the string unchanged.
 *
 * - `'true'` / `'false'` become booleans.
 * - Numeric strings become numbers, except when the conversion would lose
 *   information: non-finite results (`'Infinity'` would be serialized as `null`
 *   by `JSON.stringify` when the request is sent), values with redundant
 *   leading zeros (`'007'`, e.g. codes typed into a form), and integers beyond
 *   the safe-integer range (long numeric IDs).
 * - Empty / whitespace-only strings stay strings.
 *
 * @param {string} str - Raw command-line token.
 * @returns {boolean|number|string}
 */
function parseValue(str) {
  if (str === 'true') return true
  if (str === 'false') return false
  if (str.trim() === '') return str

  const num = Number(str)
  if (!Number.isFinite(num)) return str

  // "007" -> 7 would silently drop the leading zeros.
  if (/^\s*[+-]?0\d/.test(str)) return str

  // Integers past 2^53 - 1 cannot be represented exactly.
  if (Number.isInteger(num) && !Number.isSafeInteger(num)) return str

  return num
}
58
+
59
/**
 * Builds the request object sent to the server for one CLI invocation.
 *
 * All flags are copied into `args` (global flags first, in a fixed order, then
 * the remaining flags in their original order). Positional arguments are then
 * mapped onto command-specific argument names. The caller's `flags` object is
 * left untouched.
 *
 * For the `extract` command an unparsable positional schema is reported through
 * `error()`, which terminates the process.
 *
 * @param {string} command - CLI command name (e.g. `click`, `open`).
 * @param {Object} flags - Parsed `--key value` flags.
 * @param {string[]} positional - Positional arguments following the command.
 * @returns {{ id: number, command: string, args: Object }}
 */
function buildRequest(command, flags, positional) {
  const id = 1

  // Copy global flags first so they lead the key order, then spread the rest.
  // Spreading re-assigns the global keys in place, so the result is the same
  // as moving them out of `flags`, without mutating the caller's object.
  const globalFlags = {}
  for (const key of ['name', 'headless', 'json', 'connect-url', 'channel', 'profile-dir']) {
    if (key in flags) {
      globalFlags[key] = flags[key]
    }
  }
  const args = { ...globalFlags, ...flags }

  const first = positional[0]

  switch (command) {
    case 'open':
    case 'navigate':
    case 'tab-new':
      args.url = first || args.url
      break

    case 'click':
    case 'dblclick':
    case 'rightclick':
    case 'hover':
    case 'select':
    case 'scroll':
    case 'get-text':
    case 'get-value':
    case 'get-attrs':
      // NOTE(review): a non-numeric positional yields NaN here, which
      // JSON.stringify serializes as null — confirm how the server treats it.
      if (first !== undefined && args.element === undefined) {
        args.element = Number(first)
      }
      break

    case 'input': {
      // Accepts `input 12 "text"` or `input "text" 12`; explicit --element /
      // --text flags always win over positionals.
      if (positional.length >= 2) {
        const firstAsNumber = Number(first)
        if (!Number.isNaN(firstAsNumber)) {
          args.element = args.element ?? firstAsNumber
          args.text = args.text ?? positional[1]
        } else {
          args.text = args.text ?? first
          args.element = args.element ?? Number(positional[1])
        }
      } else if (positional.length === 1) {
        args.text = args.text ?? first
      }
      break
    }

    case 'press':
      args.key = first || args.key
      break

    case 'type':
    case 'wait-for':
      args.text = first || args.text
      break

    case 'get-html':
      if (first && !args.selector) {
        args.selector = first
      }
      break

    case 'tab-switch':
    case 'tab-close':
      args.index = first !== undefined ? Number(first) : args.index
      break

    case 'cookies-export':
    case 'cookies-import':
    case 'screenshot':
      args.file = first || args.file
      break

    case 'eval':
      args.expression = first || args.expression
      break

    case 'wait-selector':
      args.selector = first || args.selector
      break

    case 'extract':
      if (first && !args.schema) {
        try {
          args.schema = JSON.parse(first)
        } catch {
          error(`Invalid JSON schema: ${first}`)
        }
      }
      break

    case 'snapshot':
      args.mode = first || args.mode
      break
  }

  return { id, command, args }
}
167
+
168
/**
 * Reports whether the server process recorded in the pid file is alive.
 *
 * Returns false when the pid file is missing. When the file exists but cannot
 * be read, does not contain a positive integer PID, or the PID cannot be
 * signalled, the stale socket and pid files are removed and false is returned.
 *
 * The PID is validated before probing because `process.kill` gives special
 * meaning to zero and negative PIDs (process groups) on POSIX systems; a
 * corrupt pid file must not be reported as a live server.
 *
 * @returns {boolean} true when signal 0 could be delivered to the recorded PID.
 */
function isServerRunning() {
  if (!existsSync(PID_PATH)) return false

  let alive = false
  try {
    const pid = Number.parseInt(readFileSync(PID_PATH, 'utf-8').trim(), 10)
    if (Number.isInteger(pid) && pid > 0) {
      // Signal 0 performs the existence/permission check without signalling.
      process.kill(pid, 0)
      alive = true
    }
  } catch {
    // Unreadable pid file or no such process: treated as stale below.
  }

  if (!alive) cleanStaleFiles()
  return alive
}
179
+
180
/**
 * Removes the socket file and the pid file left behind by a previous server.
 * Each removal is best-effort: failures (such as a file that is already gone)
 * are ignored and never prevent the other removal from being attempted.
 */
function cleanStaleFiles() {
  for (const stalePath of [SOCKET_PATH, PID_PATH]) {
    try {
      unlinkSync(stalePath)
    } catch {
      // Intentionally ignored — cleanup is best-effort.
    }
  }
}
188
+
189
/**
 * Spawns the server script as a detached background process.
 *
 * Ensures the runtime directory exists, then launches the server with the same
 * Node.js executable that is running this CLI (`process.execPath`) rather than
 * whatever `node` resolves to on PATH, which may be a different version or
 * missing entirely. stdio is ignored and the child is unref'd so this process
 * can exit while the server keeps running.
 */
function startServer() {
  mkdirSync(RUNTIME_DIR, { recursive: true })

  const child = spawn(process.execPath, [SERVER_SCRIPT], {
    detached: true,
    stdio: ['ignore', 'ignore', 'ignore'],
  })
  child.unref()
}
198
+
199
/**
 * Waits until the server's socket file exists.
 *
 * The elapsed-time check runs before each existence check; between checks the
 * function sleeps for POLL_INTERVAL milliseconds.
 *
 * @param {number} timeout - Maximum time to wait, in milliseconds.
 * @returns {Promise<void>} Resolves once the socket file is present; rejects
 *   with an Error when the timeout elapses first.
 */
async function waitForSocket(timeout) {
  const startedAt = Date.now()

  for (;;) {
    if (Date.now() - startedAt > timeout) {
      throw new Error('Timed out waiting for server to start')
    }
    if (existsSync(SOCKET_PATH)) {
      return
    }
    await new Promise((wake) => {
      setTimeout(wake, POLL_INTERVAL)
    })
  }
}
220
+
221
/**
 * Sends one request to the server over the local socket and resolves with the
 * parsed first line of the response.
 *
 * The request is written as a single JSON line once the socket connects. The
 * response is accumulated until the first newline, then parsed as JSON.
 *
 * The socket is put into UTF-8 string mode so that multi-byte characters which
 * happen to be split across two chunks are reassembled by the stream's decoder
 * instead of being corrupted by decoding each chunk independently.
 *
 * @param {Object} request - JSON-serializable request object.
 * @returns {Promise<any>} Resolves with the parsed response. Rejects on socket
 *   error, on close before a full line arrived, on invalid JSON, or after
 *   RESPONSE_TIMEOUT milliseconds without a complete response.
 */
function sendCommand(request) {
  return new Promise((resolve, reject) => {
    const socket = connect(SOCKET_PATH)
    socket.setEncoding('utf8')

    let buffer = ''
    let settled = false

    const timer = setTimeout(() => {
      if (!settled) {
        settled = true
        socket.destroy()
        reject(new Error('Response timeout'))
      }
    }, RESPONSE_TIMEOUT)

    socket.on('connect', () => {
      socket.write(JSON.stringify(request) + '\n')
    })

    socket.on('data', (chunk) => {
      // Anything arriving after the outcome is decided is irrelevant.
      if (settled) return

      buffer += chunk
      const idx = buffer.indexOf('\n')
      if (idx === -1) return

      const line = buffer.slice(0, idx)
      clearTimeout(timer)
      settled = true
      socket.end()
      try {
        resolve(JSON.parse(line))
      } catch {
        reject(new Error('Invalid JSON response from server'))
      }
    })

    socket.on('error', (err) => {
      if (!settled) {
        clearTimeout(timer)
        settled = true
        reject(err)
      }
    })

    socket.on('close', () => {
      if (!settled) {
        clearTimeout(timer)
        settled = true
        reject(new Error('Connection closed before response'))
      }
    })
  })
}
272
+
273
/**
 * Writes `data` to stdout as a single JSON line.
 *
 * @param {*} data - Value to serialize with JSON.stringify.
 */
function output(data) {
  const line = `${JSON.stringify(data)}\n`
  process.stdout.write(line)
}
276
+
277
/**
 * Reports a failure as a JSON line (`{ ok: false, error }`) on stderr and
 * terminates the process with exit code 1.
 *
 * @param {string} msg - Error message to report.
 */
function error(msg) {
  const payload = { ok: false, error: msg }
  process.stderr.write(`${JSON.stringify(payload)}\n`)
  process.exit(1)
}
281
+
282
/**
 * Prints the CLI usage text to stdout with console.log: the available commands
 * grouped by area, the global flags, and the environment variables.
 * Takes no arguments and returns nothing.
 */
+ function printHelp() {
283
+ console.log(`Usage: opensteer <command> [options]
284
+
285
+ Navigation:
286
+ open <url> Open browser and navigate to URL
287
+ navigate <url> Navigate and wait for visual stability
288
+ back Go back
289
+ forward Go forward
290
+ reload Reload page
291
+ close Close browser and server
292
+
293
+ Observation:
294
+ snapshot [--mode action] Get page snapshot
295
+ state Get page URL, title, and snapshot
296
+ screenshot [file] Take screenshot
297
+
298
+ Actions:
299
+ click [element] Click element
300
+ dblclick [element] Double-click element
301
+ rightclick [element] Right-click element
302
+ hover [element] Hover over element
303
+ input [element] <text> Input text into element
304
+ select [element] Select option from dropdown
305
+ scroll [element] Scroll page or element
306
+
307
+ Keyboard:
308
+ press <key> Press key
309
+ type <text> Type text into focused element
310
+
311
+ Element Info:
312
+ get-text [element] Get element text
313
+ get-value [element] Get element value
314
+ get-attrs [element] Get element attributes
315
+ get-html [selector] Get page or element HTML
316
+
317
+ Tabs:
318
+ tabs List tabs
319
+ tab-new [url] Open new tab
320
+ tab-switch <index> Switch to tab
321
+ tab-close [index] Close tab
322
+
323
+ Cookies:
324
+ cookies [--url] Get cookies
325
+ cookie-set Set cookie (--name, --value, ...)
326
+ cookies-clear Clear all cookies
327
+ cookies-export <file> Export cookies to file
328
+ cookies-import <file> Import cookies from file
329
+
330
+ Utility:
331
+ eval <expression> Evaluate JavaScript
332
+ wait-for <text> Wait for text to appear
333
+ wait-selector <selector> Wait for selector
334
+ extract <schema-json> Extract structured data
335
+
336
+ Global Flags:
337
+ --name <namespace> Storage namespace (default: "cli")
338
+ --headless Launch browser in headless mode
339
+ --connect-url <url> Connect to a running browser (e.g. http://localhost:9222)
340
+ --channel <browser> Use installed browser (chrome, chrome-beta, msedge)
341
+ --profile-dir <path> Browser profile directory for logged-in sessions
342
+ --element <N> Target element by counter
343
+ --selector <css> Target element by CSS selector
344
+ --description <text> Description for selector persistence
345
+ --help Show this help
346
+
347
+ Environment:
348
+ OPENSTEER_MODE Runtime mode: "local" (default) or "remote"
349
+ OPENSTEER_API_KEY Required when remote mode is selected
350
+ OPENSTEER_BASE_URL Override remote control-plane base URL
351
+ `)
352
+ }
353
+
354
/**
 * CLI entry point: parses the command line, makes sure a server is available,
 * sends the request and prints the outcome.
 *
 * When no live server is detected, the server script is spawned and the
 * function waits up to CONNECT_TIMEOUT for its socket file to appear. A
 * successful response is printed to stdout as `{ ok: true, ...result }`; a
 * failed response, or any error while communicating, is printed to stderr as
 * a JSON error and the process exits with code 1.
 */
async function main() {
  const parsed = parseArgs(process.argv)
  const request = buildRequest(parsed.command, parsed.flags, parsed.positional)

  const serverMissing = !isServerRunning()
  if (serverMissing) {
    if (!existsSync(SERVER_SCRIPT)) {
      error(`Server script not found: ${SERVER_SCRIPT}. Run the build script first.`)
    }
    startServer()
    await waitForSocket(CONNECT_TIMEOUT).catch(() => {
      error('Failed to start server. Check that the build is complete.')
    })
  }

  try {
    const response = await sendCommand(request)

    if (!response.ok) {
      const failure = { ok: false, error: response.error }
      process.stderr.write(JSON.stringify(failure) + '\n')
      process.exit(1)
    } else {
      output({ ok: true, ...response.result })
    }
  } catch (err) {
    error(err.message || 'Connection failed')
  }
}
387
+
388
// Run the CLI. Failures that escape main() (for example a filesystem error
// while preparing the runtime directory) are reported in the same JSON error
// format as every other failure instead of surfacing as an unhandled rejection.
main().catch((err) => {
  error(err?.message || 'Unexpected error')
})
@@ -0,0 +1,69 @@
1
+ // src/extract-field-plan.ts
2
// String value that marks a field as "take the current page URL".
const CURRENT_URL_SENTINEL = "CURRENT_URL";
// Property name that identifies an object as a counter leaf descriptor.
const COUNTER_KEY = "$c";
4
/**
 * Flattens nested extraction data into a flat field plan keyed by path.
 *
 * Nested object keys are joined with `.` and array indices are written as
 * `[i]`; each recognized leaf becomes one entry in the returned object.
 *
 * @param {*} data - Nested extraction data.
 * @returns {Object} Map from field path to leaf descriptor.
 */
function flattenExtractionDataToFieldPlan(data) {
  const plan = {};
  flattenExtractionDataToFieldPlanRecursive(data, "", plan);
  return plan;
}
9
/**
 * Walks `value` and records every recognized leaf into `out` under its path.
 *
 * Recognized leaves:
 * - a finite number            -> `{ element: <truncated number> }`
 * - the string `CURRENT_URL` (trimmed, case-insensitive) -> `{ source: "current_url" }`
 * - a counter descriptor object -> whatever parseCounterLeafDescriptor returns
 *
 * Arrays and other objects are traversed recursively; anything else is
 * ignored. A leaf whose trimmed path is empty is dropped.
 *
 * @param {*} value - Current node.
 * @param {string} prefix - Path of the current node ("" at the root).
 * @param {Object} out - Accumulator that receives `path -> leaf` entries.
 */
function flattenExtractionDataToFieldPlanRecursive(value, prefix, out) {
  if (value == null) return;

  // Stores a leaf under the trimmed path, skipping leaves without a path.
  const record = (leaf) => {
    const path = String(prefix || "").trim();
    if (path) {
      out[path] = leaf;
    }
  };

  if (typeof value === "number" && Number.isFinite(value)) {
    record({ element: Math.trunc(value) });
    return;
  }

  if (typeof value === "string" && value.trim().toUpperCase() === CURRENT_URL_SENTINEL) {
    record({ source: "current_url" });
    return;
  }

  const descriptor = parseCounterLeafDescriptor(value);
  if (descriptor) {
    record(descriptor);
    return;
  }

  if (Array.isArray(value)) {
    for (const [index, item] of value.entries()) {
      const itemPath = prefix ? `${prefix}[${index}]` : `[${index}]`;
      flattenExtractionDataToFieldPlanRecursive(item, itemPath, out);
    }
    return;
  }

  if (typeof value !== "object") return;

  for (const [name, child] of Object.entries(value)) {
    const childPath = prefix ? `${prefix}.${name}` : name;
    flattenExtractionDataToFieldPlanRecursive(child, childPath, out);
  }
}
45
/**
 * Interprets `value` as a counter leaf descriptor.
 *
 * A descriptor is a non-array object with an own `$c` property holding a
 * finite number. An optional string `$a` names an attribute; it is trimmed and
 * only included when non-empty.
 *
 * @param {*} value - Candidate descriptor.
 * @returns {{ element: number, attribute?: string } | null} The normalized
 *   leaf (counter truncated to an integer), or null when `value` is not a
 *   descriptor.
 */
function parseCounterLeafDescriptor(value) {
  const isRecord = Boolean(value) && typeof value === "object" && !Array.isArray(value);
  if (!isRecord || !Object.hasOwn(value, COUNTER_KEY)) {
    return null;
  }

  const counter = value.$c;
  const isFiniteNumber = typeof counter === "number" && Number.isFinite(counter);
  if (!isFiniteNumber) {
    return null;
  }

  const leaf = { element: Math.trunc(counter) };

  const rawAttribute = value.$a;
  const attribute = typeof rawAttribute === "string" ? rawAttribute.trim() : "";
  if (attribute) {
    leaf.attribute = attribute;
  }
  return leaf;
}
66
+
67
+ export {
68
+ flattenExtractionDataToFieldPlan
69
+ };