opensteer 0.3.0 → 0.4.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,23 +4,8 @@
4
4
 
5
5
  - Breaking: removed legacy `ai` config from `OpensteerConfig`; use top-level `model` instead.
6
6
  - Breaking: `OPENSTEER_AI_MODEL` is no longer supported; use `OPENSTEER_MODEL`.
7
- - Breaking: `OPENSTEER_RUNTIME` is no longer supported; use `OPENSTEER_MODE`.
8
- - Breaking: mode selection now uses `mode: 'local' | 'remote'` and remote credentials use `remote.apiKey`.
9
7
  - Opensteer now enables built-in LLM resolve/extract by default with model `gpt-5.1`.
10
- - Remote mode now falls back to `OPENSTEER_API_KEY` when `remote.apiKey` is omitted.
11
- - Mutating actions now include smart best-effort post-action wait with per-action
12
- profiles and optional per-call overrides via `wait`.
13
- - Added structured interaction diagnostics via `OpensteerActionError` for
14
- descriptor-aware interaction methods (`click`, `dblclick`, `rightclick`,
15
- `hover`, `input`, `select`, `scroll`, `uploadFile`).
16
- - Added `ActionFailure` types (`ActionFailureCode`, `retryable`,
17
- `classificationSource`, optional `details`) to support programmatic handling
18
- of action failures.
19
- - Added DOM actionability probe + Playwright call-log classification to report
20
- reasons like `BLOCKED_BY_INTERCEPTOR`, `NOT_VISIBLE`, `NOT_EDITABLE`, and
21
- timeout/stale-target cases more accurately.
22
- - Remote action failures now accept optional structured failure details and map
23
- them to `OpensteerActionError` when available.
8
+ - Cloud mode now falls back to `OPENSTEER_API_KEY` when `cloud.key` is omitted.
24
9
 
25
10
  ## 0.1.0
26
11
 
package/README.md CHANGED
@@ -2,14 +2,11 @@
2
2
 
3
3
  Lean browser automation SDK for coding agents and script replay.
4
4
 
5
- `opensteer` provides descriptor-aware actions (`click`, `dblclick`,
6
- `rightclick`, `hover`, `input`, `select`, `scroll`, `extract`,
7
- `extractFromPlan`, `uploadFile`), observation (`snapshot`, `state`,
8
- `screenshot`), navigation (`goto`), and convenience methods for tabs, cookies,
9
- keyboard, element info, and wait.
5
+ `opensteer` wraps only operations that need descriptor resolution (`snapshot`,
6
+ `click`, `dblclick`, `rightclick`, `hover`, `input`, `select`, `scroll`,
7
+ `extract`, `extractFromPlan`, `state`).
10
8
 
11
- For anything not covered, use raw Playwright via `opensteer.page` and
12
- `opensteer.context`.
9
+ Everything else is raw Playwright via `ov.page` and `ov.context`.
13
10
 
14
11
  ## Install
15
12
 
@@ -25,23 +22,23 @@ pnpm add opensteer playwright
25
22
  ```ts
26
23
  import { Opensteer } from "opensteer";
27
24
 
28
- const opensteer = new Opensteer({ name: "my-scraper" }); // defaults to model: 'gpt-5.1'
29
- await opensteer.launch({ headless: false });
25
+ const ov = new Opensteer({ name: "my-scraper" }); // defaults to model: 'gpt-5.1'
26
+ await ov.launch({ headless: false });
30
27
 
31
- await opensteer.goto("https://example.com");
32
- const html = await opensteer.snapshot();
28
+ await ov.page.goto("https://example.com");
29
+ const html = await ov.snapshot();
33
30
 
34
- await opensteer.click({ description: "login-button" });
35
- await opensteer.input({ description: "email", text: "user@example.com" });
36
- await opensteer.page.keyboard.press("Enter");
31
+ await ov.click({ description: "login-button" });
32
+ await ov.input({ description: "email", text: "user@example.com" });
33
+ await ov.page.keyboard.press("Enter");
37
34
 
38
- await opensteer.close();
35
+ await ov.close();
39
36
  ```
40
37
 
41
38
  ## Core Model
42
39
 
43
- - `opensteer.page`: raw Playwright `Page`
44
- - `opensteer.context`: raw Playwright `BrowserContext`
40
+ - `ov.page`: raw Playwright `Page`
41
+ - `ov.context`: raw Playwright `BrowserContext`
45
42
  - Opensteer methods: descriptor-aware operations that can persist selectors
46
43
  - Selector storage: `.opensteer/selectors/<namespace>`
47
44
 
@@ -57,60 +54,14 @@ For actions like `click`/`input`/`hover`/`select`/`scroll`:
57
54
 
58
55
  When steps 2-4 resolve and `description` is provided, the path is persisted.
59
56
 
60
- ## Smart Post-Action Wait
61
-
62
- Mutating actions (`click`, `input`, `select`, `scroll`, etc.) include a
63
- best-effort post-action wait so delayed visual updates are usually settled
64
- before the method resolves.
65
-
66
- You can disable or tune this per call:
67
-
68
- ```ts
69
- await opensteer.click({ description: "Save button", wait: false });
70
-
71
- await opensteer.click({
72
- description: "Save button",
73
- wait: { timeout: 9000, settleMs: 900, includeNetwork: true, networkQuietMs: 400 },
74
- });
75
- ```
76
-
77
- ## Action Failure Diagnostics
78
-
79
- Descriptor-aware interaction methods (`click`, `dblclick`, `rightclick`,
80
- `hover`, `input`, `select`, `scroll`, `uploadFile`) throw
81
- `OpensteerActionError` when an interaction cannot be completed.
82
-
83
- The error includes structured failure metadata for agent/tooling decisions:
84
-
85
- - `error.failure.code` (`ActionFailureCode`)
86
- - `error.failure.message`
87
- - `error.failure.retryable`
88
- - `error.failure.classificationSource`
89
- - `error.failure.details` (for blocker and observation details when available)
90
-
91
- ```ts
92
- import { Opensteer, OpensteerActionError } from "opensteer";
93
-
94
- try {
95
- await opensteer.click({ description: "Save button" });
96
- } catch (err) {
97
- if (err instanceof OpensteerActionError) {
98
- console.error(err.failure.code); // e.g. BLOCKED_BY_INTERCEPTOR
99
- console.error(err.failure.message);
100
- console.error(err.failure.classificationSource);
101
- }
102
- throw err;
103
- }
104
- ```
105
-
106
57
  ## Snapshot Modes
107
58
 
108
59
  ```ts
109
- await opensteer.snapshot(); // action mode (default)
110
- await opensteer.snapshot({ mode: "extraction" });
111
- await opensteer.snapshot({ mode: "clickable" });
112
- await opensteer.snapshot({ mode: "scrollable" });
113
- await opensteer.snapshot({ mode: "full" });
60
+ await ov.snapshot(); // action mode (default)
61
+ await ov.snapshot({ mode: "extraction" });
62
+ await ov.snapshot({ mode: "clickable" });
63
+ await ov.snapshot({ mode: "scrollable" });
64
+ await ov.snapshot({ mode: "full" });
114
65
  ```
115
66
 
116
67
  ## Two Usage Patterns
@@ -126,27 +77,16 @@ Opensteer uses built-in LLM resolve/extract by default. You can override the
126
77
  default model with top-level `model` or `OPENSTEER_MODEL`.
127
78
 
128
79
  ```ts
129
- const opensteer = new Opensteer({
80
+ const ov = new Opensteer({
130
81
  name: "run-mode",
131
82
  model: "gpt-5-mini",
132
83
  });
133
84
  ```
134
85
 
135
- ## Mode Selection
136
-
137
- Opensteer defaults to local mode.
138
-
139
- - `OPENSTEER_MODE=local` runs local Playwright.
140
- - `OPENSTEER_MODE=remote` runs remote mode (requires `OPENSTEER_API_KEY`).
141
- - `mode: "remote"` in constructor config always forces remote mode.
142
-
143
- Remote mode is fail-fast: it does not automatically fall back to local mode.
144
-
145
86
  ## Docs
146
87
 
147
88
  - `docs/getting-started.md`
148
89
  - `docs/api-reference.md`
149
- - `docs/remote-integration.md`
150
90
  - `docs/html-cleaning.md`
151
91
  - `docs/selectors.md`
152
92
  - `docs/live-web-tests.md`
package/bin/opensteer.mjs CHANGED
@@ -1,22 +1,22 @@
1
1
  #!/usr/bin/env node
2
2
 
3
- import { spawn } from 'child_process'
4
- import { existsSync, readFileSync, readdirSync, unlinkSync } from 'fs'
5
3
  import { connect } from 'net'
6
- import { tmpdir } from 'os'
7
- import { basename, dirname, join } from 'path'
4
+ import { spawn } from 'child_process'
5
+ import { existsSync, readFileSync, unlinkSync, mkdirSync } from 'fs'
6
+ import { join, dirname } from 'path'
7
+ import { homedir } from 'os'
8
8
  import { fileURLToPath } from 'url'
9
9
 
10
10
  const __dirname = dirname(fileURLToPath(import.meta.url))
11
+
12
+ const RUNTIME_DIR = join(homedir(), '.opensteer')
13
+ const SOCKET_PATH = join(RUNTIME_DIR, 'opensteer.sock')
14
+ const PID_PATH = join(RUNTIME_DIR, 'opensteer.pid')
11
15
  const SERVER_SCRIPT = join(__dirname, '..', 'dist', 'cli', 'server.js')
12
16
 
13
17
  const CONNECT_TIMEOUT = 15000
14
18
  const POLL_INTERVAL = 100
15
19
  const RESPONSE_TIMEOUT = 120000
16
- const RUNTIME_PREFIX = 'opensteer-'
17
- const SOCKET_SUFFIX = '.sock'
18
- const PID_SUFFIX = '.pid'
19
- const CLOSE_ALL_REQUEST = { id: 1, command: 'close', args: {} }
20
20
 
21
21
  function parseArgs(argv) {
22
22
  const args = argv.slice(2)
@@ -56,51 +56,10 @@ function parseValue(str) {
56
56
  return str
57
57
  }
58
58
 
59
- function sanitizeNamespace(value) {
60
- const trimmed = String(value || '').trim()
61
- if (!trimmed || trimmed === '.' || trimmed === '..') {
62
- return 'default'
63
- }
64
-
65
- const replaced = trimmed.replace(/[^a-zA-Z0-9_-]+/g, '_')
66
- const collapsed = replaced.replace(/_+/g, '_')
67
- const bounded = collapsed.replace(/^_+|_+$/g, '')
68
-
69
- return bounded || 'default'
70
- }
71
-
72
- function resolveNamespace(flags) {
73
- if (flags.name !== undefined && String(flags.name).trim().length > 0) {
74
- return sanitizeNamespace(String(flags.name))
75
- }
76
-
77
- if (
78
- typeof process.env.OPENSTEER_NAME === 'string' &&
79
- process.env.OPENSTEER_NAME.trim().length > 0
80
- ) {
81
- return sanitizeNamespace(process.env.OPENSTEER_NAME)
82
- }
83
-
84
- const cwdBase = basename(process.cwd())
85
- if (cwdBase && cwdBase !== '.' && cwdBase !== '/') {
86
- return sanitizeNamespace(cwdBase)
87
- }
88
-
89
- return 'default'
90
- }
91
-
92
- function getSocketPath(namespace) {
93
- return join(tmpdir(), `${RUNTIME_PREFIX}${namespace}${SOCKET_SUFFIX}`)
94
- }
95
-
96
- function getPidPath(namespace) {
97
- return join(tmpdir(), `${RUNTIME_PREFIX}${namespace}${PID_SUFFIX}`)
98
- }
99
-
100
59
  function buildRequest(command, flags, positional) {
101
60
  const id = 1
102
61
  const globalFlags = {}
103
- for (const key of ['headless', 'json', 'connect-url', 'channel', 'profile-dir']) {
62
+ for (const key of ['name', 'headless', 'json', 'cdp-url', 'channel', 'user-data-dir']) {
104
63
  if (key in flags) {
105
64
  globalFlags[key] = flags[key]
106
65
  delete flags[key]
@@ -206,66 +165,38 @@ function buildRequest(command, flags, positional) {
206
165
  return { id, command, args }
207
166
  }
208
167
 
209
- function readPid(pidPath) {
210
- if (!existsSync(pidPath)) {
211
- return null
212
- }
213
-
214
- const parsed = Number.parseInt(readFileSync(pidPath, 'utf-8').trim(), 10)
215
- if (!Number.isInteger(parsed) || parsed <= 0) {
216
- return null
217
- }
218
-
219
- return parsed
220
- }
221
-
222
- function isPidAlive(pid) {
168
+ function isServerRunning() {
169
+ if (!existsSync(PID_PATH)) return false
223
170
  try {
171
+ const pid = parseInt(readFileSync(PID_PATH, 'utf-8').trim(), 10)
224
172
  process.kill(pid, 0)
225
173
  return true
226
174
  } catch {
175
+ cleanStaleFiles()
227
176
  return false
228
177
  }
229
178
  }
230
179
 
231
- function cleanStaleFiles(namespace) {
180
+ function cleanStaleFiles() {
232
181
  try {
233
- unlinkSync(getSocketPath(namespace))
234
- } catch { }
182
+ unlinkSync(SOCKET_PATH)
183
+ } catch {}
235
184
  try {
236
- unlinkSync(getPidPath(namespace))
237
- } catch { }
185
+ unlinkSync(PID_PATH)
186
+ } catch {}
238
187
  }
239
188
 
240
- function isServerRunning(namespace) {
241
- const pidPath = getPidPath(namespace)
242
- const pid = readPid(pidPath)
243
- if (!pid) {
244
- cleanStaleFiles(namespace)
245
- return false
246
- }
247
-
248
- if (!isPidAlive(pid)) {
249
- cleanStaleFiles(namespace)
250
- return false
251
- }
252
-
253
- return existsSync(getSocketPath(namespace))
254
- }
189
+ function startServer() {
190
+ mkdirSync(RUNTIME_DIR, { recursive: true })
255
191
 
256
- function startServer(namespace) {
257
192
  const child = spawn('node', [SERVER_SCRIPT], {
258
193
  detached: true,
259
194
  stdio: ['ignore', 'ignore', 'ignore'],
260
- env: {
261
- ...process.env,
262
- OPENSTEER_NAME: namespace,
263
- },
264
195
  })
265
196
  child.unref()
266
197
  }
267
198
 
268
- function waitForSocket(socketPath, timeout) {
199
+ function waitForSocket(timeout) {
269
200
  return new Promise((resolve, reject) => {
270
201
  const start = Date.now()
271
202
 
@@ -275,7 +206,7 @@ function waitForSocket(socketPath, timeout) {
275
206
  return
276
207
  }
277
208
 
278
- if (existsSync(socketPath)) {
209
+ if (existsSync(SOCKET_PATH)) {
279
210
  resolve()
280
211
  return
281
212
  }
@@ -287,9 +218,9 @@ function waitForSocket(socketPath, timeout) {
287
218
  })
288
219
  }
289
220
 
290
- function sendCommand(socketPath, request) {
221
+ function sendCommand(request) {
291
222
  return new Promise((resolve, reject) => {
292
- const socket = connect(socketPath)
223
+ const socket = connect(SOCKET_PATH)
293
224
  let buffer = ''
294
225
  let settled = false
295
226
 
@@ -339,71 +270,6 @@ function sendCommand(socketPath, request) {
339
270
  })
340
271
  }
341
272
 
342
- function listSessions() {
343
- const sessions = []
344
- const entries = readdirSync(tmpdir())
345
-
346
- for (const entry of entries) {
347
- if (!entry.startsWith(RUNTIME_PREFIX) || !entry.endsWith(PID_SUFFIX)) {
348
- continue
349
- }
350
-
351
- const name = entry.slice(
352
- RUNTIME_PREFIX.length,
353
- entry.length - PID_SUFFIX.length
354
- )
355
- if (!name) {
356
- continue
357
- }
358
-
359
- const pid = readPid(join(tmpdir(), entry))
360
- if (!pid || !isPidAlive(pid)) {
361
- cleanStaleFiles(name)
362
- continue
363
- }
364
-
365
- sessions.push({ name, pid })
366
- }
367
-
368
- sessions.sort((a, b) => a.name.localeCompare(b.name))
369
- return sessions
370
- }
371
-
372
- async function closeAllSessions() {
373
- const sessions = listSessions()
374
- const closed = []
375
- const failures = []
376
-
377
- for (const session of sessions) {
378
- const socketPath = getSocketPath(session.name)
379
- if (!existsSync(socketPath)) {
380
- cleanStaleFiles(session.name)
381
- continue
382
- }
383
-
384
- try {
385
- const response = await sendCommand(socketPath, CLOSE_ALL_REQUEST)
386
- if (response && response.ok === true) {
387
- closed.push(session)
388
- } else {
389
- failures.push(
390
- `${session.name}: ${response?.error || 'unknown close error'}`
391
- )
392
- }
393
- } catch (err) {
394
- failures.push(
395
- `${session.name}: ${err instanceof Error ? err.message : String(err)}`
396
- )
397
- }
398
- }
399
-
400
- if (failures.length > 0) {
401
- throw new Error(`Failed to close sessions: ${failures.join('; ')}`)
402
- }
403
-
404
- return closed
405
- }
406
-
407
273
  function output(data) {
408
274
  process.stdout.write(JSON.stringify(data) + '\n')
409
275
  }
@@ -423,10 +289,6 @@ Navigation:
423
289
  forward Go forward
424
290
  reload Reload page
425
291
  close Close browser and server
426
- close --all Close all active namespace-scoped servers
427
-
428
- Sessions:
429
- sessions List active namespace-scoped sessions
430
292
 
431
293
  Observation:
432
294
  snapshot [--mode action] Get page snapshot
@@ -472,64 +334,38 @@ Utility:
472
334
  extract <schema-json> Extract structured data
473
335
 
474
336
  Global Flags:
475
- --name <namespace> Session namespace (default: CWD basename or OPENSTEER_NAME)
337
+ --name <namespace> Storage namespace (default: "cli")
476
338
  --headless Launch browser in headless mode
477
- --connect-url <url> Connect to a running browser (e.g. http://localhost:9222)
339
+ --cdp-url <url> Connect to running Chrome via CDP (e.g. http://localhost:9222)
478
340
  --channel <browser> Use installed browser (chrome, chrome-beta, msedge)
479
- --profile-dir <path> Browser profile directory for logged-in sessions
341
+ --user-data-dir <path> Chrome profile directory for logged-in sessions
480
342
  --element <N> Target element by counter
481
343
  --selector <css> Target element by CSS selector
482
344
  --description <text> Description for selector persistence
483
345
  --help Show this help
484
-
485
- Environment:
486
- OPENSTEER_NAME Default session namespace when --name is omitted
487
- OPENSTEER_MODE Runtime mode: "local" (default) or "remote"
488
- OPENSTEER_API_KEY Required when remote mode is selected
489
- OPENSTEER_BASE_URL Override remote control-plane base URL
490
346
  `)
491
347
  }
492
348
 
493
349
  async function main() {
494
350
  const { command, flags, positional } = parseArgs(process.argv)
495
- const namespace = resolveNamespace(flags)
496
- const socketPath = getSocketPath(namespace)
497
-
498
- if (command === 'sessions') {
499
- output({ ok: true, sessions: listSessions() })
500
- return
501
- }
502
-
503
- if (command === 'close' && flags.all === true) {
504
- try {
505
- const closed = await closeAllSessions()
506
- output({ ok: true, closed })
507
- } catch (err) {
508
- error(err instanceof Error ? err.message : 'Failed to close sessions')
509
- }
510
- return
511
- }
512
-
513
- delete flags.name
514
- delete flags.all
515
351
  const request = buildRequest(command, flags, positional)
516
352
 
517
- if (!isServerRunning(namespace)) {
353
+ if (!isServerRunning()) {
518
354
  if (!existsSync(SERVER_SCRIPT)) {
519
355
  error(
520
356
  `Server script not found: ${SERVER_SCRIPT}. Run the build script first.`
521
357
  )
522
358
  }
523
- startServer(namespace)
359
+ startServer()
524
360
  try {
525
- await waitForSocket(socketPath, CONNECT_TIMEOUT)
361
+ await waitForSocket(CONNECT_TIMEOUT)
526
362
  } catch {
527
363
  error('Failed to start server. Check that the build is complete.')
528
364
  }
529
365
  }
530
366
 
531
367
  try {
532
- const response = await sendCommand(socketPath, request)
368
+ const response = await sendCommand(request)
533
369
 
534
370
  if (response.ok) {
535
371
  output({ ok: true, ...response.result })
@@ -540,7 +376,7 @@ async function main() {
540
376
  process.exit(1)
541
377
  }
542
378
  } catch (err) {
543
- error(err instanceof Error ? err.message : 'Connection failed')
379
+ error(err.message || 'Connection failed')
544
380
  }
545
381
  }
546
382