opensteer 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,8 +4,23 @@
4
4
 
5
5
  - Breaking: removed legacy `ai` config from `OpensteerConfig`; use top-level `model` instead.
6
6
  - Breaking: `OPENSTEER_AI_MODEL` is no longer supported; use `OPENSTEER_MODEL`.
7
+ - Breaking: `OPENSTEER_RUNTIME` is no longer supported; use `OPENSTEER_MODE`.
8
+ - Breaking: mode selection now uses `mode: 'local' | 'remote'` and remote credentials use `remote.apiKey`.
7
9
  - Opensteer now enables built-in LLM resolve/extract by default with model `gpt-5.1`.
8
- - Cloud mode now falls back to `OPENSTEER_API_KEY` when `cloud.key` is omitted.
10
+ - Remote mode now falls back to `OPENSTEER_API_KEY` when `remote.apiKey` is omitted.
11
+ - Mutating actions now include a smart, best-effort post-action wait with per-action
12
+ profiles and optional per-call overrides via `wait`.
13
+ - Added structured interaction diagnostics via `OpensteerActionError` for
14
+ descriptor-aware interaction methods (`click`, `dblclick`, `rightclick`,
15
+ `hover`, `input`, `select`, `scroll`, `uploadFile`).
16
+ - Added `ActionFailure` types (`ActionFailureCode`, `retryable`,
17
+ `classificationSource`, optional `details`) to support programmatic handling
18
+ of action failures.
19
+ - Added a DOM actionability probe and Playwright call-log classification to report
20
+ reasons like `BLOCKED_BY_INTERCEPTOR`, `NOT_VISIBLE`, `NOT_EDITABLE`, and
21
+ timeout/stale-target cases more accurately.
22
+ - Remote action failures now accept optional structured failure details and map
23
+ them to `OpensteerActionError` when available.
9
24
 
10
25
  ## 0.1.0
11
26
 
package/README.md CHANGED
@@ -2,11 +2,14 @@
2
2
 
3
3
  Lean browser automation SDK for coding agents and script replay.
4
4
 
5
- `opensteer` wraps only operations that need descriptor resolution (`snapshot`,
6
- `click`, `dblclick`, `rightclick`, `hover`, `input`, `select`, `scroll`,
7
- `extract`, `extractFromPlan`, `state`).
5
+ `opensteer` provides descriptor-aware actions (`click`, `dblclick`,
6
+ `rightclick`, `hover`, `input`, `select`, `scroll`, `extract`,
7
+ `extractFromPlan`, `uploadFile`), observation (`snapshot`, `state`,
8
+ `screenshot`), navigation (`goto`), and convenience methods for tabs, cookies,
9
+ keyboard, element info, and wait.
8
10
 
9
- Everything else is raw Playwright via `ov.page` and `ov.context`.
11
+ For anything not covered, use raw Playwright via `opensteer.page` and
12
+ `opensteer.context`.
10
13
 
11
14
  ## Install
12
15
 
@@ -22,23 +25,23 @@ pnpm add opensteer playwright
22
25
  ```ts
23
26
  import { Opensteer } from "opensteer";
24
27
 
25
- const ov = new Opensteer({ name: "my-scraper" }); // defaults to model: 'gpt-5.1'
26
- await ov.launch({ headless: false });
28
+ const opensteer = new Opensteer({ name: "my-scraper" }); // defaults to model: 'gpt-5.1'
29
+ await opensteer.launch({ headless: false });
27
30
 
28
- await ov.page.goto("https://example.com");
29
- const html = await ov.snapshot();
31
+ await opensteer.goto("https://example.com");
32
+ const html = await opensteer.snapshot();
30
33
 
31
- await ov.click({ description: "login-button" });
32
- await ov.input({ description: "email", text: "user@example.com" });
33
- await ov.page.keyboard.press("Enter");
34
+ await opensteer.click({ description: "login-button" });
35
+ await opensteer.input({ description: "email", text: "user@example.com" });
36
+ await opensteer.page.keyboard.press("Enter");
34
37
 
35
- await ov.close();
38
+ await opensteer.close();
36
39
  ```
37
40
 
38
41
  ## Core Model
39
42
 
40
- - `ov.page`: raw Playwright `Page`
41
- - `ov.context`: raw Playwright `BrowserContext`
43
+ - `opensteer.page`: raw Playwright `Page`
44
+ - `opensteer.context`: raw Playwright `BrowserContext`
42
45
  - Opensteer methods: descriptor-aware operations that can persist selectors
43
46
  - Selector storage: `.opensteer/selectors/<namespace>`
44
47
 
@@ -54,14 +57,60 @@ For actions like `click`/`input`/`hover`/`select`/`scroll`:
54
57
 
55
58
  When steps 2-4 resolve and `description` is provided, the path is persisted.
56
59
 
60
+ ## Smart Post-Action Wait
61
+
62
+ Mutating actions (`click`, `input`, `select`, `scroll`, etc.) include a
63
+ best-effort post-action wait so delayed visual updates are usually settled
64
+ before the method resolves.
65
+
66
+ You can disable or tune this per call:
67
+
68
+ ```ts
69
+ await opensteer.click({ description: "Save button", wait: false });
70
+
71
+ await opensteer.click({
72
+ description: "Save button",
73
+ wait: { timeout: 9000, settleMs: 900, includeNetwork: true, networkQuietMs: 400 },
74
+ });
75
+ ```
76
+
77
+ ## Action Failure Diagnostics
78
+
79
+ Descriptor-aware interaction methods (`click`, `dblclick`, `rightclick`,
80
+ `hover`, `input`, `select`, `scroll`, `uploadFile`) throw
81
+ `OpensteerActionError` when an interaction cannot be completed.
82
+
83
+ The error includes structured failure metadata for agent/tooling decisions:
84
+
85
+ - `error.failure.code` (`ActionFailureCode`)
86
+ - `error.failure.message`
87
+ - `error.failure.retryable`
88
+ - `error.failure.classificationSource`
89
+ - `error.failure.details` (for blocker and observation details when available)
90
+
91
+ ```ts
92
+ import { Opensteer, OpensteerActionError } from "opensteer";
93
+
94
+ try {
95
+ await opensteer.click({ description: "Save button" });
96
+ } catch (err) {
97
+ if (err instanceof OpensteerActionError) {
98
+ console.error(err.failure.code); // e.g. BLOCKED_BY_INTERCEPTOR
99
+ console.error(err.failure.message);
100
+ console.error(err.failure.classificationSource);
101
+ }
102
+ throw err;
103
+ }
104
+ ```
105
+
57
106
  ## Snapshot Modes
58
107
 
59
108
  ```ts
60
- await ov.snapshot(); // action mode (default)
61
- await ov.snapshot({ mode: "extraction" });
62
- await ov.snapshot({ mode: "clickable" });
63
- await ov.snapshot({ mode: "scrollable" });
64
- await ov.snapshot({ mode: "full" });
109
+ await opensteer.snapshot(); // action mode (default)
110
+ await opensteer.snapshot({ mode: "extraction" });
111
+ await opensteer.snapshot({ mode: "clickable" });
112
+ await opensteer.snapshot({ mode: "scrollable" });
113
+ await opensteer.snapshot({ mode: "full" });
65
114
  ```
66
115
 
67
116
  ## Two Usage Patterns
@@ -77,16 +126,27 @@ Opensteer uses built-in LLM resolve/extract by default. You can override the
77
126
  default model with top-level `model` or `OPENSTEER_MODEL`.
78
127
 
79
128
  ```ts
80
- const ov = new Opensteer({
129
+ const opensteer = new Opensteer({
81
130
  name: "run-mode",
82
131
  model: "gpt-5-mini",
83
132
  });
84
133
  ```
85
134
 
135
+ ## Mode Selection
136
+
137
+ Opensteer defaults to local mode.
138
+
139
+ - `OPENSTEER_MODE=local` runs local Playwright.
140
+ - `OPENSTEER_MODE=remote` runs remote mode (requires an API key: `remote.apiKey` in config, or `OPENSTEER_API_KEY` as the fallback).
141
+ - `mode: "remote"` in constructor config always forces remote mode.
142
+
143
+ Remote mode is fail-fast: it does not automatically fall back to local mode.
144
+
86
145
  ## Docs
87
146
 
88
147
  - `docs/getting-started.md`
89
148
  - `docs/api-reference.md`
149
+ - `docs/remote-integration.md`
90
150
  - `docs/html-cleaning.md`
91
151
  - `docs/selectors.md`
92
152
  - `docs/live-web-tests.md`
package/bin/opensteer.mjs CHANGED
@@ -1,22 +1,32 @@
1
1
  #!/usr/bin/env node
2
2
 
3
- import { connect } from 'net'
3
+ import { createHash } from 'crypto'
4
4
  import { spawn } from 'child_process'
5
- import { existsSync, readFileSync, unlinkSync, mkdirSync } from 'fs'
6
- import { join, dirname } from 'path'
7
- import { homedir } from 'os'
5
+ import { existsSync, readFileSync, readdirSync, unlinkSync, writeFileSync } from 'fs'
6
+ import { connect } from 'net'
7
+ import { tmpdir } from 'os'
8
+ import { basename, dirname, join } from 'path'
8
9
  import { fileURLToPath } from 'url'
9
10
 
10
11
  const __dirname = dirname(fileURLToPath(import.meta.url))
11
-
12
- const RUNTIME_DIR = join(homedir(), '.opensteer')
13
- const SOCKET_PATH = join(RUNTIME_DIR, 'opensteer.sock')
14
- const PID_PATH = join(RUNTIME_DIR, 'opensteer.pid')
15
12
  const SERVER_SCRIPT = join(__dirname, '..', 'dist', 'cli', 'server.js')
16
13
 
17
14
  const CONNECT_TIMEOUT = 15000
18
15
  const POLL_INTERVAL = 100
19
16
  const RESPONSE_TIMEOUT = 120000
17
+ const RUNTIME_PREFIX = 'opensteer-'
18
+ const SOCKET_SUFFIX = '.sock'
19
+ const PID_SUFFIX = '.pid'
20
+ const CLOSE_ALL_REQUEST = { id: 1, command: 'close', args: {} }
21
+
22
/**
 * Read the CLI version from the `package.json` one directory above this script.
 *
 * @returns {string} The manifest's `version`, or `'unknown'` when the manifest
 *   cannot be read or parsed, or does not carry a non-empty string version.
 */
function getVersion() {
  try {
    const pkgPath = join(__dirname, '..', 'package.json')
    const { version } = JSON.parse(readFileSync(pkgPath, 'utf-8'))
    // Without this guard a manifest lacking `version` makes `--version` print "undefined".
    return typeof version === 'string' && version.length > 0 ? version : 'unknown'
  } catch {
    return 'unknown'
  }
}
20
30
 
21
31
  function parseArgs(argv) {
22
32
  const args = argv.slice(2)
@@ -25,6 +35,11 @@ function parseArgs(argv) {
25
35
  process.exit(0)
26
36
  }
27
37
 
38
+ if (args[0] === '--version' || args[0] === '-v') {
39
+ console.log(getVersion())
40
+ process.exit(0)
41
+ }
42
+
28
43
  const command = args[0]
29
44
  const flags = {}
30
45
  const positional = []
@@ -56,10 +71,78 @@ function parseValue(str) {
56
71
  return str
57
72
  }
58
73
 
74
/**
 * Normalize an arbitrary value into a namespace token.
 *
 * The value is stringified and trimmed; every run of characters outside
 * `[a-zA-Z0-9_-]` becomes a single underscore, repeated underscores are
 * collapsed, and leading/trailing underscores are stripped.
 *
 * @param {unknown} value - Raw namespace candidate.
 * @returns {string} The normalized token, or `'default'` when the input is
 *   falsy, blank, `.` or `..`, or nothing survives normalization.
 */
function sanitizeNamespace(value) {
  const raw = String(value || '').trim()
  if (raw === '' || raw === '.' || raw === '..') return 'default'

  const cleaned = raw
    .replace(/[^a-zA-Z0-9_-]+/g, '_')
    .replace(/_+/g, '_')
    .replace(/^_+|_+$/g, '')

  return cleaned === '' ? 'default' : cleaned
}
86
+
87
/**
 * Build the path of the marker file that stores the active namespace for the
 * current working directory.
 *
 * The file lives in the OS temp directory and is named from the runtime
 * prefix, the literal `active-`, and the first 16 hex characters of the MD5
 * digest of `process.cwd()`.
 *
 * @returns {string} Absolute path of the marker file.
 */
function getActiveNamespacePath() {
  const cwdDigest = createHash('md5').update(process.cwd()).digest('hex')
  const fileName = `${RUNTIME_PREFIX}active-${cwdDigest.slice(0, 16)}`
  return join(tmpdir(), fileName)
}
91
+
92
/**
 * Read the namespace remembered for the current working directory.
 *
 * @returns {string|null} The stored namespace, or `null` when the marker file
 *   is missing, empty, unreadable, or holds a value that is not already in
 *   sanitized form.
 */
function readActiveNamespace() {
  try {
    const filePath = getActiveNamespacePath()
    if (!existsSync(filePath)) return null
    const ns = readFileSync(filePath, 'utf-8').trim()
    if (!ns) return null
    // The marker lives in the shared temp directory and its value is spliced
    // into socket/pid paths that are later connected to and unlinked. Only
    // accept values that sanitizeNamespace would leave untouched, so content
    // such as "../../x" cannot steer those paths outside the temp directory.
    return sanitizeNamespace(ns) === ns ? ns : null
  } catch {
    return null
  }
}
102
+
103
/**
 * Remember `namespace` as the active one for the current working directory.
 * Any failure is ignored on purpose: the marker is only a convenience hint.
 *
 * @param {string} namespace - Namespace to store in the marker file.
 */
function writeActiveNamespace(namespace) {
  try {
    const markerPath = getActiveNamespacePath()
    writeFileSync(markerPath, namespace)
  } catch {
    /* best-effort */
  }
}
108
+
109
/**
 * Decide which session namespace this invocation targets.
 *
 * Candidates are tried in order: the `name` flag, the `OPENSTEER_NAME`
 * environment variable, the remembered active namespace (only while its server
 * is running), the basename of the current working directory, and finally the
 * literal `'default'`.
 *
 * @param {object} flags - Parsed CLI flags; only `flags.name` is consulted.
 * @returns {{ namespace: string, source: string }} The namespace and which
 *   candidate produced it (`'flag'`, `'env'`, `'active'`, `'cwd'` or `'default'`).
 */
function resolveNamespace(flags) {
  const fromFlag = flags.name
  if (fromFlag !== undefined && String(fromFlag).trim().length > 0) {
    return { namespace: sanitizeNamespace(String(fromFlag)), source: 'flag' }
  }

  const fromEnv = process.env.OPENSTEER_NAME
  if (typeof fromEnv === 'string' && fromEnv.trim().length > 0) {
    return { namespace: sanitizeNamespace(fromEnv), source: 'env' }
  }

  const remembered = readActiveNamespace()
  const rememberedIsLive = Boolean(remembered) && isServerRunning(remembered)
  if (rememberedIsLive) {
    return { namespace: remembered, source: 'active' }
  }

  const folder = basename(process.cwd())
  const folderUsable = Boolean(folder) && folder !== '.' && folder !== '/'
  return folderUsable
    ? { namespace: sanitizeNamespace(folder), source: 'cwd' }
    : { namespace: 'default', source: 'default' }
}
133
+
134
/**
 * Path of the socket file for a namespace: the OS temp directory joined with
 * the runtime prefix, the namespace, and the socket suffix.
 *
 * @param {string} namespace - Session namespace.
 * @returns {string} Socket path for that namespace.
 */
function getSocketPath(namespace) {
  const fileName = `${RUNTIME_PREFIX}${namespace}${SOCKET_SUFFIX}`
  return join(tmpdir(), fileName)
}
137
+
138
/**
 * Path of the PID file for a namespace: the OS temp directory joined with the
 * runtime prefix, the namespace, and the PID suffix.
 *
 * @param {string} namespace - Session namespace.
 * @returns {string} PID-file path for that namespace.
 */
function getPidPath(namespace) {
  const fileName = `${RUNTIME_PREFIX}${namespace}${PID_SUFFIX}`
  return join(tmpdir(), fileName)
}
141
+
59
142
  function buildRequest(command, flags, positional) {
60
143
  const id = 1
61
144
  const globalFlags = {}
62
- for (const key of ['name', 'headless', 'json', 'cdp-url', 'channel', 'user-data-dir']) {
145
+ for (const key of ['headless', 'json', 'connect-url', 'channel', 'profile-dir']) {
63
146
  if (key in flags) {
64
147
  globalFlags[key] = flags[key]
65
148
  delete flags[key]
@@ -165,38 +248,66 @@ function buildRequest(command, flags, positional) {
165
248
  return { id, command, args }
166
249
  }
167
250
 
168
- function isServerRunning() {
169
- if (!existsSync(PID_PATH)) return false
251
/**
 * Read a process id from a PID file.
 *
 * @param {string} pidPath - Path of the PID file.
 * @returns {number|null} The positive integer parsed from the file, or `null`
 *   when the file is missing, unreadable, or does not hold a positive integer.
 */
function readPid(pidPath) {
  let contents
  try {
    // Read directly instead of checking existence first: the file can vanish
    // (or be unreadable) between a check and the read, and callers expect
    // `null` rather than an exception in that case.
    contents = readFileSync(pidPath, 'utf-8')
  } catch {
    return null
  }

  const parsed = Number.parseInt(contents.trim(), 10)
  return Number.isInteger(parsed) && parsed > 0 ? parsed : null
}
263
+
264
/**
 * Check whether a process with the given id currently exists.
 *
 * Signal 0 performs the existence/permission check without delivering a signal.
 *
 * @param {number} pid - Process id to probe.
 * @returns {boolean} `true` when the process exists, `false` otherwise.
 */
function isPidAlive(pid) {
  try {
    process.kill(pid, 0)
    return true
  } catch (err) {
    // EPERM means the process exists but belongs to another user; only other
    // failures (e.g. ESRCH) indicate that there is no such process.
    return err?.code === 'EPERM'
  }
}
179
272
 
180
- function cleanStaleFiles() {
273
/**
 * Remove the socket file and then the PID file of a namespace.
 * Each removal is attempted independently and failures are ignored.
 *
 * @param {string} namespace - Session namespace whose files are removed.
 */
function cleanStaleFiles(namespace) {
  for (const pathFor of [getSocketPath, getPidPath]) {
    try {
      unlinkSync(pathFor(namespace))
    } catch {
      /* nothing to remove, or removal failed; ignored on purpose */
    }
  }
}
188
281
 
189
- function startServer() {
190
- mkdirSync(RUNTIME_DIR, { recursive: true })
282
/**
 * Report whether a server appears to be running for a namespace.
 *
 * When the PID file yields no PID, or that process is not alive, the
 * namespace's socket and PID files are removed and `false` is returned.
 * Otherwise the result is whether the namespace's socket file exists.
 *
 * @param {string} namespace - Session namespace to check.
 * @returns {boolean} `true` only when the process is alive and the socket file exists.
 */
function isServerRunning(namespace) {
  const pid = readPid(getPidPath(namespace))
  const hasLiveProcess = Boolean(pid) && isPidAlive(pid)

  if (!hasLiveProcess) {
    cleanStaleFiles(namespace)
    return false
  }

  return existsSync(getSocketPath(namespace))
}
191
297
 
298
/**
 * Launch the server script as a detached background process for a namespace.
 *
 * The child inherits the current environment with `OPENSTEER_NAME` set to the
 * namespace, has all stdio ignored, and is unref'd so this CLI process can
 * exit without waiting for it.
 *
 * @param {string} namespace - Session namespace passed to the server.
 */
function startServer(namespace) {
  // Use the Node binary running this CLI rather than whatever `node` resolves
  // to on PATH (which may be absent or a different version).
  const child = spawn(process.execPath, [SERVER_SCRIPT], {
    detached: true,
    stdio: ['ignore', 'ignore', 'ignore'],
    env: {
      ...process.env,
      OPENSTEER_NAME: namespace,
    },
  })
  child.unref()
}
198
309
 
199
- function waitForSocket(timeout) {
310
+ function waitForSocket(socketPath, timeout) {
200
311
  return new Promise((resolve, reject) => {
201
312
  const start = Date.now()
202
313
 
@@ -206,7 +317,7 @@ function waitForSocket(timeout) {
206
317
  return
207
318
  }
208
319
 
209
- if (existsSync(SOCKET_PATH)) {
320
+ if (existsSync(socketPath)) {
210
321
  resolve()
211
322
  return
212
323
  }
@@ -218,9 +329,9 @@ function waitForSocket(timeout) {
218
329
  })
219
330
  }
220
331
 
221
- function sendCommand(request) {
332
+ function sendCommand(socketPath, request) {
222
333
  return new Promise((resolve, reject) => {
223
- const socket = connect(SOCKET_PATH)
334
+ const socket = connect(socketPath)
224
335
  let buffer = ''
225
336
  let settled = false
226
337
 
@@ -270,6 +381,71 @@ function sendCommand(request) {
270
381
  })
271
382
  }
272
383
 
384
/**
 * Enumerate sessions whose PID file in the OS temp directory points at a live
 * process.
 *
 * PID files are recognised by the runtime prefix and PID suffix; the text in
 * between is the session name. Entries whose PID cannot be read or whose
 * process is not alive have their socket/PID files removed and are skipped.
 *
 * @returns {{ name: string, pid: number }[]} Live sessions sorted by name.
 */
function listSessions() {
  const tempRoot = tmpdir()
  const pidFiles = readdirSync(tempRoot).filter(
    (file) => file.startsWith(RUNTIME_PREFIX) && file.endsWith(PID_SUFFIX)
  )

  const live = []
  for (const file of pidFiles) {
    const name = file.slice(RUNTIME_PREFIX.length, file.length - PID_SUFFIX.length)
    if (!name) continue

    const pid = readPid(join(tempRoot, file))
    if (pid && isPidAlive(pid)) {
      live.push({ name, pid })
    } else {
      cleanStaleFiles(name)
    }
  }

  live.sort((left, right) => left.name.localeCompare(right.name))
  return live
}
413
+
414
/**
 * Send the close request to every listed session, one after another.
 *
 * Sessions whose socket file is missing have their stale files removed and are
 * skipped. A session counts as closed only when its response has `ok === true`.
 *
 * @returns {Promise<{ name: string, pid: number }[]>} The sessions that were closed.
 * @throws {Error} When at least one session failed to close; the message lists
 *   every failure.
 */
async function closeAllSessions() {
  const closed = []
  const failures = []

  for (const session of listSessions()) {
    const { name } = session
    const socketPath = getSocketPath(name)

    if (!existsSync(socketPath)) {
      cleanStaleFiles(name)
      continue
    }

    try {
      const reply = await sendCommand(socketPath, CLOSE_ALL_REQUEST)
      const succeeded = Boolean(reply) && reply.ok === true
      if (succeeded) {
        closed.push(session)
        continue
      }
      failures.push(`${name}: ${reply?.error || 'unknown close error'}`)
    } catch (err) {
      const detail = err instanceof Error ? err.message : String(err)
      failures.push(`${name}: ${detail}`)
    }
  }

  if (failures.length > 0) {
    throw new Error(`Failed to close sessions: ${failures.join('; ')}`)
  }

  return closed
}
448
+
273
449
  function output(data) {
274
450
  process.stdout.write(JSON.stringify(data) + '\n')
275
451
  }
@@ -289,6 +465,11 @@ Navigation:
289
465
  forward Go forward
290
466
  reload Reload page
291
467
  close Close browser and server
468
+ close --all Close all active namespace-scoped servers
469
+
470
+ Sessions:
471
+ sessions List active namespace-scoped sessions
472
+ status Show resolved namespace and session state
292
473
 
293
474
  Observation:
294
475
  snapshot [--mode action] Get page snapshot
@@ -334,40 +515,87 @@ Utility:
334
515
  extract <schema-json> Extract structured data
335
516
 
336
517
  Global Flags:
337
- --name <namespace> Storage namespace (default: "cli")
518
+ --name <namespace> Session namespace (default: CWD basename or OPENSTEER_NAME)
338
519
  --headless Launch browser in headless mode
339
- --cdp-url <url> Connect to running Chrome via CDP (e.g. http://localhost:9222)
520
+ --connect-url <url> Connect to a running browser (e.g. http://localhost:9222)
340
521
  --channel <browser> Use installed browser (chrome, chrome-beta, msedge)
341
- --user-data-dir <path> Chrome profile directory for logged-in sessions
522
+ --profile-dir <path> Browser profile directory for logged-in sessions
342
523
  --element <N> Target element by counter
343
524
  --selector <css> Target element by CSS selector
344
525
  --description <text> Description for selector persistence
345
526
  --help Show this help
527
+ --version, -v Show version
528
+
529
+ Environment:
530
+ OPENSTEER_NAME Default session namespace when --name is omitted
531
+ OPENSTEER_MODE Runtime mode: "local" (default) or "remote"
532
+ OPENSTEER_API_KEY Required when remote mode is selected
533
+ OPENSTEER_BASE_URL Override remote control-plane base URL
346
534
  `)
347
535
  }
348
536
 
349
537
  async function main() {
350
538
  const { command, flags, positional } = parseArgs(process.argv)
539
+ const { namespace, source: namespaceSource } = resolveNamespace(flags)
540
+ const socketPath = getSocketPath(namespace)
541
+
542
+ if (command === 'sessions') {
543
+ output({ ok: true, sessions: listSessions() })
544
+ return
545
+ }
546
+
547
+ if (command === 'status') {
548
+ output({
549
+ ok: true,
550
+ namespace,
551
+ namespaceSource,
552
+ serverRunning: isServerRunning(namespace),
553
+ socketPath,
554
+ sessions: listSessions(),
555
+ })
556
+ return
557
+ }
558
+
559
+ if (command === 'close' && flags.all === true) {
560
+ try {
561
+ const closed = await closeAllSessions()
562
+ output({ ok: true, closed })
563
+ } catch (err) {
564
+ error(err instanceof Error ? err.message : 'Failed to close sessions')
565
+ }
566
+ return
567
+ }
568
+
569
+ delete flags.name
570
+ delete flags.all
351
571
  const request = buildRequest(command, flags, positional)
352
572
 
353
- if (!isServerRunning()) {
573
+ if (!isServerRunning(namespace)) {
574
+ if (command !== 'open') {
575
+ error(
576
+ `No server running for namespace '${namespace}' (resolved from ${namespaceSource}). Run 'opensteer open' first or use 'opensteer sessions' to see active sessions.`
577
+ )
578
+ }
354
579
  if (!existsSync(SERVER_SCRIPT)) {
355
580
  error(
356
581
  `Server script not found: ${SERVER_SCRIPT}. Run the build script first.`
357
582
  )
358
583
  }
359
- startServer()
584
+ startServer(namespace)
360
585
  try {
361
- await waitForSocket(CONNECT_TIMEOUT)
586
+ await waitForSocket(socketPath, CONNECT_TIMEOUT)
362
587
  } catch {
363
588
  error('Failed to start server. Check that the build is complete.')
364
589
  }
365
590
  }
366
591
 
367
592
  try {
368
- const response = await sendCommand(request)
593
+ const response = await sendCommand(socketPath, request)
369
594
 
370
595
  if (response.ok) {
596
+ if (command === 'open') {
597
+ writeActiveNamespace(namespace)
598
+ }
371
599
  output({ ok: true, ...response.result })
372
600
  } else {
373
601
  process.stderr.write(
@@ -376,7 +604,7 @@ async function main() {
376
604
  process.exit(1)
377
605
  }
378
606
  } catch (err) {
379
- error(err.message || 'Connection failed')
607
+ error(err instanceof Error ? err.message : 'Connection failed')
380
608
  }
381
609
  }
382
610