opensteer 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,23 +4,8 @@
4
4
 
5
5
  - Breaking: removed legacy `ai` config from `OpensteerConfig`; use top-level `model` instead.
6
6
  - Breaking: `OPENSTEER_AI_MODEL` is no longer supported; use `OPENSTEER_MODEL`.
7
- - Breaking: `OPENSTEER_RUNTIME` is no longer supported; use `OPENSTEER_MODE`.
8
- - Breaking: mode selection now uses `mode: 'local' | 'remote'` and remote credentials use `remote.apiKey`.
9
7
  - Opensteer now enables built-in LLM resolve/extract by default with model `gpt-5.1`.
10
- - Remote mode now falls back to `OPENSTEER_API_KEY` when `remote.apiKey` is omitted.
11
- - Mutating actions now include smart best-effort post-action wait with per-action
12
- profiles and optional per-call overrides via `wait`.
13
- - Added structured interaction diagnostics via `OpensteerActionError` for
14
- descriptor-aware interaction methods (`click`, `dblclick`, `rightclick`,
15
- `hover`, `input`, `select`, `scroll`, `uploadFile`).
16
- - Added `ActionFailure` types (`ActionFailureCode`, `retryable`,
17
- `classificationSource`, optional `details`) to support programmatic handling
18
- of action failures.
19
- - Added DOM actionability probe + Playwright call-log classification to report
20
- reasons like `BLOCKED_BY_INTERCEPTOR`, `NOT_VISIBLE`, `NOT_EDITABLE`, and
21
- timeout/stale-target cases more accurately.
22
- - Remote action failures now accept optional structured failure details and map
23
- them to `OpensteerActionError` when available.
8
+ - Cloud mode now falls back to `OPENSTEER_API_KEY` when `cloud.key` is omitted.
24
9
 
25
10
  ## 0.1.0
26
11
 
package/README.md CHANGED
@@ -2,14 +2,11 @@
2
2
 
3
3
  Lean browser automation SDK for coding agents and script replay.
4
4
 
5
- `opensteer` provides descriptor-aware actions (`click`, `dblclick`,
6
- `rightclick`, `hover`, `input`, `select`, `scroll`, `extract`,
7
- `extractFromPlan`, `uploadFile`), observation (`snapshot`, `state`,
8
- `screenshot`), navigation (`goto`), and convenience methods for tabs, cookies,
9
- keyboard, element info, and wait.
5
+ `opensteer` wraps only operations that need descriptor resolution (`snapshot`,
6
+ `click`, `dblclick`, `rightclick`, `hover`, `input`, `select`, `scroll`,
7
+ `extract`, `extractFromPlan`, `state`).
10
8
 
11
- For anything not covered, use raw Playwright via `opensteer.page` and
12
- `opensteer.context`.
9
+ Everything else is raw Playwright via `ov.page` and `ov.context`.
13
10
 
14
11
  ## Install
15
12
 
@@ -25,23 +22,23 @@ pnpm add opensteer playwright
25
22
  ```ts
26
23
  import { Opensteer } from "opensteer";
27
24
 
28
- const opensteer = new Opensteer({ name: "my-scraper" }); // defaults to model: 'gpt-5.1'
29
- await opensteer.launch({ headless: false });
25
+ const ov = new Opensteer({ name: "my-scraper" }); // defaults to model: 'gpt-5.1'
26
+ await ov.launch({ headless: false });
30
27
 
31
- await opensteer.goto("https://example.com");
32
- const html = await opensteer.snapshot();
28
+ await ov.page.goto("https://example.com");
29
+ const html = await ov.snapshot();
33
30
 
34
- await opensteer.click({ description: "login-button" });
35
- await opensteer.input({ description: "email", text: "user@example.com" });
36
- await opensteer.page.keyboard.press("Enter");
31
+ await ov.click({ description: "login-button" });
32
+ await ov.input({ description: "email", text: "user@example.com" });
33
+ await ov.page.keyboard.press("Enter");
37
34
 
38
- await opensteer.close();
35
+ await ov.close();
39
36
  ```
40
37
 
41
38
  ## Core Model
42
39
 
43
- - `opensteer.page`: raw Playwright `Page`
44
- - `opensteer.context`: raw Playwright `BrowserContext`
40
+ - `ov.page`: raw Playwright `Page`
41
+ - `ov.context`: raw Playwright `BrowserContext`
45
42
  - Opensteer methods: descriptor-aware operations that can persist selectors
46
43
  - Selector storage: `.opensteer/selectors/<namespace>`
47
44
 
@@ -57,60 +54,14 @@ For actions like `click`/`input`/`hover`/`select`/`scroll`:
57
54
 
58
55
  When steps 2-4 resolve and `description` is provided, the path is persisted.
59
56
 
60
- ## Smart Post-Action Wait
61
-
62
- Mutating actions (`click`, `input`, `select`, `scroll`, etc.) include a
63
- best-effort post-action wait so delayed visual updates are usually settled
64
- before the method resolves.
65
-
66
- You can disable or tune this per call:
67
-
68
- ```ts
69
- await opensteer.click({ description: "Save button", wait: false });
70
-
71
- await opensteer.click({
72
- description: "Save button",
73
- wait: { timeout: 9000, settleMs: 900, includeNetwork: true, networkQuietMs: 400 },
74
- });
75
- ```
76
-
77
- ## Action Failure Diagnostics
78
-
79
- Descriptor-aware interaction methods (`click`, `dblclick`, `rightclick`,
80
- `hover`, `input`, `select`, `scroll`, `uploadFile`) throw
81
- `OpensteerActionError` when an interaction cannot be completed.
82
-
83
- The error includes structured failure metadata for agent/tooling decisions:
84
-
85
- - `error.failure.code` (`ActionFailureCode`)
86
- - `error.failure.message`
87
- - `error.failure.retryable`
88
- - `error.failure.classificationSource`
89
- - `error.failure.details` (for blocker and observation details when available)
90
-
91
- ```ts
92
- import { Opensteer, OpensteerActionError } from "opensteer";
93
-
94
- try {
95
- await opensteer.click({ description: "Save button" });
96
- } catch (err) {
97
- if (err instanceof OpensteerActionError) {
98
- console.error(err.failure.code); // e.g. BLOCKED_BY_INTERCEPTOR
99
- console.error(err.failure.message);
100
- console.error(err.failure.classificationSource);
101
- }
102
- throw err;
103
- }
104
- ```
105
-
106
57
  ## Snapshot Modes
107
58
 
108
59
  ```ts
109
- await opensteer.snapshot(); // action mode (default)
110
- await opensteer.snapshot({ mode: "extraction" });
111
- await opensteer.snapshot({ mode: "clickable" });
112
- await opensteer.snapshot({ mode: "scrollable" });
113
- await opensteer.snapshot({ mode: "full" });
60
+ await ov.snapshot(); // action mode (default)
61
+ await ov.snapshot({ mode: "extraction" });
62
+ await ov.snapshot({ mode: "clickable" });
63
+ await ov.snapshot({ mode: "scrollable" });
64
+ await ov.snapshot({ mode: "full" });
114
65
  ```
115
66
 
116
67
  ## Two Usage Patterns
@@ -126,27 +77,16 @@ Opensteer uses built-in LLM resolve/extract by default. You can override the
126
77
  default model with top-level `model` or `OPENSTEER_MODEL`.
127
78
 
128
79
  ```ts
129
- const opensteer = new Opensteer({
80
+ const ov = new Opensteer({
130
81
  name: "run-mode",
131
82
  model: "gpt-5-mini",
132
83
  });
133
84
  ```
134
85
 
135
- ## Mode Selection
136
-
137
- Opensteer defaults to local mode.
138
-
139
- - `OPENSTEER_MODE=local` runs local Playwright.
140
- - `OPENSTEER_MODE=remote` runs remote mode (requires `OPENSTEER_API_KEY`).
141
- - `mode: "remote"` in constructor config always forces remote mode.
142
-
143
- Remote mode is fail-fast: it does not automatically fall back to local mode.
144
-
145
86
  ## Docs
146
87
 
147
88
  - `docs/getting-started.md`
148
89
  - `docs/api-reference.md`
149
- - `docs/remote-integration.md`
150
90
  - `docs/html-cleaning.md`
151
91
  - `docs/selectors.md`
152
92
  - `docs/live-web-tests.md`
package/bin/opensteer.mjs CHANGED
@@ -1,32 +1,22 @@
1
1
  #!/usr/bin/env node
2
2
 
3
- import { createHash } from 'crypto'
4
- import { spawn } from 'child_process'
5
- import { existsSync, readFileSync, readdirSync, unlinkSync, writeFileSync } from 'fs'
6
3
  import { connect } from 'net'
7
- import { tmpdir } from 'os'
8
- import { basename, dirname, join } from 'path'
4
+ import { spawn } from 'child_process'
5
+ import { existsSync, readFileSync, unlinkSync, mkdirSync } from 'fs'
6
+ import { join, dirname } from 'path'
7
+ import { homedir } from 'os'
9
8
  import { fileURLToPath } from 'url'
10
9
 
11
10
  const __dirname = dirname(fileURLToPath(import.meta.url))
11
+
12
+ const RUNTIME_DIR = join(homedir(), '.opensteer')
13
+ const SOCKET_PATH = join(RUNTIME_DIR, 'opensteer.sock')
14
+ const PID_PATH = join(RUNTIME_DIR, 'opensteer.pid')
12
15
  const SERVER_SCRIPT = join(__dirname, '..', 'dist', 'cli', 'server.js')
13
16
 
14
17
  const CONNECT_TIMEOUT = 15000
15
18
  const POLL_INTERVAL = 100
16
19
  const RESPONSE_TIMEOUT = 120000
17
- const RUNTIME_PREFIX = 'opensteer-'
18
- const SOCKET_SUFFIX = '.sock'
19
- const PID_SUFFIX = '.pid'
20
- const CLOSE_ALL_REQUEST = { id: 1, command: 'close', args: {} }
21
-
22
- function getVersion() {
23
- try {
24
- const pkgPath = join(__dirname, '..', 'package.json')
25
- return JSON.parse(readFileSync(pkgPath, 'utf-8')).version
26
- } catch {
27
- return 'unknown'
28
- }
29
- }
30
20
 
31
21
  function parseArgs(argv) {
32
22
  const args = argv.slice(2)
@@ -35,11 +25,6 @@ function parseArgs(argv) {
35
25
  process.exit(0)
36
26
  }
37
27
 
38
- if (args[0] === '--version' || args[0] === '-v') {
39
- console.log(getVersion())
40
- process.exit(0)
41
- }
42
-
43
28
  const command = args[0]
44
29
  const flags = {}
45
30
  const positional = []
@@ -71,78 +56,10 @@ function parseValue(str) {
71
56
  return str
72
57
  }
73
58
 
74
- function sanitizeNamespace(value) {
75
- const trimmed = String(value || '').trim()
76
- if (!trimmed || trimmed === '.' || trimmed === '..') {
77
- return 'default'
78
- }
79
-
80
- const replaced = trimmed.replace(/[^a-zA-Z0-9_-]+/g, '_')
81
- const collapsed = replaced.replace(/_+/g, '_')
82
- const bounded = collapsed.replace(/^_+|_+$/g, '')
83
-
84
- return bounded || 'default'
85
- }
86
-
87
- function getActiveNamespacePath() {
88
- const hash = createHash('md5').update(process.cwd()).digest('hex').slice(0, 16)
89
- return join(tmpdir(), `${RUNTIME_PREFIX}active-${hash}`)
90
- }
91
-
92
- function readActiveNamespace() {
93
- try {
94
- const filePath = getActiveNamespacePath()
95
- if (!existsSync(filePath)) return null
96
- const ns = readFileSync(filePath, 'utf-8').trim()
97
- return ns || null
98
- } catch {
99
- return null
100
- }
101
- }
102
-
103
- function writeActiveNamespace(namespace) {
104
- try {
105
- writeFileSync(getActiveNamespacePath(), namespace)
106
- } catch { /* best-effort */ }
107
- }
108
-
109
- function resolveNamespace(flags) {
110
- if (flags.name !== undefined && String(flags.name).trim().length > 0) {
111
- return { namespace: sanitizeNamespace(String(flags.name)), source: 'flag' }
112
- }
113
-
114
- if (
115
- typeof process.env.OPENSTEER_NAME === 'string' &&
116
- process.env.OPENSTEER_NAME.trim().length > 0
117
- ) {
118
- return { namespace: sanitizeNamespace(process.env.OPENSTEER_NAME), source: 'env' }
119
- }
120
-
121
- const active = readActiveNamespace()
122
- if (active && isServerRunning(active)) {
123
- return { namespace: active, source: 'active' }
124
- }
125
-
126
- const cwdBase = basename(process.cwd())
127
- if (cwdBase && cwdBase !== '.' && cwdBase !== '/') {
128
- return { namespace: sanitizeNamespace(cwdBase), source: 'cwd' }
129
- }
130
-
131
- return { namespace: 'default', source: 'default' }
132
- }
133
-
134
- function getSocketPath(namespace) {
135
- return join(tmpdir(), `${RUNTIME_PREFIX}${namespace}${SOCKET_SUFFIX}`)
136
- }
137
-
138
- function getPidPath(namespace) {
139
- return join(tmpdir(), `${RUNTIME_PREFIX}${namespace}${PID_SUFFIX}`)
140
- }
141
-
142
59
  function buildRequest(command, flags, positional) {
143
60
  const id = 1
144
61
  const globalFlags = {}
145
- for (const key of ['headless', 'json', 'connect-url', 'channel', 'profile-dir']) {
62
+ for (const key of ['name', 'headless', 'json', 'cdp-url', 'channel', 'user-data-dir']) {
146
63
  if (key in flags) {
147
64
  globalFlags[key] = flags[key]
148
65
  delete flags[key]
@@ -248,66 +165,38 @@ function buildRequest(command, flags, positional) {
248
165
  return { id, command, args }
249
166
  }
250
167
 
251
- function readPid(pidPath) {
252
- if (!existsSync(pidPath)) {
253
- return null
254
- }
255
-
256
- const parsed = Number.parseInt(readFileSync(pidPath, 'utf-8').trim(), 10)
257
- if (!Number.isInteger(parsed) || parsed <= 0) {
258
- return null
259
- }
260
-
261
- return parsed
262
- }
263
-
264
- function isPidAlive(pid) {
168
+ function isServerRunning() {
169
+ if (!existsSync(PID_PATH)) return false
265
170
  try {
171
+ const pid = parseInt(readFileSync(PID_PATH, 'utf-8').trim(), 10)
266
172
  process.kill(pid, 0)
267
173
  return true
268
174
  } catch {
175
+ cleanStaleFiles()
269
176
  return false
270
177
  }
271
178
  }
272
179
 
273
- function cleanStaleFiles(namespace) {
180
+ function cleanStaleFiles() {
274
181
  try {
275
- unlinkSync(getSocketPath(namespace))
276
- } catch { }
182
+ unlinkSync(SOCKET_PATH)
183
+ } catch {}
277
184
  try {
278
- unlinkSync(getPidPath(namespace))
279
- } catch { }
185
+ unlinkSync(PID_PATH)
186
+ } catch {}
280
187
  }
281
188
 
282
- function isServerRunning(namespace) {
283
- const pidPath = getPidPath(namespace)
284
- const pid = readPid(pidPath)
285
- if (!pid) {
286
- cleanStaleFiles(namespace)
287
- return false
288
- }
289
-
290
- if (!isPidAlive(pid)) {
291
- cleanStaleFiles(namespace)
292
- return false
293
- }
294
-
295
- return existsSync(getSocketPath(namespace))
296
- }
189
+ function startServer() {
190
+ mkdirSync(RUNTIME_DIR, { recursive: true })
297
191
 
298
- function startServer(namespace) {
299
192
  const child = spawn('node', [SERVER_SCRIPT], {
300
193
  detached: true,
301
194
  stdio: ['ignore', 'ignore', 'ignore'],
302
- env: {
303
- ...process.env,
304
- OPENSTEER_NAME: namespace,
305
- },
306
195
  })
307
196
  child.unref()
308
197
  }
309
198
 
310
- function waitForSocket(socketPath, timeout) {
199
+ function waitForSocket(timeout) {
311
200
  return new Promise((resolve, reject) => {
312
201
  const start = Date.now()
313
202
 
@@ -317,7 +206,7 @@ function waitForSocket(socketPath, timeout) {
317
206
  return
318
207
  }
319
208
 
320
- if (existsSync(socketPath)) {
209
+ if (existsSync(SOCKET_PATH)) {
321
210
  resolve()
322
211
  return
323
212
  }
@@ -329,9 +218,9 @@ function waitForSocket(socketPath, timeout) {
329
218
  })
330
219
  }
331
220
 
332
- function sendCommand(socketPath, request) {
221
+ function sendCommand(request) {
333
222
  return new Promise((resolve, reject) => {
334
- const socket = connect(socketPath)
223
+ const socket = connect(SOCKET_PATH)
335
224
  let buffer = ''
336
225
  let settled = false
337
226
 
@@ -381,71 +270,6 @@ function sendCommand(socketPath, request) {
381
270
  })
382
271
  }
383
272
 
384
- function listSessions() {
385
- const sessions = []
386
- const entries = readdirSync(tmpdir())
387
-
388
- for (const entry of entries) {
389
- if (!entry.startsWith(RUNTIME_PREFIX) || !entry.endsWith(PID_SUFFIX)) {
390
- continue
391
- }
392
-
393
- const name = entry.slice(
394
- RUNTIME_PREFIX.length,
395
- entry.length - PID_SUFFIX.length
396
- )
397
- if (!name) {
398
- continue
399
- }
400
-
401
- const pid = readPid(join(tmpdir(), entry))
402
- if (!pid || !isPidAlive(pid)) {
403
- cleanStaleFiles(name)
404
- continue
405
- }
406
-
407
- sessions.push({ name, pid })
408
- }
409
-
410
- sessions.sort((a, b) => a.name.localeCompare(b.name))
411
- return sessions
412
- }
413
-
414
- async function closeAllSessions() {
415
- const sessions = listSessions()
416
- const closed = []
417
- const failures = []
418
-
419
- for (const session of sessions) {
420
- const socketPath = getSocketPath(session.name)
421
- if (!existsSync(socketPath)) {
422
- cleanStaleFiles(session.name)
423
- continue
424
- }
425
-
426
- try {
427
- const response = await sendCommand(socketPath, CLOSE_ALL_REQUEST)
428
- if (response && response.ok === true) {
429
- closed.push(session)
430
- } else {
431
- failures.push(
432
- `${session.name}: ${response?.error || 'unknown close error'}`
433
- )
434
- }
435
- } catch (err) {
436
- failures.push(
437
- `${session.name}: ${err instanceof Error ? err.message : String(err)}`
438
- )
439
- }
440
- }
441
-
442
- if (failures.length > 0) {
443
- throw new Error(`Failed to close sessions: ${failures.join('; ')}`)
444
- }
445
-
446
- return closed
447
- }
448
-
449
273
  function output(data) {
450
274
  process.stdout.write(JSON.stringify(data) + '\n')
451
275
  }
@@ -465,11 +289,6 @@ Navigation:
465
289
  forward Go forward
466
290
  reload Reload page
467
291
  close Close browser and server
468
- close --all Close all active namespace-scoped servers
469
-
470
- Sessions:
471
- sessions List active namespace-scoped sessions
472
- status Show resolved namespace and session state
473
292
 
474
293
  Observation:
475
294
  snapshot [--mode action] Get page snapshot
@@ -515,87 +334,40 @@ Utility:
515
334
  extract <schema-json> Extract structured data
516
335
 
517
336
  Global Flags:
518
- --name <namespace> Session namespace (default: CWD basename or OPENSTEER_NAME)
337
+ --name <namespace> Storage namespace (default: "cli")
519
338
  --headless Launch browser in headless mode
520
- --connect-url <url> Connect to a running browser (e.g. http://localhost:9222)
339
+ --cdp-url <url> Connect to running Chrome via CDP (e.g. http://localhost:9222)
521
340
  --channel <browser> Use installed browser (chrome, chrome-beta, msedge)
522
- --profile-dir <path> Browser profile directory for logged-in sessions
341
+ --user-data-dir <path> Chrome profile directory for logged-in sessions
523
342
  --element <N> Target element by counter
524
343
  --selector <css> Target element by CSS selector
525
344
  --description <text> Description for selector persistence
526
345
  --help Show this help
527
- --version, -v Show version
528
-
529
- Environment:
530
- OPENSTEER_NAME Default session namespace when --name is omitted
531
- OPENSTEER_MODE Runtime mode: "local" (default) or "remote"
532
- OPENSTEER_API_KEY Required when remote mode is selected
533
- OPENSTEER_BASE_URL Override remote control-plane base URL
534
346
  `)
535
347
  }
536
348
 
537
349
  async function main() {
538
350
  const { command, flags, positional } = parseArgs(process.argv)
539
- const { namespace, source: namespaceSource } = resolveNamespace(flags)
540
- const socketPath = getSocketPath(namespace)
541
-
542
- if (command === 'sessions') {
543
- output({ ok: true, sessions: listSessions() })
544
- return
545
- }
546
-
547
- if (command === 'status') {
548
- output({
549
- ok: true,
550
- namespace,
551
- namespaceSource,
552
- serverRunning: isServerRunning(namespace),
553
- socketPath,
554
- sessions: listSessions(),
555
- })
556
- return
557
- }
558
-
559
- if (command === 'close' && flags.all === true) {
560
- try {
561
- const closed = await closeAllSessions()
562
- output({ ok: true, closed })
563
- } catch (err) {
564
- error(err instanceof Error ? err.message : 'Failed to close sessions')
565
- }
566
- return
567
- }
568
-
569
- delete flags.name
570
- delete flags.all
571
351
  const request = buildRequest(command, flags, positional)
572
352
 
573
- if (!isServerRunning(namespace)) {
574
- if (command !== 'open') {
575
- error(
576
- `No server running for namespace '${namespace}' (resolved from ${namespaceSource}). Run 'opensteer open' first or use 'opensteer sessions' to see active sessions.`
577
- )
578
- }
353
+ if (!isServerRunning()) {
579
354
  if (!existsSync(SERVER_SCRIPT)) {
580
355
  error(
581
356
  `Server script not found: ${SERVER_SCRIPT}. Run the build script first.`
582
357
  )
583
358
  }
584
- startServer(namespace)
359
+ startServer()
585
360
  try {
586
- await waitForSocket(socketPath, CONNECT_TIMEOUT)
361
+ await waitForSocket(CONNECT_TIMEOUT)
587
362
  } catch {
588
363
  error('Failed to start server. Check that the build is complete.')
589
364
  }
590
365
  }
591
366
 
592
367
  try {
593
- const response = await sendCommand(socketPath, request)
368
+ const response = await sendCommand(request)
594
369
 
595
370
  if (response.ok) {
596
- if (command === 'open') {
597
- writeActiveNamespace(namespace)
598
- }
599
371
  output({ ok: true, ...response.result })
600
372
  } else {
601
373
  process.stderr.write(
@@ -604,7 +376,7 @@ async function main() {
604
376
  process.exit(1)
605
377
  }
606
378
  } catch (err) {
607
- error(err instanceof Error ? err.message : 'Connection failed')
379
+ error(err.message || 'Connection failed')
608
380
  }
609
381
  }
610
382