opensteer 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,8 +4,23 @@
4
4
 
5
5
  - Breaking: removed legacy `ai` config from `OpensteerConfig`; use top-level `model` instead.
6
6
  - Breaking: `OPENSTEER_AI_MODEL` is no longer supported; use `OPENSTEER_MODEL`.
7
+ - Breaking: `OPENSTEER_RUNTIME` is no longer supported; use `OPENSTEER_MODE`.
8
+ - Breaking: mode selection now uses `mode: 'local' | 'remote'` and remote credentials use `remote.apiKey`.
7
9
  - Opensteer now enables built-in LLM resolve/extract by default with model `gpt-5.1`.
8
- - Cloud mode now falls back to `OPENSTEER_API_KEY` when `cloud.key` is omitted.
10
+ - Remote mode now falls back to `OPENSTEER_API_KEY` when `remote.apiKey` is omitted.
11
+ - Mutating actions now include a smart, best-effort post-action wait with per-action
12
+ profiles and optional per-call overrides via `wait`.
13
+ - Added structured interaction diagnostics via `OpensteerActionError` for
14
+ descriptor-aware interaction methods (`click`, `dblclick`, `rightclick`,
15
+ `hover`, `input`, `select`, `scroll`, `uploadFile`).
16
+ - Added `ActionFailure` types (`ActionFailureCode`, `retryable`,
17
+ `classificationSource`, optional `details`) to support programmatic handling
18
+ of action failures.
19
+ - Added DOM actionability probe + Playwright call-log classification to report
20
+ reasons like `BLOCKED_BY_INTERCEPTOR`, `NOT_VISIBLE`, `NOT_EDITABLE`, and
21
+ timeout/stale-target cases more accurately.
22
+ - Remote action failures now accept optional structured failure details and map
23
+ them to `OpensteerActionError` when available.
9
24
 
10
25
  ## 0.1.0
11
26
 
package/README.md CHANGED
@@ -2,11 +2,14 @@
2
2
 
3
3
  Lean browser automation SDK for coding agents and script replay.
4
4
 
5
- `opensteer` wraps only operations that need descriptor resolution (`snapshot`,
6
- `click`, `dblclick`, `rightclick`, `hover`, `input`, `select`, `scroll`,
7
- `extract`, `extractFromPlan`, `state`).
5
+ `opensteer` provides descriptor-aware actions (`click`, `dblclick`,
6
+ `rightclick`, `hover`, `input`, `select`, `scroll`, `extract`,
7
+ `extractFromPlan`, `uploadFile`), observation (`snapshot`, `state`,
8
+ `screenshot`), navigation (`goto`), and convenience methods for tabs, cookies,
9
+ keyboard, element info, and wait.
8
10
 
9
- Everything else is raw Playwright via `ov.page` and `ov.context`.
11
+ For anything not covered, use raw Playwright via `opensteer.page` and
12
+ `opensteer.context`.
10
13
 
11
14
  ## Install
12
15
 
@@ -22,23 +25,23 @@ pnpm add opensteer playwright
22
25
  ```ts
23
26
  import { Opensteer } from "opensteer";
24
27
 
25
- const ov = new Opensteer({ name: "my-scraper" }); // defaults to model: 'gpt-5.1'
26
- await ov.launch({ headless: false });
28
+ const opensteer = new Opensteer({ name: "my-scraper" }); // defaults to model: 'gpt-5.1'
29
+ await opensteer.launch({ headless: false });
27
30
 
28
- await ov.page.goto("https://example.com");
29
- const html = await ov.snapshot();
31
+ await opensteer.goto("https://example.com");
32
+ const html = await opensteer.snapshot();
30
33
 
31
- await ov.click({ description: "login-button" });
32
- await ov.input({ description: "email", text: "user@example.com" });
33
- await ov.page.keyboard.press("Enter");
34
+ await opensteer.click({ description: "login-button" });
35
+ await opensteer.input({ description: "email", text: "user@example.com" });
36
+ await opensteer.page.keyboard.press("Enter");
34
37
 
35
- await ov.close();
38
+ await opensteer.close();
36
39
  ```
37
40
 
38
41
  ## Core Model
39
42
 
40
- - `ov.page`: raw Playwright `Page`
41
- - `ov.context`: raw Playwright `BrowserContext`
43
+ - `opensteer.page`: raw Playwright `Page`
44
+ - `opensteer.context`: raw Playwright `BrowserContext`
42
45
  - Opensteer methods: descriptor-aware operations that can persist selectors
43
46
  - Selector storage: `.opensteer/selectors/<namespace>`
44
47
 
@@ -54,14 +57,60 @@ For actions like `click`/`input`/`hover`/`select`/`scroll`:
54
57
 
55
58
  When steps 2-4 resolve and `description` is provided, the path is persisted.
56
59
 
60
+ ## Smart Post-Action Wait
61
+
62
+ Mutating actions (`click`, `input`, `select`, `scroll`, etc.) include a
63
+ best-effort post-action wait so delayed visual updates are usually settled
64
+ before the method resolves.
65
+
66
+ You can disable or tune this per call:
67
+
68
+ ```ts
69
+ await opensteer.click({ description: "Save button", wait: false });
70
+
71
+ await opensteer.click({
72
+ description: "Save button",
73
+ wait: { timeout: 9000, settleMs: 900, includeNetwork: true, networkQuietMs: 400 },
74
+ });
75
+ ```
76
+
77
+ ## Action Failure Diagnostics
78
+
79
+ Descriptor-aware interaction methods (`click`, `dblclick`, `rightclick`,
80
+ `hover`, `input`, `select`, `scroll`, `uploadFile`) throw
81
+ `OpensteerActionError` when an interaction cannot be completed.
82
+
83
+ The error includes structured failure metadata for agent/tooling decisions:
84
+
85
+ - `error.failure.code` (`ActionFailureCode`)
86
+ - `error.failure.message`
87
+ - `error.failure.retryable`
88
+ - `error.failure.classificationSource`
89
+ - `error.failure.details` (for blocker and observation details when available)
90
+
91
+ ```ts
92
+ import { Opensteer, OpensteerActionError } from "opensteer";
93
+
94
+ try {
95
+ await opensteer.click({ description: "Save button" });
96
+ } catch (err) {
97
+ if (err instanceof OpensteerActionError) {
98
+ console.error(err.failure.code); // e.g. BLOCKED_BY_INTERCEPTOR
99
+ console.error(err.failure.message);
100
+ console.error(err.failure.classificationSource);
101
+ }
102
+ throw err;
103
+ }
104
+ ```
105
+
57
106
  ## Snapshot Modes
58
107
 
59
108
  ```ts
60
- await ov.snapshot(); // action mode (default)
61
- await ov.snapshot({ mode: "extraction" });
62
- await ov.snapshot({ mode: "clickable" });
63
- await ov.snapshot({ mode: "scrollable" });
64
- await ov.snapshot({ mode: "full" });
109
+ await opensteer.snapshot(); // action mode (default)
110
+ await opensteer.snapshot({ mode: "extraction" });
111
+ await opensteer.snapshot({ mode: "clickable" });
112
+ await opensteer.snapshot({ mode: "scrollable" });
113
+ await opensteer.snapshot({ mode: "full" });
65
114
  ```
66
115
 
67
116
  ## Two Usage Patterns
@@ -77,16 +126,27 @@ Opensteer uses built-in LLM resolve/extract by default. You can override the
77
126
  default model with top-level `model` or `OPENSTEER_MODEL`.
78
127
 
79
128
  ```ts
80
- const ov = new Opensteer({
129
+ const opensteer = new Opensteer({
81
130
  name: "run-mode",
82
131
  model: "gpt-5-mini",
83
132
  });
84
133
  ```
85
134
 
135
+ ## Mode Selection
136
+
137
+ Opensteer defaults to local mode.
138
+
139
+ - `OPENSTEER_MODE=local` runs local Playwright.
140
+ - `OPENSTEER_MODE=remote` runs remote mode (requires an API key via `remote.apiKey` or `OPENSTEER_API_KEY`).
141
+ - `mode: "remote"` in constructor config always forces remote mode.
142
+
143
+ Remote mode is fail-fast: it does not automatically fall back to local mode.
144
+
86
145
  ## Docs
87
146
 
88
147
  - `docs/getting-started.md`
89
148
  - `docs/api-reference.md`
149
+ - `docs/remote-integration.md`
90
150
  - `docs/html-cleaning.md`
91
151
  - `docs/selectors.md`
92
152
  - `docs/live-web-tests.md`
package/bin/opensteer.mjs CHANGED
@@ -59,7 +59,7 @@ function parseValue(str) {
59
59
  function buildRequest(command, flags, positional) {
60
60
  const id = 1
61
61
  const globalFlags = {}
62
- for (const key of ['name', 'headless', 'json', 'cdp-url', 'channel', 'user-data-dir']) {
62
+ for (const key of ['name', 'headless', 'json', 'connect-url', 'channel', 'profile-dir']) {
63
63
  if (key in flags) {
64
64
  globalFlags[key] = flags[key]
65
65
  delete flags[key]
@@ -180,10 +180,10 @@ function isServerRunning() {
180
180
  function cleanStaleFiles() {
181
181
  try {
182
182
  unlinkSync(SOCKET_PATH)
183
- } catch {}
183
+ } catch { }
184
184
  try {
185
185
  unlinkSync(PID_PATH)
186
- } catch {}
186
+ } catch { }
187
187
  }
188
188
 
189
189
  function startServer() {
@@ -336,13 +336,18 @@ Utility:
336
336
  Global Flags:
337
337
  --name <namespace> Storage namespace (default: "cli")
338
338
  --headless Launch browser in headless mode
339
- --cdp-url <url> Connect to running Chrome via CDP (e.g. http://localhost:9222)
339
+ --connect-url <url> Connect to a running browser (e.g. http://localhost:9222)
340
340
  --channel <browser> Use installed browser (chrome, chrome-beta, msedge)
341
- --user-data-dir <path> Chrome profile directory for logged-in sessions
341
+ --profile-dir <path> Browser profile directory for logged-in sessions
342
342
  --element <N> Target element by counter
343
343
  --selector <css> Target element by CSS selector
344
344
  --description <text> Description for selector persistence
345
345
  --help Show this help
346
+
347
+ Environment:
348
+ OPENSTEER_MODE Runtime mode: "local" (default) or "remote"
349
+ OPENSTEER_API_KEY Required when remote mode is selected
350
+ OPENSTEER_BASE_URL Override remote control-plane base URL
346
351
  `)
347
352
  }
348
353