opensteer 0.9.3 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/README.md +158 -165
  2. package/dist/{chunk-UM2Q4JD2.js → chunk-7D45QUZ3.js} +5 -7
  3. package/dist/chunk-7D45QUZ3.js.map +1 -0
  4. package/dist/{chunk-GREXSYNC.js → chunk-7LQL5YUR.js} +578 -224
  5. package/dist/chunk-7LQL5YUR.js.map +1 -0
  6. package/dist/{chunk-2TIVULZY.js → chunk-GSCQQKZZ.js} +53 -9
  7. package/dist/chunk-GSCQQKZZ.js.map +1 -0
  8. package/dist/{chunk-BMPUL66S.js → chunk-T5P2QGZ3.js} +58 -53
  9. package/dist/chunk-T5P2QGZ3.js.map +1 -0
  10. package/dist/{chunk-FIMNKEG5.js → chunk-ZRF7WMS3.js} +4 -4
  11. package/dist/{chunk-FIMNKEG5.js.map → chunk-ZRF7WMS3.js.map} +1 -1
  12. package/dist/cli/bin.cjs +707 -278
  13. package/dist/cli/bin.cjs.map +1 -1
  14. package/dist/cli/bin.js +30 -34
  15. package/dist/cli/bin.js.map +1 -1
  16. package/dist/index.cjs +733 -473
  17. package/dist/index.cjs.map +1 -1
  18. package/dist/index.d.cts +25 -460
  19. package/dist/index.d.ts +25 -460
  20. package/dist/index.js +4 -5
  21. package/dist/local-view/serve-entry.cjs +106 -57
  22. package/dist/local-view/serve-entry.cjs.map +1 -1
  23. package/dist/local-view/serve-entry.js +2 -2
  24. package/dist/opensteer-T2JENADR.js +6 -0
  25. package/dist/{opensteer-IBDPRIEX.js.map → opensteer-T2JENADR.js.map} +1 -1
  26. package/dist/{session-control-IFE3IPS3.js → session-control-M3JD7ZKA.js} +4 -4
  27. package/dist/{session-control-IFE3IPS3.js.map → session-control-M3JD7ZKA.js.map} +1 -1
  28. package/package.json +6 -6
  29. package/skills/opensteer/SKILL.md +134 -95
  30. package/skills/recorder/SKILL.md +43 -48
  31. package/dist/chunk-2TIVULZY.js.map +0 -1
  32. package/dist/chunk-BMPUL66S.js.map +0 -1
  33. package/dist/chunk-GREXSYNC.js.map +0 -1
  34. package/dist/chunk-KCINASQC.js +0 -3
  35. package/dist/chunk-KCINASQC.js.map +0 -1
  36. package/dist/chunk-UM2Q4JD2.js.map +0 -1
  37. package/dist/opensteer-IBDPRIEX.js +0 -6
  38. package/skills/recorder/references/recorder-reference.md +0 -71
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env node
2
- import { runLocalViewService } from '../chunk-FIMNKEG5.js';
3
- import '../chunk-BMPUL66S.js';
2
+ import { runLocalViewService } from '../chunk-ZRF7WMS3.js';
3
+ import '../chunk-T5P2QGZ3.js';
4
4
 
5
5
  // src/local-view/serve-entry.ts
6
6
  runLocalViewService().catch((error) => {
@@ -0,0 +1,6 @@
1
+ export { Opensteer } from './chunk-7D45QUZ3.js';
2
+ import './chunk-7LQL5YUR.js';
3
+ import './chunk-GSCQQKZZ.js';
4
+ import './chunk-T5P2QGZ3.js';
5
+ //# sourceMappingURL=opensteer-T2JENADR.js.map
6
+ //# sourceMappingURL=opensteer-T2JENADR.js.map
@@ -1 +1 @@
1
- {"version":3,"sources":[],"names":[],"mappings":"","file":"opensteer-IBDPRIEX.js"}
1
+ {"version":3,"sources":[],"names":[],"mappings":"","file":"opensteer-T2JENADR.js"}
@@ -1,5 +1,5 @@
1
- import { OpensteerBrowserManager } from './chunk-2TIVULZY.js';
2
- import { readLocalViewSessionManifest, readPersistedLocalBrowserSessionRecord, deleteLocalViewSessionManifest } from './chunk-BMPUL66S.js';
1
+ import { OpensteerBrowserManager } from './chunk-GSCQQKZZ.js';
2
+ import { readLocalViewSessionManifest, readPersistedLocalBrowserSessionRecord, deleteLocalViewSessionManifest } from './chunk-T5P2QGZ3.js';
3
3
 
4
4
  // src/local-view/session-control.ts
5
5
  var LocalViewSessionCloseError = class extends Error {
@@ -35,5 +35,5 @@ async function closeLocalViewSessionBrowser(sessionId) {
35
35
  }
36
36
 
37
37
  export { LocalViewSessionCloseError, closeLocalViewSessionBrowser };
38
- //# sourceMappingURL=session-control-IFE3IPS3.js.map
39
- //# sourceMappingURL=session-control-IFE3IPS3.js.map
38
+ //# sourceMappingURL=session-control-M3JD7ZKA.js.map
39
+ //# sourceMappingURL=session-control-M3JD7ZKA.js.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/local-view/session-control.ts"],"names":[],"mappings":";;;;AAOO,IAAM,0BAAA,GAAN,cAAyC,KAAA,CAAM;AAAA,EACpD,WAAA,CACE,SACS,UAAA,EACT;AACA,IAAA,KAAA,CAAM,OAAO,CAAA;AAFJ,IAAA,IAAA,CAAA,UAAA,GAAA,UAAA;AAGT,IAAA,IAAA,CAAK,IAAA,GAAO,4BAAA;AAAA,EACd;AACF;AAEA,eAAsB,6BAA6B,SAAA,EAAkC;AACnF,EAAA,MAAM,QAAA,GAAW,MAAM,4BAAA,CAA6B,SAAS,CAAA;AAC7D,EAAA,IAAI,CAAC,QAAA,EAAU;AACb,IAAA,MAAM,IAAI,0BAAA,CAA2B,oBAAA,EAAsB,GAAG,CAAA;AAAA,EAChE;AAEA,EAAA,IAAI,QAAA,CAAS,cAAc,OAAA,EAAS;AAClC,IAAA,MAAM,IAAI,0BAAA;AAAA,MACR,wEAAA;AAAA,MACA;AAAA,KACF;AAAA,EACF;AAEA,EAAA,MAAM,MAAA,GAAS,MAAM,sCAAA,CAAuC,QAAA,CAAS,QAAQ,CAAA;AAC7E,EAAA,IACE,CAAC,MAAA,IACD,MAAA,CAAO,GAAA,KAAQ,QAAA,CAAS,GAAA,IACxB,MAAA,CAAO,SAAA,KAAc,QAAA,CAAS,SAAA,IAC9B,MAAA,CAAO,MAAA,KAAW,SAAS,MAAA,EAC3B;AACA,IAAA,MAAM,8BAAA,CAA+B,SAAS,CAAA,CAAE,KAAA,CAAM,MAAM,MAAS,CAAA;AACrE,IAAA,MAAM,IAAI,0BAAA,CAA2B,oBAAA,EAAsB,GAAG,CAAA;AAAA,EAChE;AAEA,EAAA,MAAM,OAAA,GAAU,IAAI,uBAAA,CAAwB;AAAA,IAC1C,UAAU,QAAA,CAAS,QAAA;AAAA,IACnB,GAAI,SAAS,SAAA,KAAc,MAAA,GAAY,EAAC,GAAI,EAAE,SAAA,EAAW,QAAA,CAAS,SAAA,EAAU;AAAA,IAC5E,YAAY,MAAA,CAAO,MAAA;AAAA,IACnB,OAAA,EAAS;AAAA,GACV,CAAA;AACD,EAAA,MAAM,QAAQ,KAAA,EAAM;AACtB","file":"session-control-IFE3IPS3.js","sourcesContent":["import { readPersistedLocalBrowserSessionRecord } from \"../live-session.js\";\nimport { OpensteerBrowserManager } from \"../browser-manager.js\";\nimport {\n deleteLocalViewSessionManifest,\n readLocalViewSessionManifest,\n} from \"./session-manifest.js\";\n\nexport class LocalViewSessionCloseError extends Error {\n constructor(\n message: string,\n readonly statusCode: 404 | 409,\n ) {\n super(message);\n this.name = \"LocalViewSessionCloseError\";\n }\n}\n\nexport async function closeLocalViewSessionBrowser(sessionId: string): Promise<void> {\n const manifest = await readLocalViewSessionManifest(sessionId);\n if (!manifest) {\n throw new LocalViewSessionCloseError(\"Session not found.\", 404);\n }\n\n if (manifest.ownership !== \"owned\") {\n throw 
new LocalViewSessionCloseError(\n \"Only Opensteer-owned local browsers can be closed from the local view.\",\n 409,\n );\n }\n\n const record = await readPersistedLocalBrowserSessionRecord(manifest.rootPath);\n if (\n !record ||\n record.pid !== manifest.pid ||\n record.startedAt !== manifest.startedAt ||\n record.engine !== manifest.engine\n ) {\n await deleteLocalViewSessionManifest(sessionId).catch(() => undefined);\n throw new LocalViewSessionCloseError(\"Session not found.\", 404);\n }\n\n const manager = new OpensteerBrowserManager({\n rootPath: manifest.rootPath,\n ...(manifest.workspace === undefined ? {} : { workspace: manifest.workspace }),\n engineName: record.engine,\n browser: \"persistent\",\n });\n await manager.close();\n}\n"]}
1
+ {"version":3,"sources":["../src/local-view/session-control.ts"],"names":[],"mappings":";;;;AAOO,IAAM,0BAAA,GAAN,cAAyC,KAAA,CAAM;AAAA,EACpD,WAAA,CACE,SACS,UAAA,EACT;AACA,IAAA,KAAA,CAAM,OAAO,CAAA;AAFJ,IAAA,IAAA,CAAA,UAAA,GAAA,UAAA;AAGT,IAAA,IAAA,CAAK,IAAA,GAAO,4BAAA;AAAA,EACd;AACF;AAEA,eAAsB,6BAA6B,SAAA,EAAkC;AACnF,EAAA,MAAM,QAAA,GAAW,MAAM,4BAAA,CAA6B,SAAS,CAAA;AAC7D,EAAA,IAAI,CAAC,QAAA,EAAU;AACb,IAAA,MAAM,IAAI,0BAAA,CAA2B,oBAAA,EAAsB,GAAG,CAAA;AAAA,EAChE;AAEA,EAAA,IAAI,QAAA,CAAS,cAAc,OAAA,EAAS;AAClC,IAAA,MAAM,IAAI,0BAAA;AAAA,MACR,wEAAA;AAAA,MACA;AAAA,KACF;AAAA,EACF;AAEA,EAAA,MAAM,MAAA,GAAS,MAAM,sCAAA,CAAuC,QAAA,CAAS,QAAQ,CAAA;AAC7E,EAAA,IACE,CAAC,MAAA,IACD,MAAA,CAAO,GAAA,KAAQ,QAAA,CAAS,GAAA,IACxB,MAAA,CAAO,SAAA,KAAc,QAAA,CAAS,SAAA,IAC9B,MAAA,CAAO,MAAA,KAAW,SAAS,MAAA,EAC3B;AACA,IAAA,MAAM,8BAAA,CAA+B,SAAS,CAAA,CAAE,KAAA,CAAM,MAAM,MAAS,CAAA;AACrE,IAAA,MAAM,IAAI,0BAAA,CAA2B,oBAAA,EAAsB,GAAG,CAAA;AAAA,EAChE;AAEA,EAAA,MAAM,OAAA,GAAU,IAAI,uBAAA,CAAwB;AAAA,IAC1C,UAAU,QAAA,CAAS,QAAA;AAAA,IACnB,GAAI,SAAS,SAAA,KAAc,MAAA,GAAY,EAAC,GAAI,EAAE,SAAA,EAAW,QAAA,CAAS,SAAA,EAAU;AAAA,IAC5E,YAAY,MAAA,CAAO,MAAA;AAAA,IACnB,OAAA,EAAS;AAAA,GACV,CAAA;AACD,EAAA,MAAM,QAAQ,KAAA,EAAM;AACtB","file":"session-control-M3JD7ZKA.js","sourcesContent":["import { readPersistedLocalBrowserSessionRecord } from \"../live-session.js\";\nimport { OpensteerBrowserManager } from \"../browser-manager.js\";\nimport {\n deleteLocalViewSessionManifest,\n readLocalViewSessionManifest,\n} from \"./session-manifest.js\";\n\nexport class LocalViewSessionCloseError extends Error {\n constructor(\n message: string,\n readonly statusCode: 404 | 409,\n ) {\n super(message);\n this.name = \"LocalViewSessionCloseError\";\n }\n}\n\nexport async function closeLocalViewSessionBrowser(sessionId: string): Promise<void> {\n const manifest = await readLocalViewSessionManifest(sessionId);\n if (!manifest) {\n throw new LocalViewSessionCloseError(\"Session not found.\", 404);\n }\n\n if (manifest.ownership !== \"owned\") {\n throw 
new LocalViewSessionCloseError(\n \"Only Opensteer-owned local browsers can be closed from the local view.\",\n 409,\n );\n }\n\n const record = await readPersistedLocalBrowserSessionRecord(manifest.rootPath);\n if (\n !record ||\n record.pid !== manifest.pid ||\n record.startedAt !== manifest.startedAt ||\n record.engine !== manifest.engine\n ) {\n await deleteLocalViewSessionManifest(sessionId).catch(() => undefined);\n throw new LocalViewSessionCloseError(\"Session not found.\", 404);\n }\n\n const manager = new OpensteerBrowserManager({\n rootPath: manifest.rootPath,\n ...(manifest.workspace === undefined ? {} : { workspace: manifest.workspace }),\n engineName: record.engine,\n browser: \"persistent\",\n });\n await manager.close();\n}\n"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opensteer",
3
- "version": "0.9.3",
3
+ "version": "0.9.5",
4
4
  "description": "Opensteer browser automation, replay, and reverse-engineering toolkit.",
5
5
  "license": "MIT",
6
6
  "type": "module",
@@ -57,7 +57,7 @@
57
57
  "skills": "^1.4.6",
58
58
  "ws": "^8.18.0",
59
59
  "@opensteer/engine-playwright": "0.8.9",
60
- "@opensteer/runtime-core": "0.2.3"
60
+ "@opensteer/runtime-core": "0.2.4"
61
61
  },
62
62
  "optionalDependencies": {
63
63
  "webcrack": "^2.15.1"
@@ -71,12 +71,12 @@
71
71
  }
72
72
  },
73
73
  "devDependencies": {
74
- "@opensteer/browser-core": "0.7.9",
75
- "@opensteer/protocol": "0.8.3",
76
- "@opensteer/engine-abp": "0.8.9"
74
+ "@opensteer/browser-core": "0.7.10",
75
+ "@opensteer/engine-abp": "0.8.9",
76
+ "@opensteer/protocol": "0.8.4"
77
77
  },
78
78
  "scripts": {
79
- "build": "tsup && node ../../scripts/copy-opensteer-local-view-assets.mjs && node ../../scripts/sync-package-skills.mjs",
79
+ "build": "tsup && node ../../scripts/copy-opensteer-local-view-assets.mjs && node ../../scripts/sync-package-skills.mjs && node ../../scripts/sync-package-readme.mjs",
80
80
  "clean": "rimraf dist skills",
81
81
  "typecheck": "tsc --noEmit -p tsconfig.json"
82
82
  }
@@ -6,25 +6,48 @@ argument-hint: "[goal]"
6
6
 
7
7
  # Opensteer
8
8
 
9
- Opensteer gives AI agents a real Chromium browser — local or cloud. Use it when normal code is not enough because the task depends on a live browser session.
9
+ Opensteer gives AI agents a real Chromium browser. Use it when the task depends on a live browser session — clicks, forms, extraction, cookies, network capture, or browser-backed fetch.
10
10
 
11
- Default workflow:
11
+ ## Core Workflow
12
12
 
13
- 1. CLI to explore the site and discover behavior.
14
- 2. Save stable targets with `persist`.
15
- 3. SDK to write the final reusable TypeScript.
13
+ Follow this order. Do not skip steps.
16
14
 
17
- Do not stop at manual exploration if the user needs automation.
15
+ 1. **Open** a browser in a workspace.
16
+ 2. **Snapshot** to see the page and get element numbers.
17
+ 3. **Interact** using element numbers from the latest snapshot. Every action requires `--persist <key>`.
18
+ 4. **Re-snapshot** after navigation or UI changes before reusing element numbers.
19
+ 5. **Extract** data using a template with `--persist <key>`.
20
+ 6. **Write SDK code** that replays persisted targets — no templates in the SDK, only persist keys.
21
+ 7. **Close** the browser when done.
18
22
 
19
- ## Setup
23
+ ```bash
24
+ opensteer open https://example.com --workspace demo
25
+ opensteer snapshot action --workspace demo
26
+ opensteer input 5 "laptop" --workspace demo --press-enter --persist "search input"
27
+ opensteer click 7 --workspace demo --persist "search button"
28
+ opensteer snapshot extraction --workspace demo
29
+ opensteer extract '{"items":[{"title":13,"price":14},{"title":22,"price":23},{"title":31,"price":32}]}' --workspace demo --persist "search results"
30
+ ```
31
+
32
+ ```ts
33
+ import { Opensteer } from "opensteer";
34
+
35
+ const opensteer = new Opensteer({ workspace: "demo", rootDir: process.cwd() });
36
+
37
+ await opensteer.open("https://example.com");
38
+ await opensteer.input({ persist: "search input", text: "laptop", pressEnter: true });
39
+ await opensteer.click({ persist: "search button" });
40
+ const data = await opensteer.extract({ persist: "search results" });
41
+ await opensteer.close();
42
+ ```
20
43
 
21
- Install the Opensteer skill so the coding agent can use it:
44
+ ## Setup
22
45
 
23
46
  ```bash
24
47
  opensteer skills install
25
48
  ```
26
49
 
27
- This registers the skill with the agent's tool system. Only needed once per environment.
50
+ Only needed once per environment.
28
51
 
29
52
  ## When To Use
30
53
 
@@ -37,18 +60,6 @@ This registers the skill with the agent's tool system. Only needed once per envi
37
60
 
38
61
  If the user wants to manually drive a browser and record the flow, use the `recorder` skill instead.
39
62
 
40
- ## Core Rules
41
-
42
- 1. Always use a workspace for stateful commands: `--workspace <id>` or `OPENSTEER_WORKSPACE`.
43
- 2. In this repo, prefer `pnpm run opensteer:local -- <command>` instead of bare `opensteer ...`.
44
- 3. Re-snapshot after navigation or big UI changes before reusing element numbers.
45
- 4. CLI to discover, SDK for the final implementation.
46
- 5. Use `persist` for stable reusable targets and extraction payloads.
47
- 6. Use `exec` for SDK code and API experiments. Use `evaluate` only for page-context JavaScript.
48
- 7. If `fetch()` fails with auth errors, inspect `state()`, `cookies()`, and `storage()` before changing transport.
49
- 8. Keep output simple. Prefer ordinary TypeScript with `Opensteer`, no extra abstraction.
50
- 9. Close the browser when done. Do not leave headed browsers running. Use `opensteer browser delete --workspace <id>` or SDK cleanup when the session does not need to stay open.
51
-
52
63
  ## Choose A Path
53
64
 
54
65
  ```
@@ -68,26 +79,86 @@ What does the task need?
68
79
 
69
80
  Use this when the goal is clicking, typing, navigating, or extracting visible data.
70
81
 
71
- ### CLI exploration
82
+ ### Persist is required
83
+
84
+ Every `click`, `hover`, `input`, `scroll`, and `extract` command requires `--persist <key>`. This saves a stable element descriptor so the action is replayable across sessions. Name the key after what the element is:
72
85
 
73
86
  ```bash
74
- opensteer open https://example.com --workspace demo
75
- opensteer snapshot action --workspace demo
76
- opensteer input 5 "laptop" --workspace demo --press-enter --persist "search input"
77
87
  opensteer click 7 --workspace demo --persist "search button"
78
- opensteer snapshot extraction --workspace demo
79
- opensteer extract '{"items":[{"name":{"element":13},"price":{"element":14}}]}' \
80
- --workspace demo \
81
- --persist "search results"
88
+ opensteer input 5 "laptop" --workspace demo --press-enter --persist "search input"
89
+ opensteer scroll down 500 --workspace demo --persist "page scroll"
82
90
  ```
83
91
 
84
- Element numbers come from `c="N"` markers in the snapshot HTML.
92
+ ### Element numbers
93
+
94
+ Element numbers come from `c="N"` markers in the snapshot HTML. They are only valid for the current snapshot. After navigation or DOM changes, snapshot again to get fresh numbers.
95
+
96
+ ```bash
97
+ opensteer snapshot action --workspace demo # for interactions
98
+ opensteer snapshot extraction --workspace demo # for data extraction
99
+ ```
100
+
101
+ Read the full snapshot output. Do not pipe it through `head`, `grep`, or `sed` — filtering destroys the structural context you need to identify which elements belong to the same card.
102
+
103
+ ### Extraction templates
104
+
105
+ The `extract` command takes a JSON template that describes the fields in one or more items. Opensteer merges the structural pattern across all provided examples and generalizes to every matching item on the page.
106
+
107
+ **Template format:**
108
+
109
+ - Bare number: `13` reads text content of element `c="13"`.
110
+ - Object with attribute: `{"c": 13, "attr": "href"}` reads an attribute from that element.
111
+ - Selector: `{"selector": "#price"}` targets by CSS selector.
112
+ - Page source: `{"source": "current_url"}` reads page metadata.
113
+
114
+ **How many items to include:**
115
+
116
+ **Lazy — 3 items from 3 different positions (recommended for reusable SDK descriptors).** Give one entry per card for 3 different cards. Opensteer compares the 3 examples, cancels out position noise, and produces a descriptor that matches all similar items. Use this when the goal is a persist key the SDK can replay later.
117
+
118
+ ```bash
119
+ opensteer extract '{
120
+ "products": [
121
+ {"title": 47, "price": 51, "url": {"c": 47, "attr": "href"}},
122
+ {"title": 62, "price": 66, "url": {"c": 62, "attr": "href"}},
123
+ {"title": 78, "price": 83, "url": {"c": 78, "attr": "href"}}
124
+ ]
125
+ }' --workspace demo --persist "search results"
126
+ ```
127
+
128
+ **Eager — all visible items (use when you need the full data immediately).** Include every item visible in the snapshot. This returns all data in one shot from the current session. The descriptor is still saved under `--persist` and can be replayed, but the generalization is weaker than the 3-item approach.
129
+
130
+ ```bash
131
+ opensteer extract '{
132
+ "products": [
133
+ {"title": 47, "price": 51, "url": {"c": 47, "attr": "href"}},
134
+ {"title": 62, "price": 66, "url": {"c": 62, "attr": "href"}},
135
+ {"title": 78, "price": 83, "url": {"c": 78, "attr": "href"}},
136
+ ...continue for every visible card...
137
+ ]
138
+ }' --workspace demo --persist "search results"
139
+ ```
140
+
141
+ **Rule: all fields in each array entry must come from the same card.** Never take a field from card 1 and another field from card 2 within the same entry — that produces a broken descriptor.
142
+
143
+ Wrong — title from card 1, price from card 2 mixed in the same entry:
144
+
145
+ ```bash
146
+ # DO NOT DO THIS — fields across cards in one entry
147
+ opensteer extract '{"products":[{"title":47,"price":66}]}' --workspace demo --persist "search results"
148
+ ```
149
+
150
+ For non-array fields at the top level, point to the elements directly:
151
+
152
+ ```bash
153
+ opensteer extract '{"pageTitle":3,"totalResults":8,"url":{"source":"current_url"}}' \
154
+ --workspace demo --persist "page metadata"
155
+ ```
85
156
 
86
157
  ### SDK implementation
87
158
 
88
- ```ts
89
- import { Opensteer } from "opensteer";
159
+ The SDK `extract()` method replays a previously persisted template. It does not accept inline templates — those belong in the CLI exploration phase.
90
160
 
161
+ ```ts
91
162
  const opensteer = new Opensteer({ workspace: "demo", rootDir: process.cwd() });
92
163
 
93
164
  await opensteer.open("https://example.com");
@@ -96,7 +167,7 @@ await opensteer.click({ persist: "search button" });
96
167
  const data = await opensteer.extract({ persist: "search results" });
97
168
  ```
98
169
 
99
- Use `selector` in SDK code only when a stable CSS selector is cleaner than `persist`.
170
+ Use `selector` in SDK action code only when a stable CSS selector is cleaner than a `persist` key.
100
171
 
101
172
  ## Network Path
102
173
 
@@ -107,7 +178,7 @@ Use this when the goal is to find or replay a site API.
107
178
  ```bash
108
179
  opensteer open https://example.com --workspace demo
109
180
  opensteer goto https://example.com/search --workspace demo --capture-network page-load
110
- opensteer input 5 "laptop" --workspace demo --press-enter --capture-network search
181
+ opensteer input 5 "laptop" --workspace demo --press-enter --persist "search input" --capture-network search
111
182
  opensteer network query --workspace demo --capture search --json
112
183
  opensteer network detail rec_123 --workspace demo --probe
113
184
  ```
@@ -175,10 +246,7 @@ Flags: `--inline`, `--external`, `--dynamic`, `--workers` to filter by source ty
175
246
  ### Beautify and deobfuscate
176
247
 
177
248
  ```bash
178
- # Format minified code
179
249
  opensteer scripts beautify <artifactId> --workspace demo --persist
180
-
181
- # Deobfuscate packed/obfuscated code
182
250
  opensteer scripts deobfuscate <artifactId> --workspace demo --persist
183
251
  ```
184
252
 
@@ -209,7 +277,7 @@ opensteer scripts sandbox art_ghi789 --workspace demo
209
277
 
210
278
  ## Computer-Use
211
279
 
212
- Use this only when DOM targeting is not enough.
280
+ Use this only when DOM targeting is not enough — canvas, WebGL, or elements that cannot be reached by selector.
213
281
 
214
282
  ```bash
215
283
  opensteer computer click 245 380 --workspace demo --capture-network action
@@ -231,11 +299,10 @@ After coordinate-based actions, switch back to normal extraction or request anal
231
299
  Use when handling OAuth popups, multi-page flows, or any task that opens new tabs.
232
300
 
233
301
  ```bash
234
- opensteer tab list --workspace demo # List all open tabs
235
- opensteer tab new https://example.com --workspace demo # Open new tab
302
+ opensteer tab list --workspace demo
303
+ opensteer tab new https://example.com --workspace demo
236
304
  opensteer tab 2 --workspace demo # Switch to tab 2
237
- opensteer tab close 3 --workspace demo # Close tab 3
238
- opensteer tab close --workspace demo # Close current tab
305
+ opensteer tab close 3 --workspace demo
239
306
  ```
240
307
 
241
308
  ```ts
@@ -251,11 +318,11 @@ Re-snapshot after switching tabs — element numbers are per-page.
251
318
 
252
319
  Each workspace has one browser. Three modes:
253
320
 
254
- | Mode | What it does | Data persists? |
255
- | ------------------------ | ----------------------------------------------- | --------------------------------------------------------------------- |
256
- | **Persistent** (default) | Browser tied to workspace, survives restarts | Yes — cookies, localStorage, logins, history, extensions all retained |
257
- | **Temporary** | Headless browser in `/tmp`, cleaned up on close | No |
258
- | **Attach** | Connects to an already-running browser via CDP | Depends on that browser |
321
+ | Mode | What it does | Data persists? |
322
+ | ------------------------ | ----------------------------------------------- | ----------------------- |
323
+ | **Persistent** (default) | Browser tied to workspace, survives restarts | Yes |
324
+ | **Temporary** | Headless browser in `/tmp`, cleaned up on close | No |
325
+ | **Attach** | Connects to an already-running browser via CDP | Depends on that browser |
259
326
 
260
327
  ### Headless vs headed
261
328
 
@@ -267,99 +334,69 @@ opensteer open https://example.com --workspace demo --headless false
267
334
 
268
335
  Use headed mode for debugging or when the user wants to watch. For hands-free automation, keep headless and use `opensteer view` if a human needs to observe.
269
336
 
270
- ### Persistent sessions
271
-
272
- When you `opensteer open` with a workspace, the browser's full Chrome user-data directory lives at `~/.opensteer/workspaces/<id>/browser/user-data/`. Everything Chrome normally persists (cookies, localStorage, IndexedDB, history, extensions) survives between runs.
273
-
274
- Re-running `opensteer open --workspace demo` reconnects to the existing browser if it's still alive, or launches a fresh one with the same profile if it died.
275
-
276
337
  ### Profile cloning
277
338
 
278
339
  Clone a real user's Chrome profile to start a workspace with their logins already active:
279
340
 
280
341
  ```bash
281
- # Discover available local browsers and profiles
282
342
  opensteer browser discover
283
-
284
- # Clone a profile into a workspace
285
343
  opensteer browser clone --workspace demo \
286
344
  --source-user-data-dir "$HOME/Library/Application Support/Google/Chrome" \
287
345
  --source-profile-directory Default
288
346
  ```
289
347
 
290
- This copies cookies, localStorage, extensions, and settings from the source browser. Caches and lock files are skipped. The source browser does not need to be closed — cloning while running is safe.
291
-
292
- ### Attach to an existing browser
293
-
294
- ```bash
295
- opensteer open https://example.com --workspace demo --attach-endpoint http://localhost:9222
296
- ```
348
+ This copies cookies, localStorage, extensions, and settings. The source browser does not need to be closed.
297
349
 
298
350
  ### Workspace lifecycle
299
351
 
300
352
  ```bash
301
- opensteer browser status --workspace demo # Check if browser is running
353
+ opensteer browser status --workspace demo
302
354
  opensteer browser reset --workspace demo # Wipe browser data, keep workspace
303
355
  opensteer browser delete --workspace demo # Delete workspace entirely
304
356
  ```
305
357
 
306
358
  ## Cloud Mode
307
359
 
308
- Run the browser on Opensteer's cloud infrastructure instead of locally. Use cloud mode when you need browsers that run 24/7, or when the local machine should not run Chromium.
309
-
310
- ### Setup
360
+ Run the browser on Opensteer's cloud infrastructure instead of locally.
311
361
 
312
362
  ```bash
313
- export OPENSTEER_API_KEY=osk_your_key_here # Required
314
- export OPENSTEER_PROVIDER=cloud # Or use --provider cloud per command
363
+ export OPENSTEER_API_KEY=osk_your_key_here
364
+ export OPENSTEER_PROVIDER=cloud
315
365
  ```
316
366
 
317
- ### Usage
318
-
319
367
  All CLI commands work the same with `--provider cloud`:
320
368
 
321
369
  ```bash
322
370
  opensteer open https://example.com --workspace demo --provider cloud
323
371
  opensteer snapshot action --workspace demo
324
- opensteer click 5 --workspace demo
372
+ opensteer click 5 --workspace demo --persist "nav link"
325
373
  ```
326
374
 
327
- ### Export local browser profile to cloud
328
-
329
- Sync a local browser's cookies to a cloud browser profile so the cloud session starts logged in:
375
+ Export a local profile to cloud:
330
376
 
331
377
  ```bash
332
- # Reads cookies from local Chrome, decrypts them, uploads to cloud
333
378
  opensteer browser clone --workspace demo \
334
379
  --source-user-data-dir "$HOME/Library/Application Support/Google/Chrome" \
335
380
  --source-profile-directory Default \
336
381
  --provider cloud
337
382
  ```
338
383
 
339
- The cookies are extracted from the local SQLite database, decrypted, packaged into a portable format, and uploaded. The cloud browser then starts with those cookies applied.
340
-
341
384
  ## Local View
342
385
 
343
- When Opensteer runs headless, a human cannot see what the browser is doing. Local view streams live screenshots from headless sessions to a browser-based viewer.
386
+ Stream live screenshots from headless sessions to a browser-based viewer.
344
387
 
345
388
  ```bash
346
389
  opensteer view # Start viewer service, print URL
347
390
  opensteer view stop # Stop the viewer service
348
- opensteer view --auto # Auto-start viewer on every browser launch
349
- opensteer view --no-auto # Only start viewer when manually requested
391
+ opensteer view --auto # Auto-start on every browser launch
392
+ opensteer view --no-auto # Only start when manually requested
350
393
  ```
351
394
 
352
- The viewer is a local web UI that shows:
353
-
354
- - Live JPEG stream of the active browser tab
355
- - Tab bar with switching
356
- - Navigation controls (back, forward, reload, URL bar)
357
-
358
- Local view is a passive observer — it connects to the browser's existing CDP endpoint. Starting or stopping it has zero impact on running browser sessions.
395
+ Local view is a passive observer. Starting or stopping it has zero impact on running sessions.
359
396
 
360
397
  ## Interaction Capture & Replay
361
398
 
362
- Record a trace of browser interactions (clicks, typing, network, DOM changes) and replay them deterministically. Useful for building repeatable test flows or comparing behavior across runs.
399
+ Record browser interactions and replay them deterministically.
363
400
 
364
401
  ```bash
365
402
  opensteer interaction capture --workspace demo --key "login-flow" --duration 30000
@@ -376,14 +413,11 @@ Commands that use `--persist` save artifacts to the workspace. Read them back wi
376
413
  opensteer artifact read <artifactId> --workspace demo
377
414
  ```
378
415
 
379
- Artifacts are created by `extract --persist`, `scripts capture --persist`, `scripts beautify --persist`, and other persist-enabled commands.
380
-
381
- ## Useful SDK Surface
416
+ ## SDK Surface
382
417
 
383
418
  - `open(url)`, `goto(url, { captureNetwork? })`, `close()`
384
- - `snapshot("action" | "extraction")`
385
419
  - `click()`, `hover()`, `input()`, `scroll()`
386
- - `extract()`
420
+ - `extract({ persist })` — replay-only, no inline templates
387
421
  - `listPages()`, `newPage()`, `activatePage()`, `closePage()`
388
422
  - `network.query()`, `network.detail()`
389
423
  - `waitForPage()`
@@ -396,8 +430,13 @@ Artifacts are created by `extract --persist`, `scripts capture --persist`, `scri
396
430
 
397
431
  ## Guardrails
398
432
 
399
- - Snapshot before using element numbers. Snapshot again after UI changes.
433
+ - Always snapshot before using element numbers. Snapshot again after UI changes.
434
+ - Always include `--persist <key>` on click, hover, input, scroll, and extract.
435
+ - Extraction templates: use 3 items from 3 different positions for reusable descriptors; use all visible items when you need the full data immediately. All fields in each array entry must come from the same card/row.
436
+ - Do not pass templates to the SDK `extract()` — use persist keys only.
437
+ - Re-snapshot after navigation before reusing element numbers.
400
438
  - Do not use `evaluate` for API work — use `exec` or `fetch`.
439
+ - If `fetch()` fails with auth errors, check `state()`, `cookies()`, `storage()` first.
401
440
  - Do not keep the result as a manual-only workflow if the user needs reusable automation.
402
441
  - Prefer a small final script over a large framework.
403
442
  - Close browsers when done. Do not leave headed browser windows open.
@@ -1,19 +1,25 @@
1
1
  ---
2
- name: recorder
3
- description: Use when the user wants to record a live browser workflow and turn it into a replayable Opensteer script. Prefer this for manual browser capture, multi-tab recording, and record-then-rerun workflows with the Opensteer CLI.
4
- argument-hint: "[url]"
2
+ name: "recorder"
3
+ description: "Use when the user wants to record a live browser workflow and turn it into a replayable script or API. Prefer this for manual browser capture, multi-tab recording, and record-then-rerun workflows with the Opensteer CLI."
5
4
  ---
6
5
 
7
6
  # Recorder
8
7
 
9
- Use the Opensteer recorder when the user wants to perform a real browser flow manually and save it as a deterministic replay script.
8
+ Record a real browser flow performed manually by the user and save it as a deterministic replay script. Do not use this when the user wants programmatic browser automation without manual recording — use the `opensteer` skill instead.
10
9
 
11
- ## Inputs
10
+ ## Prerequisites
12
11
 
13
- - `url`: starting URL to open
14
- - `workspace`: target Opensteer workspace id
15
- - `provider`: `local` or `cloud`
16
- - optional `output`: explicit output path
12
+ Verify the CLI is available:
13
+
14
+ ```bash
15
+ command -v opensteer >/dev/null 2>&1 && echo "ok" || echo "opensteer not found"
16
+ ```
17
+
18
+ For cloud recording, verify environment variables are set:
19
+
20
+ ```bash
21
+ test -n "$OPENSTEER_BASE_URL" && test -n "$OPENSTEER_API_KEY" && test -n "$OPENSTEER_CLOUD_APP_BASE_URL" && echo "ok" || echo "missing cloud env vars"
22
+ ```
17
23
 
18
24
  ## Quick Start
19
25
 
@@ -29,56 +35,45 @@ Cloud recording:
29
35
  opensteer record --provider cloud --workspace <id> --url <url>
30
36
  ```
31
37
 
32
- Cloud recording requires:
33
-
34
- - `OPENSTEER_BASE_URL`
35
- - `OPENSTEER_API_KEY`
36
- - `OPENSTEER_CLOUD_APP_BASE_URL`
37
-
38
38
  ## Mode Selection
39
39
 
40
- - Use `provider=local` when the user wants to interact with a local Playwright browser window.
41
- - Use `provider=cloud` when the user wants to interact through the cloud browser session UI.
42
- - Keep local recording on the default headed persistent browser flow.
43
- - In cloud mode, do not force `headless=false`. Use the normal cloud launch behavior unless the user explicitly overrides it.
40
+ - Use `provider=local` when the user wants to interact with a local Playwright browser window. Local recording requires a headed, persistent browser, so do not pass `--headless true`.
41
+ - Use `provider=cloud` when the user wants to interact through the cloud browser session UI. Do not force `headless=false` in cloud mode. Cloud does not support `browser.mode="attach"`.
44
42
 
45
43
  ## Workflow
46
44
 
47
- 1. Start the recorder with `opensteer record`.
48
- 2. If the provider is `cloud`, give the user the browser session URL printed by the CLI.
45
+ 1. Run `opensteer record --workspace <id> --url <url>` (add `--provider cloud` for cloud).
46
+ 2. If cloud, give the user the browser session URL printed by the CLI.
49
47
  3. Tell the user to perform the workflow manually.
50
48
  4. Tell the user exactly how to stop:
51
- - local: click the injected `Stop recording` button in the browser page
52
- - cloud: click `Stop recording` in the browser session toolbar UI
53
- 5. Wait for the recorder process to finish. Do not assume recording is complete just because the browser URL was printed.
54
- 6. Only after the CLI exits, read the generated script from disk and inspect what was captured.
55
- 7. If the user wants verification, rerun the generated script instead of only reviewing the file.
56
-
57
- ## Guardrails
49
+ - Local: click the injected **Stop recording** button in the browser page.
50
+ - Cloud: click **Stop recording** in the browser session toolbar UI.
51
+ 5. Keep the `record` command alive while the user is recording. Do not interrupt it. Do not stop with `Ctrl+C` unless the user explicitly wants to abort.
52
+ 6. Wait for the CLI process to exit. Do not assume recording is complete just because the browser URL was printed.
53
+ 7. Verify the output file exists at `.opensteer/workspaces/<id>/recorded-flow.ts` (or the `--output` path if specified).
54
+ 8. Read and summarize the generated script before editing it.
55
+ 9. If the user wants verification, replay the script: `npx tsx <path-to-recorded-flow.ts>`.
58
56
 
59
- - Recording requires `engine=playwright`.
60
- - Local recording only supports a persistent browser.
61
- - Local recording requires a headed browser. Do not pass `--headless true` in local mode.
62
- - Cloud recording does not support `browser.mode="attach"`.
63
- - Do not stop recording with `Ctrl+C` unless the user explicitly wants to abort the run.
64
- - Do not use removed timeout flags such as `--record-timeout-ms`.
65
- - If a launch argument value starts with `--`, pass it as `--arg=...`, not `--arg ...`.
66
- - If the flow depends on recorder limits such as iframes, file upload, drag-and-drop, or canvas behavior, read the reference file before promising support.
57
+ ## Limitations
67
58
 
68
- ## Output Contract
59
+ The recorder captures clicks, text entry, key presses, scrolling, select changes, navigation, and multi-tab operations. It does not fully support:
69
60
 
70
- - Default output path: `.opensteer/workspaces/<id>/recorded-flow.ts`
71
- - The CLI writes the replay script locally after recording completes in both local and cloud modes.
72
- - Generated scripts use the public Opensteer SDK surface. Cloud recordings bootstrap `provider.mode = "cloud"` and local recordings bootstrap the workspace-backed local flow.
61
+ - Cross-origin iframes (not recorded)
62
+ - Shadow DOM selectors (best effort)
63
+ - File uploads, drag-and-drop, and canvas interactions
64
+ - Browser back/forward detection (may fall back to direct navigation replay)
73
65
 
74
- ## Agent Guidance
66
+ ## Rules
75
67
 
76
- - Keep the `record` command alive while the user is recording.
77
- - If the user is actively driving the session, avoid mixing in extra agent actions unless they explicitly ask for help recording a combined flow.
78
- - After recording completes, summarize the captured flow before editing it.
79
- - If replay fails, debug the generated script and rerun it instead of re-recording immediately.
68
+ - Recording requires `engine=playwright`.
69
+ - Do not use removed timeout flags such as `--record-timeout-ms`.
70
+ - If a launch argument value starts with `--`, pass it as `--arg=...`, not `--arg ...`.
71
+ - Do not mix in extra agent actions while the user is recording unless they explicitly ask.
72
+ - If replay fails, debug and fix the generated script rather than re-recording immediately.
80
73
 
81
- ## References
74
+ ## Troubleshooting
82
75
 
83
- - [Recorder Reference](references/recorder-reference.md)
84
- - [Opensteer Skill](../opensteer/SKILL.md)
76
+ - **Recorder fails to start**: verify that the workspace ID is valid and that the browser engine is set to `playwright` (`engine=playwright`).
77
+ - **CLI exits with an error**: read stderr for the error message before retrying.
78
+ - **Generated script has errors**: inspect and fix the script rather than re-recording.
79
+ - **Output file missing**: check whether the user stopped the recording correctly (by clicking **Stop recording**, not by pressing `Ctrl+C`).