opensteer 0.5.5 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/README.md +2 -3
- package/dist/{chunk-YIQDOALV.js → chunk-AVXUMEDG.js} +3 -2
- package/dist/{chunk-SPHS6YWD.js → chunk-DN3GI5CH.js} +3 -2
- package/dist/{chunk-QHZFY3ZK.js → chunk-FAHE5DB2.js} +79 -20
- package/dist/{chunk-XIH3WGPY.js → chunk-SGZYTGY3.js} +232 -22
- package/dist/cli/server.cjs +312 -43
- package/dist/cli/server.js +1 -1
- package/dist/{extractor-CZFCFUME.js → extractor-4Q3TFZJB.js} +2 -2
- package/dist/index.cjs +316 -43
- package/dist/index.d.cts +13 -4
- package/dist/index.d.ts +13 -4
- package/dist/index.js +8 -4
- package/dist/{resolver-ZREUOOTV.js → resolver-MGN64KCP.js} +2 -2
- package/package.json +1 -1
- package/skills/opensteer/SKILL.md +92 -99
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import * as playwright from 'playwright';
|
|
2
|
-
import { BrowserContextOptions, Page, BrowserContext,
|
|
2
|
+
import { BrowserContextOptions, Page, BrowserContext, ElementHandle, Cookie, Browser } from 'playwright';
|
|
3
3
|
|
|
4
4
|
type MatchOperator = 'exact' | 'startsWith' | 'contains';
|
|
5
5
|
interface AttributeMatchClause {
|
|
@@ -362,6 +362,7 @@ declare class LocalSelectorStorage {
|
|
|
362
362
|
|
|
363
363
|
declare class Opensteer {
|
|
364
364
|
private readonly config;
|
|
365
|
+
private readonly runtimeEnv;
|
|
365
366
|
private readonly aiResolve;
|
|
366
367
|
private readonly aiExtract;
|
|
367
368
|
private readonly namespace;
|
|
@@ -524,6 +525,8 @@ declare function extractWithPaths(page: Page, fields: FieldSelector[]): Promise<
|
|
|
524
525
|
declare function extractArrayWithPaths(page: Page, array: ArraySelector): Promise<Array<Record<string, unknown>>>;
|
|
525
526
|
declare function extractArrayRowsWithPaths(page: Page, array: ArraySelector): Promise<Array<ArrayExtractedRow>>;
|
|
526
527
|
declare function countArrayItemsWithPath(page: Page, itemParentPath: ElementPath): Promise<number>;
|
|
528
|
+
declare function queryAllByElementPath(page: Page, path: ElementPath): Promise<ElementHandle<Element>[]>;
|
|
529
|
+
declare function buildArrayFieldPathCandidates(path: ElementPath): string[];
|
|
527
530
|
|
|
528
531
|
declare function listTabs(context: BrowserContext, activePage: Page): Promise<TabInfo[]>;
|
|
529
532
|
declare function createTab(context: BrowserContext, url?: string, gotoOptions?: GotoOptions): Promise<{
|
|
@@ -644,17 +647,23 @@ declare function resolveCountersBatch(page: Page, requests: CounterRequest[]): P
|
|
|
644
647
|
declare function normalizeNamespace(input?: string): string;
|
|
645
648
|
declare function resolveNamespaceDir(rootDir: string, namespace: string): string;
|
|
646
649
|
|
|
650
|
+
type RuntimeEnv = Record<string, string | undefined>;
|
|
651
|
+
|
|
647
652
|
declare function createResolveCallback(model: string, options?: {
|
|
648
653
|
temperature?: number;
|
|
649
654
|
maxTokens?: number | null;
|
|
655
|
+
env?: RuntimeEnv;
|
|
650
656
|
}): AiResolveCallback;
|
|
651
657
|
|
|
652
658
|
declare function createExtractCallback(model: string, options?: {
|
|
653
659
|
temperature?: number;
|
|
654
660
|
maxTokens?: number | null;
|
|
661
|
+
env?: RuntimeEnv;
|
|
655
662
|
}): AiExtractCallback;
|
|
656
663
|
|
|
657
|
-
declare function getModelProvider(modelStr: string
|
|
664
|
+
declare function getModelProvider(modelStr: string, options?: {
|
|
665
|
+
env?: RuntimeEnv;
|
|
666
|
+
}): Promise<unknown>;
|
|
658
667
|
|
|
659
668
|
interface AiModelConfig {
|
|
660
669
|
model: string;
|
|
@@ -865,7 +874,7 @@ interface ResolvedAgentConfig {
|
|
|
865
874
|
declare function resolveAgentConfig(args: {
|
|
866
875
|
agentConfig: OpensteerAgentConfig;
|
|
867
876
|
fallbackModel?: string;
|
|
868
|
-
env?:
|
|
877
|
+
env?: RuntimeEnv;
|
|
869
878
|
}): ResolvedAgentConfig;
|
|
870
879
|
declare function createCuaClient(config: ResolvedAgentConfig): CuaClient;
|
|
871
880
|
|
|
@@ -890,4 +899,4 @@ declare class OpensteerCuaAgentHandler {
|
|
|
890
899
|
private maybeRenderCursor;
|
|
891
900
|
}
|
|
892
901
|
|
|
893
|
-
export { type ActionExecutionResult, type ActionFailure, type ActionFailureBlocker, type ActionFailureClassificationSource, type ActionFailureCode, type ActionFailureDetails, type ActionResult, type ActionWaitOptions, ActionWsClient, type AiExtractArgs, type AiExtractCallback, type AiExtractResult, type AiModelConfig, type AiResolveArgs, type AiResolveCallback, type AiResolveCallbackResult, type AiResolveResult, type ArrayExtractedRow, type ArrayRowMetadata, type ArraySelector, type AttributeMatchClause, type BaseActionOptions, type BoundingBox, type ClickOptions, type CloudActionFailure, type CloudActionFailureDetails, type CloudActionMethod, type CloudActionRequest, type CloudActionResponse, type CloudActionSuccess, CloudCdpClient, type CloudCdpConnectArgs, type CloudCdpConnection, type CloudErrorCode, type CloudSelectorCacheImportEntry, type CloudSelectorCacheImportRequest, type CloudSelectorCacheImportResponse, CloudSessionClient, type CloudSessionContractVersion, type CloudSessionCreateRequest, type CloudSessionCreateResponse, type CloudSessionSourceType, type CloudSessionSummary, type ContextHop, type CookieParam, type CounterRequest, CounterResolutionError, type CounterResolutionErrorCode, type DomPath, type ElementPath, ElementPathError, type ElementPathErrorCode, type ExtractFromPlanOptions, type ExtractOptions, type ExtractSchema, type ExtractSchemaField, type ExtractSchemaValue, type ExtractionFieldPlan, type ExtractionPlan, type ExtractionRunResult, type FieldSelector, type FileUploadOptions, type GotoOptions, type HoverOptions, type InputOptions, type LaunchOptions, LocalSelectorStorage, type MarkInteractivityOptions, type MatchClause, type MatchOperator, OPENSTEER_HIDDEN_ATTR, OPENSTEER_INTERACTIVE_ATTR, OPENSTEER_SCROLLABLE_ATTR, OS_BOUNDARY_ATTR, OS_IFRAME_BOUNDARY_TAG, OS_NODE_ID_ATTR, OS_SHADOW_BOUNDARY_TAG, OS_UNAVAILABLE_ATTR, Opensteer, OpensteerActionError, type OpensteerAgentAction, OpensteerAgentActionError, OpensteerAgentApiError, 
OpensteerAgentBusyError, type OpensteerAgentConfig, OpensteerAgentConfigError, OpensteerAgentError, type OpensteerAgentExecuteOptions, OpensteerAgentExecutionError, type OpensteerAgentInstance, type OpensteerAgentMode, type OpensteerAgentModelConfig, type OpensteerAgentProvider, OpensteerAgentProviderError, type OpensteerAgentResult, type OpensteerAgentUsage, type OpensteerAuthScheme, type OpensteerBrowserConfig, type OpensteerCloudAnnouncePolicy, type OpensteerCloudConfig, OpensteerCloudError, type OpensteerCloudOptions, type OpensteerConfig, OpensteerCuaAgentHandler, type OpensteerStorageConfig, type PathNode, type PathNodePosition, type PositionMatchClause, type PreparedSnapshot, type RegistryEntry, type ResolvedElementPath, type ScreenshotOptions, type ScrollOptions, type SelectOptions, type SelectorFile, type SelectorRegistry, type SerializeOptions, type SerializedNodeMeta, type SerializedPageHTML, type SnapshotMode, type SnapshotOptions, type StateResult, type TabInfo, buildElementPathFromHandle, buildElementPathFromSelector, buildPathSelectorHint, cleanForAction, cleanForClickable, cleanForExtraction, cleanForFull, cleanForScrollable, clearCookies, cloneElementPath, closeTab, cloudNotLaunchedError, cloudSessionContractVersion, cloudUnsupportedMethodError, collectLocalSelectorCacheEntries, countArrayItemsWithPath, createCuaClient, createEmptyRegistry, createExtractCallback, createResolveCallback, createTab, exportCookies, extractArrayRowsWithPaths, extractArrayWithPaths, extractWithPaths, getCookies, getElementAttributes, getElementBoundingBox, getElementText, getElementValue, getModelProvider, getPageHtml, getPageTitle, importCookies, listTabs, markInteractiveElements, normalizeNamespace, performClick, performFileUpload, performHover, performInput, performScroll, performSelect, prepareSnapshot, pressKey, resolveAgentConfig, resolveCounterElement, resolveCountersBatch, resolveElementPath, resolveNamespaceDir, sanitizeElementPath, serializePageHTML, setCookie, 
switchTab, typeText, waitForVisualStability };
|
|
902
|
+
export { type ActionExecutionResult, type ActionFailure, type ActionFailureBlocker, type ActionFailureClassificationSource, type ActionFailureCode, type ActionFailureDetails, type ActionResult, type ActionWaitOptions, ActionWsClient, type AiExtractArgs, type AiExtractCallback, type AiExtractResult, type AiModelConfig, type AiResolveArgs, type AiResolveCallback, type AiResolveCallbackResult, type AiResolveResult, type ArrayExtractedRow, type ArrayRowMetadata, type ArraySelector, type AttributeMatchClause, type BaseActionOptions, type BoundingBox, type ClickOptions, type CloudActionFailure, type CloudActionFailureDetails, type CloudActionMethod, type CloudActionRequest, type CloudActionResponse, type CloudActionSuccess, CloudCdpClient, type CloudCdpConnectArgs, type CloudCdpConnection, type CloudErrorCode, type CloudSelectorCacheImportEntry, type CloudSelectorCacheImportRequest, type CloudSelectorCacheImportResponse, CloudSessionClient, type CloudSessionContractVersion, type CloudSessionCreateRequest, type CloudSessionCreateResponse, type CloudSessionSourceType, type CloudSessionSummary, type ContextHop, type CookieParam, type CounterRequest, CounterResolutionError, type CounterResolutionErrorCode, type DomPath, type ElementPath, ElementPathError, type ElementPathErrorCode, type ExtractFromPlanOptions, type ExtractOptions, type ExtractSchema, type ExtractSchemaField, type ExtractSchemaValue, type ExtractionFieldPlan, type ExtractionPlan, type ExtractionRunResult, type FieldSelector, type FileUploadOptions, type GotoOptions, type HoverOptions, type InputOptions, type LaunchOptions, LocalSelectorStorage, type MarkInteractivityOptions, type MatchClause, type MatchOperator, OPENSTEER_HIDDEN_ATTR, OPENSTEER_INTERACTIVE_ATTR, OPENSTEER_SCROLLABLE_ATTR, OS_BOUNDARY_ATTR, OS_IFRAME_BOUNDARY_TAG, OS_NODE_ID_ATTR, OS_SHADOW_BOUNDARY_TAG, OS_UNAVAILABLE_ATTR, Opensteer, OpensteerActionError, type OpensteerAgentAction, OpensteerAgentActionError, OpensteerAgentApiError, 
OpensteerAgentBusyError, type OpensteerAgentConfig, OpensteerAgentConfigError, OpensteerAgentError, type OpensteerAgentExecuteOptions, OpensteerAgentExecutionError, type OpensteerAgentInstance, type OpensteerAgentMode, type OpensteerAgentModelConfig, type OpensteerAgentProvider, OpensteerAgentProviderError, type OpensteerAgentResult, type OpensteerAgentUsage, type OpensteerAuthScheme, type OpensteerBrowserConfig, type OpensteerCloudAnnouncePolicy, type OpensteerCloudConfig, OpensteerCloudError, type OpensteerCloudOptions, type OpensteerConfig, OpensteerCuaAgentHandler, type OpensteerStorageConfig, type PathNode, type PathNodePosition, type PositionMatchClause, type PreparedSnapshot, type RegistryEntry, type ResolvedElementPath, type ScreenshotOptions, type ScrollOptions, type SelectOptions, type SelectorFile, type SelectorRegistry, type SerializeOptions, type SerializedNodeMeta, type SerializedPageHTML, type SnapshotMode, type SnapshotOptions, type StateResult, type TabInfo, buildArrayFieldPathCandidates, buildElementPathFromHandle, buildElementPathFromSelector, buildPathSelectorHint, cleanForAction, cleanForClickable, cleanForExtraction, cleanForFull, cleanForScrollable, clearCookies, cloneElementPath, closeTab, cloudNotLaunchedError, cloudSessionContractVersion, cloudUnsupportedMethodError, collectLocalSelectorCacheEntries, countArrayItemsWithPath, createCuaClient, createEmptyRegistry, createExtractCallback, createResolveCallback, createTab, exportCookies, extractArrayRowsWithPaths, extractArrayWithPaths, extractWithPaths, getCookies, getElementAttributes, getElementBoundingBox, getElementText, getElementValue, getModelProvider, getPageHtml, getPageTitle, importCookies, listTabs, markInteractiveElements, normalizeNamespace, performClick, performFileUpload, performHover, performInput, performScroll, performSelect, prepareSnapshot, pressKey, queryAllByElementPath, resolveAgentConfig, resolveCounterElement, resolveCountersBatch, resolveElementPath, 
resolveNamespaceDir, sanitizeElementPath, serializePageHTML, setCookie, switchTab, typeText, waitForVisualStability };
|
package/dist/index.js
CHANGED
|
@@ -24,6 +24,7 @@ import {
|
|
|
24
24
|
OpensteerAgentProviderError,
|
|
25
25
|
OpensteerCloudError,
|
|
26
26
|
OpensteerCuaAgentHandler,
|
|
27
|
+
buildArrayFieldPathCandidates,
|
|
27
28
|
buildElementPathFromHandle,
|
|
28
29
|
buildElementPathFromSelector,
|
|
29
30
|
buildPathSelectorHint,
|
|
@@ -66,6 +67,7 @@ import {
|
|
|
66
67
|
performSelect,
|
|
67
68
|
prepareSnapshot,
|
|
68
69
|
pressKey,
|
|
70
|
+
queryAllByElementPath,
|
|
69
71
|
resolveAgentConfig,
|
|
70
72
|
resolveCounterElement,
|
|
71
73
|
resolveCountersBatch,
|
|
@@ -77,17 +79,17 @@ import {
|
|
|
77
79
|
switchTab,
|
|
78
80
|
typeText,
|
|
79
81
|
waitForVisualStability
|
|
80
|
-
} from "./chunk-
|
|
82
|
+
} from "./chunk-SGZYTGY3.js";
|
|
81
83
|
import {
|
|
82
84
|
createResolveCallback
|
|
83
|
-
} from "./chunk-
|
|
85
|
+
} from "./chunk-DN3GI5CH.js";
|
|
84
86
|
import {
|
|
85
87
|
createExtractCallback
|
|
86
|
-
} from "./chunk-
|
|
88
|
+
} from "./chunk-AVXUMEDG.js";
|
|
87
89
|
import "./chunk-3H5RRIMZ.js";
|
|
88
90
|
import {
|
|
89
91
|
getModelProvider
|
|
90
|
-
} from "./chunk-
|
|
92
|
+
} from "./chunk-FAHE5DB2.js";
|
|
91
93
|
export {
|
|
92
94
|
ActionWsClient,
|
|
93
95
|
CloudCdpClient,
|
|
@@ -114,6 +116,7 @@ export {
|
|
|
114
116
|
OpensteerAgentProviderError,
|
|
115
117
|
OpensteerCloudError,
|
|
116
118
|
OpensteerCuaAgentHandler,
|
|
119
|
+
buildArrayFieldPathCandidates,
|
|
117
120
|
buildElementPathFromHandle,
|
|
118
121
|
buildElementPathFromSelector,
|
|
119
122
|
buildPathSelectorHint,
|
|
@@ -159,6 +162,7 @@ export {
|
|
|
159
162
|
performSelect,
|
|
160
163
|
prepareSnapshot,
|
|
161
164
|
pressKey,
|
|
165
|
+
queryAllByElementPath,
|
|
162
166
|
resolveAgentConfig,
|
|
163
167
|
resolveCounterElement,
|
|
164
168
|
resolveCountersBatch,
|
package/package.json
CHANGED
package/skills/opensteer/SKILL.md
CHANGED
|
@@ -5,164 +5,157 @@ description: "Browser automation, web scraping, and structured data extraction u
|
|
|
5
5
|
|
|
6
6
|
# Opensteer Browser Automation
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
**Exploring interactively?** Follow Phase 1.
|
|
9
|
+
**Writing a scraper script?** Follow Phase 2.
|
|
9
10
|
|
|
10
|
-
|
|
11
|
+
> Never use raw Playwright methods when an Opensteer equivalent exists. Never call `opensteer.navigate()` — the correct method is `opensteer.goto()`.
|
|
11
12
|
|
|
12
|
-
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## Phase 1 — CLI Exploration
|
|
13
16
|
|
|
14
|
-
|
|
15
|
-
| ----------------------------------------------------------------------------- | -------------------------------------------------------------- |
|
|
16
|
-
| `page.evaluate(() => [...document.querySelectorAll('.item')].map(...))` | `opensteer.extract({ description: "product listing" })` |
|
|
17
|
-
| `page.click('.submit')` | `opensteer.click({ description: "the submit button" })` |
|
|
18
|
-
| `page.fill('#search', 'query')` | `opensteer.input({ description: "search input", text: "q" })` |
|
|
17
|
+
**Step 1 — Set session and open the page.**
|
|
19
18
|
|
|
20
|
-
|
|
19
|
+
```bash
|
|
20
|
+
export OPENSTEER_SESSION=my-session
|
|
21
|
+
opensteer open https://example.com --name "my-scraper"
|
|
22
|
+
```
|
|
21
23
|
|
|
22
|
-
|
|
24
|
+
The `--name` value is the cache namespace. It must match `name:` in the SDK constructor (Phase 2). Pick a stable name now and do not change it.
|
|
23
25
|
|
|
24
|
-
**
|
|
26
|
+
**Step 2 — Snapshot and interact using counters.**
|
|
25
27
|
|
|
26
|
-
|
|
27
|
-
2. **Cache selectors** — Re-run actions with `--description` flags to cache element paths for replay
|
|
28
|
-
3. **Cache extractions** — Run `extract` with `--description` for every page type the scraper will visit
|
|
29
|
-
4. **Generate script** — Use cached descriptions in TypeScript (no counters needed)
|
|
28
|
+
Use `snapshot action` for interactions. Use `snapshot extraction` for data. Each element in the output has a counter (`c="N"`). Use that number directly.
|
|
30
29
|
|
|
31
|
-
|
|
30
|
+
```bash
|
|
31
|
+
opensteer snapshot action
|
|
32
|
+
opensteer click 3
|
|
33
|
+
opensteer input 5 "laptop" --pressEnter
|
|
34
|
+
```
|
|
32
35
|
|
|
33
|
-
|
|
36
|
+
**Step 3 — Navigate and re-snapshot after every page change.**
|
|
34
37
|
|
|
35
38
|
```bash
|
|
36
|
-
|
|
37
|
-
|
|
39
|
+
opensteer navigate https://example.com/results
|
|
40
|
+
opensteer snapshot action
|
|
41
|
+
```
|
|
38
42
|
|
|
39
|
-
|
|
40
|
-
opensteer open https://example.com/products --name "product-scraper"
|
|
43
|
+
> Use `opensteer open` once at the start only. Use `opensteer navigate` for all subsequent pages — it includes a visual stability wait that `open` does not.
|
|
41
44
|
|
|
42
|
-
|
|
43
|
-
opensteer snapshot action # Interactive elements with counters
|
|
44
|
-
opensteer snapshot extraction # Data-oriented HTML with counters
|
|
45
|
+
**Step 4 — Cache every action and extraction with `--description`.**
|
|
45
46
|
|
|
46
|
-
|
|
47
|
-
opensteer click 3
|
|
48
|
-
opensteer input 5 "laptop" --pressEnter
|
|
47
|
+
Re-run each action with `--description` added. This writes the resolved selector to the cache so scripts replay without counters.
|
|
49
48
|
|
|
50
|
-
|
|
49
|
+
```bash
|
|
51
50
|
opensteer click 3 --description "the products link"
|
|
52
51
|
opensteer input 5 "laptop" --pressEnter --description "the search input"
|
|
52
|
+
```
|
|
53
53
|
|
|
54
|
-
|
|
54
|
+
For data: take an extraction snapshot, identify counter numbers for each field, then run `extract` with a schema and `--description`. For arrays, include at least 2 items so Opensteer infers the repeating pattern.
|
|
55
|
+
|
|
56
|
+
```bash
|
|
55
57
|
opensteer snapshot extraction
|
|
56
58
|
opensteer extract '{"products":[{"name":{"element":11},"price":{"element":12}},{"name":{"element":25},"price":{"element":26}}]}' \
|
|
57
|
-
--description "product listing
|
|
59
|
+
--description "product listing"
|
|
60
|
+
```
|
|
58
61
|
|
|
59
|
-
|
|
60
|
-
opensteer click 11 --description "first product link"
|
|
61
|
-
opensteer snapshot extraction
|
|
62
|
-
opensteer extract '{"title":{"element":3},"price":{"element":7}}' \
|
|
63
|
-
--description "product detail page"
|
|
62
|
+
Repeat Step 3 → Step 4 for every distinct page type the scraper will visit.
|
|
64
63
|
|
|
65
|
-
|
|
64
|
+
**Step 5 — Close when done.**
|
|
65
|
+
|
|
66
|
+
```bash
|
|
66
67
|
opensteer close
|
|
67
68
|
```
|
|
68
69
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
- Set `--name` on `open` to define cache namespace
|
|
72
|
-
- Specify snapshot mode explicitly: `action` (interactions) or `extraction` (data)
|
|
73
|
-
- `snapshot extraction` shows structure; `extract` produces JSON — never parse snapshot HTML manually
|
|
74
|
-
- Use `--description` to cache selectors for replay (one character difference = cache miss)
|
|
75
|
-
- For arrays, include all items in the schema — Opensteer caches the structural pattern and finds all matches on replay
|
|
76
|
-
- `open` does raw `page.goto()`; use `navigate` for subsequent pages (includes stability wait)
|
|
77
|
-
- Re-snapshot after navigation or significant page changes
|
|
78
|
-
|
|
79
|
-
## Writing Scraper Scripts
|
|
70
|
+
---
|
|
80
71
|
|
|
81
|
-
|
|
72
|
+
## Phase 2 — SDK Scraper Script
|
|
82
73
|
|
|
83
|
-
|
|
74
|
+
Use cached `description` strings (exact match to CLI `--description` values). `name` must match `--name` from Phase 1.
|
|
84
75
|
|
|
85
76
|
```typescript
|
|
86
77
|
import { Opensteer } from "opensteer";
|
|
87
78
|
|
|
88
79
|
async function run() {
|
|
89
80
|
const opensteer = new Opensteer({
|
|
90
|
-
name: "
|
|
81
|
+
name: "my-scraper", // MUST match --name from Phase 1
|
|
91
82
|
storage: { rootDir: process.cwd() },
|
|
92
83
|
});
|
|
93
84
|
|
|
94
|
-
await opensteer.launch({ headless: false });
|
|
85
|
+
await opensteer.launch({ headless: false }); // headless: false — many sites block headless
|
|
95
86
|
|
|
96
87
|
try {
|
|
97
|
-
await opensteer.goto("https://example.com
|
|
88
|
+
await opensteer.goto("https://example.com");
|
|
98
89
|
|
|
99
|
-
await opensteer.input({
|
|
100
|
-
|
|
101
|
-
description: "the search input", // exact match to CLI --description
|
|
102
|
-
});
|
|
90
|
+
await opensteer.input({ description: "the search input", text: "laptop", pressEnter: true });
|
|
91
|
+
await opensteer.click({ description: "the products link" });
|
|
103
92
|
|
|
104
|
-
|
|
105
|
-
const data = await opensteer.extract({
|
|
106
|
-
description: "product listing with name and price",
|
|
107
|
-
});
|
|
93
|
+
await opensteer.waitForText("Showing results"); // only for page transitions / SPA content
|
|
108
94
|
|
|
95
|
+
const data = await opensteer.extract({ description: "product listing" });
|
|
109
96
|
console.log(JSON.stringify(data, null, 2));
|
|
110
97
|
} finally {
|
|
111
98
|
await opensteer.close();
|
|
112
99
|
}
|
|
113
100
|
}
|
|
114
101
|
|
|
115
|
-
run().catch((err) => {
|
|
116
|
-
console.error(err);
|
|
117
|
-
process.exit(1);
|
|
118
|
-
});
|
|
102
|
+
run().catch((err) => { console.error(err); process.exit(1); });
|
|
119
103
|
```
|
|
120
104
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
- No top-level `await` — wrap in `async function run()` + `run().catch(...)`
|
|
124
|
-
- Default to `headless: false` (many sites block headless)
|
|
125
|
-
- Use cached `description` strings for all interactions and extractions
|
|
126
|
-
- Do NOT add wait calls before SDK actions — they handle waiting internally
|
|
127
|
-
- Use `opensteer.waitForText("literal text")` or `page.waitForSelector("css")` only for page transitions or confirming SPA content loaded
|
|
128
|
-
- Run with: `npx tsx scraper.ts`
|
|
129
|
-
|
|
130
|
-
## Browser Connection
|
|
105
|
+
**Critical method signatures:**
|
|
131
106
|
|
|
132
|
-
|
|
133
|
-
|
|
107
|
+
```typescript
|
|
108
|
+
await opensteer.goto(url);
|
|
109
|
+
await opensteer.goto(url, { timeout: 60000 });
|
|
134
110
|
|
|
135
|
-
|
|
111
|
+
await opensteer.click({ description: "..." });
|
|
112
|
+
await opensteer.input({ description: "...", text: "...", pressEnter: true });
|
|
113
|
+
await opensteer.hover({ description: "..." });
|
|
114
|
+
await opensteer.select({ description: "...", label: "Option A" });
|
|
115
|
+
await opensteer.scroll({ direction: "down", amount: 500 });
|
|
136
116
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
3. **CSS selector**: `click --selector "#btn"` — explicit but brittle
|
|
117
|
+
await opensteer.extract({ description: "..." }); // replay from cache
|
|
118
|
+
await opensteer.extract({ schema: { title: { element: 3 } }, description: "..." }); // first cache
|
|
140
119
|
|
|
141
|
-
|
|
120
|
+
await opensteer.waitForText("literal text");
|
|
121
|
+
await opensteer.page.waitForSelector("css-selector"); // SPA content guard
|
|
142
122
|
|
|
143
|
-
|
|
144
|
-
opensteer snapshot action # Interactable elements (default)
|
|
145
|
-
opensteer snapshot extraction # Flattened HTML for data extraction
|
|
146
|
-
opensteer snapshot clickable # Only clickable elements
|
|
147
|
-
opensteer snapshot scrollable # Only scrollable containers
|
|
148
|
-
opensteer snapshot full # Raw HTML — only for debugging
|
|
123
|
+
// Do NOT add waits before opensteer actions — they handle waiting internally.
|
|
149
124
|
```
|
|
150
125
|
|
|
151
|
-
|
|
126
|
+
Run with: `npx tsx scraper.ts`
|
|
152
127
|
|
|
153
|
-
|
|
128
|
+
---
|
|
154
129
|
|
|
155
|
-
|
|
130
|
+
## Edge Cases
|
|
156
131
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
132
|
+
**Connect to an existing browser (CDP):**
|
|
133
|
+
```bash
|
|
134
|
+
opensteer open --connect-url http://localhost:9222 --name "my-scraper"
|
|
135
|
+
# Verify CDP is running: curl -s http://127.0.0.1:9222/json/version
|
|
136
|
+
```
|
|
161
137
|
|
|
162
|
-
**
|
|
138
|
+
**API-based extraction (site has internal REST/GraphQL endpoints):**
|
|
139
|
+
Navigate first to acquire session cookies, then call `fetch()` inside `page.evaluate()`. This is the only valid use of `page.evaluate()`.
|
|
140
|
+
```typescript
|
|
141
|
+
await opensteer.goto("https://example.com");
|
|
142
|
+
const data = await opensteer.page.evaluate(async () => {
|
|
143
|
+
const res = await fetch("https://api.example.com/items?limit=100");
|
|
144
|
+
return res.json();
|
|
145
|
+
});
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
**Tab management:**
|
|
149
|
+
```bash
|
|
150
|
+
opensteer tabs
|
|
151
|
+
opensteer tab-new https://example.com
|
|
152
|
+
opensteer tab-switch 0
|
|
153
|
+
opensteer tab-close 1
|
|
154
|
+
```
|
|
163
155
|
|
|
164
|
-
|
|
156
|
+
**Debugging failures (diagnose in this order):**
|
|
157
|
+
1. SPA content not loaded — add `waitForText` or `page.waitForSelector` before extraction.
|
|
158
|
+
2. Missing cache — re-run Phase 1 caching step for the page type that failed.
|
|
159
|
+
3. Obstacle blocking target — cookie banner, modal, or login wall. Dismiss it first.
|
|
165
160
|
|
|
166
|
-
|
|
167
|
-
- SDK API: [sdk-reference.md](references/sdk-reference.md)
|
|
168
|
-
- Full examples: [examples.md](references/examples.md)
|
|
161
|
+
**Full references:** [cli-reference.md](references/cli-reference.md) | [sdk-reference.md](references/sdk-reference.md)
|