@crawlee/stagehand 3.15.4-beta.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,146 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.StagehandController = void 0;
4
+ const tslib_1 = require("tslib");
5
+ const browser_pool_1 = require("@crawlee/browser-pool");
6
+ const log_1 = tslib_1.__importDefault(require("@apify/log"));
7
/**
 * Browser controller that lets Crawlee's BrowserPool drive a browser owned by Stagehand.
 *
 * What this class does:
 * - resolves the Stagehand instance paired with `this.browser` from the lookup given to the constructor
 * - opens new pages in the first existing browser context of `this.browser`
 * - reads and writes cookies through the page's browser context
 * - closes (or force-closes) the browser by calling `close()` on the Stagehand instance
 *
 * @ignore
 */
class StagehandController extends browser_pool_1.BrowserController {
    /**
     * @param browserPlugin - Forwarded unchanged to the `BrowserController` constructor.
     * @param stagehandInstances - Lookup exposing `get(browser)`; it must return the Stagehand
     *   instance that belongs to the given browser.
     */
    constructor(browserPlugin, stagehandInstances) {
        super(browserPlugin);
        // Cache for getStagehand(); stays null until the first successful lookup.
        Object.defineProperty(this, "stagehand", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: null
        });
        Object.defineProperty(this, "stagehandInstances", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        this.stagehandInstances = stagehandInstances;
    }
    /**
     * Gets the Stagehand instance associated with this controller's browser.
     * The result of the first successful lookup is cached on the controller.
     *
     * @returns The Stagehand instance registered for `this.browser`.
     * @throws {Error} When the lookup has no entry for `this.browser`.
     */
    getStagehand() {
        if (!this.stagehand) {
            this.stagehand = this.stagehandInstances.get(this.browser);
            if (!this.stagehand) {
                throw new Error('Stagehand instance not found for browser');
            }
        }
        return this.stagehand;
    }
    /**
     * Creates a new page in the browser's first (default) context.
     *
     * @param _contextOptions - Accepted for interface compatibility; not used.
     * @returns The newly opened page.
     * @throws {Error} Wrapping any failure (including a missing context) with the original
     *   error attached as `cause`.
     */
    async _newPage(_contextOptions) {
        try {
            const contexts = this.browser.contexts();
            if (contexts.length === 0) {
                throw new Error('No browser context available');
            }
            const page = await contexts[0].newPage();
            // Keep the active-page counter in sync once the page goes away.
            // NOTE(review): the matching increment is not in this class - presumably done by the base class; confirm.
            page.once('close', () => {
                this.activePages--;
            });
            return page;
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : String(error);
            throw new Error(`Failed to create new page: ${reason}`, {
                cause: error,
            });
        }
    }
    /**
     * Normalizes proxy options for Playwright.
     *
     * @param proxyUrl - Proxy URL, optionally carrying percent-encoded credentials.
     * @param pageOptions - Optional page options; only `proxy.bypass` is read.
     * @returns `{}` when no proxy URL is given, otherwise `{ proxy: { server, username, password, bypass } }`.
     * @throws {TypeError} When `proxyUrl` is not a parseable URL (raised by the `URL` constructor).
     */
    normalizeProxyOptions(proxyUrl, pageOptions) {
        if (!proxyUrl) {
            return {};
        }
        const url = new URL(proxyUrl);
        // `origin` serializes to the literal string "null" for non-special schemes (e.g. socks5:),
        // so rebuild "scheme://host[:port]" by hand in that case. For http(s) the result is unchanged.
        const server = url.origin === 'null' ? `${url.protocol}//${url.host}` : url.origin;
        return {
            proxy: {
                server,
                username: decodeURIComponent(url.username),
                password: decodeURIComponent(url.password),
                bypass: pageOptions?.proxy?.bypass,
            },
        };
    }
    /**
     * Sets cookies in the page's browser context.
     * Best-effort: a failure is logged at debug level and never thrown.
     *
     * @param page - Page whose context receives the cookies.
     * @param cookies - Cookies passed to `context.addCookies()`.
     */
    async _setCookies(page, cookies) {
        try {
            await page.context().addCookies(cookies);
        }
        catch (error) {
            // Still skipped on purpose, but leave a trace for debugging.
            log_1.default.debug('Could not set cookies on Stagehand browser context', { error });
        }
    }
    /**
     * Gets cookies from the page's browser context.
     * Best-effort: a failure is logged at debug level and yields an empty list.
     *
     * @param page - Page whose context is queried.
     * @returns The context's cookies, or `[]` when they cannot be read.
     */
    async _getCookies(page) {
        try {
            return await page.context().cookies();
        }
        catch (error) {
            log_1.default.debug('Could not read cookies from Stagehand browser context', { error });
            return [];
        }
    }
    /**
     * Closes the browser by calling `close()` on the Stagehand instance.
     * A failing `close()` is logged as an error and not rethrown.
     *
     * @throws {Error} When no Stagehand instance is registered for this browser.
     */
    async _close() {
        const stagehand = this.getStagehand();
        try {
            await stagehand.close();
        }
        catch (error) {
            log_1.default.error('Error closing Stagehand', { error });
        }
    }
    /**
     * Force-closes the browser via `stagehand.close({ force: true })`.
     * Errors raised by the forced close are ignored.
     *
     * @throws {Error} When no Stagehand instance is registered for this browser.
     */
    async _kill() {
        const stagehand = this.getStagehand();
        try {
            await stagehand.close({ force: true });
        }
        catch {
            // Ignore errors during force close
        }
    }
}
145
+ exports.StagehandController = StagehandController;
146
+ //# sourceMappingURL=stagehand-controller.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"stagehand-controller.js","sourceRoot":"","sources":["../../src/internals/stagehand-controller.ts"],"names":[],"mappings":";;;;AACA,wDAA0D;AAI1D,6DAA6B;AAI7B;;;;;;;;;;;;GAYG;AACH,MAAa,mBAAoB,SAAQ,gCAAgE;IAIrG,YAAY,aAA8B,EAAE,kBAAyD;QACjG,KAAK,CAAC,aAAa,CAAC,CAAC;QAJjB;;;;mBAA8B,IAAI;WAAC;QAC1B;;;;;WAA0D;QAIvE,IAAI,CAAC,kBAAkB,GAAG,kBAAkB,CAAC;IACjD,CAAC;IAED;;OAEG;IACH,YAAY;QACR,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YAClB,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,kBAAkB,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAE,CAAC;YAC5D,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;gBAClB,MAAM,IAAI,KAAK,CAAC,0CAA0C,CAAC,CAAC;YAChE,CAAC;QACL,CAAC;QACD,OAAO,IAAI,CAAC,SAAS,CAAC;IAC1B,CAAC;IAED;;;OAGG;IACgB,KAAK,CAAC,QAAQ,CAAC,eAAyB;QACvD,IAAI,CAAC;YACD,0EAA0E;YAC1E,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,EAAE,CAAC;YACzC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACxB,MAAM,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC;YACpD,CAAC;YAED,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;YAC5B,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;YAErC,qBAAqB;YACrB,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,GAAG,EAAE;gBACpB,IAAI,CAAC,WAAW,EAAE,CAAC;YACvB,CAAC,CAAC,CAAC;YAEH,OAAO,IAAI,CAAC;QAChB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CAAC,8BAA8B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,EAAE;gBACpG,KAAK,EAAE,KAAK;aACf,CAAC,CAAC;QACP,CAAC;IACL,CAAC;IAED;;OAEG;IACH,qBAAqB,CAAC,QAA4B,EAAE,WAAgB;QAChE,IAAI,CAAC,QAAQ,EAAE,CAAC;YACZ,OAAO,EAAE,CAAC;QACd,CAAC;QAED,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;QAC9B,MAAM,QAAQ,GAAG,kBAAkB,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAClD,MAAM,QAAQ,GAAG,kBAAkB,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAElD,OAAO;YACH,KAAK,EAAE;gBACH,MAAM,EAAE,GAAG,CAAC,MAAM;gBAClB,QAAQ;gBACR,QAAQ;gBACR,MAAM,EAAE,WAAW,EAAE,KAAK,EAAE,MAAM;aACrC;SACJ,CAAC;IACN,CAAC;IAED;;;OAGG;IACgB,KAAK,CAAC,WAAW,CAAC,IAAU,EAAE,OAAiB;QAC9D,IAAI,CAAC;YACD,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC;YAC/B,MAAM,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;QACtC,CAAC;QAAC,MAAM,CAAC;YACL,iCAAiC;QACrC,CAAC;IACL,CAAC;IAED;;;OAGG;IACgB,KA
AK,CAAC,WAAW,CAAC,IAAU;QAC3C,IAAI,CAAC;YACD,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC;YAC/B,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;YACxC,OAAO,OAAmB,CAAC;QAC/B,CAAC;QAAC,MAAM,CAAC;YACL,OAAO,EAAE,CAAC;QACd,CAAC;IACL,CAAC;IAED;;OAEG;IACgB,KAAK,CAAC,MAAM;QAC3B,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;QAEtC,IAAI,CAAC;YACD,MAAM,SAAS,CAAC,KAAK,EAAE,CAAC;QAC5B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,aAAG,CAAC,KAAK,CAAC,yBAAyB,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC;QACpD,CAAC;IACL,CAAC;IAED;;OAEG;IACgB,KAAK,CAAC,KAAK;QAC1B,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;QAEtC,IAAI,CAAC;YACD,MAAM,SAAS,CAAC,KAAK,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAC3C,CAAC;QAAC,MAAM,CAAC;YACL,mCAAmC;QACvC,CAAC;IACL,CAAC;CACJ;AA5HD,kDA4HC"}
@@ -0,0 +1,419 @@
1
+ import type { Action, ActOptions, ActResult, AgentConfig, ExtractOptions, LLMClient, ModelConfiguration, NonStreamingAgentInstance, ObserveOptions, Stagehand, StreamingAgentInstance } from '@browserbasehq/stagehand';
2
+ import type { BrowserCrawlerOptions, BrowserCrawlingContext, BrowserHook, BrowserRequestHandler, GetUserDataFromRequest, LoadedContext, RouterRoutes } from '@crawlee/browser';
3
+ import { BrowserCrawler, Configuration } from '@crawlee/browser';
4
+ import type { Dictionary } from '@crawlee/types';
5
+ import type { LaunchOptions, Page, Response } from 'playwright';
6
+ import type { z } from 'zod';
7
+ import type { StagehandController } from './stagehand-controller';
8
+ import type { StagehandLaunchContext } from './stagehand-launcher';
9
+ import type { StagehandPlugin } from './stagehand-plugin';
10
/**
 * Settings that are specific to Stagehand (as opposed to generic crawler options).
 */
export interface StagehandOptions {
    /**
     * Where Stagehand runs the browser.
     * - `'LOCAL'`: a local browser (default)
     * - `'BROWSERBASE'`: a Browserbase cloud browser
     * @default 'LOCAL'
     */
    env?: 'LOCAL' | 'BROWSERBASE';

    /**
     * API key; its meaning depends on `env`:
     * - `env: 'LOCAL'` - key of the LLM provider (OpenAI, Anthropic, or Google)
     * - `env: 'BROWSERBASE'` - Browserbase API key
     *
     * With the LOCAL env the key may instead come from an environment variable:
     * - OpenAI: `OPENAI_API_KEY`
     * - Anthropic: `ANTHROPIC_API_KEY`
     * - Google: `GOOGLE_API_KEY`
     *
     * @example
     * ```typescript
     * // Local with OpenAI
     * stagehandOptions: {
     *     env: 'LOCAL',
     *     model: 'openai/gpt-4.1-mini',
     *     apiKey: 'sk-...',
     * }
     *
     * // Browserbase cloud
     * stagehandOptions: {
     *     env: 'BROWSERBASE',
     *     apiKey: 'bb-...',
     *     projectId: 'proj-...',
     * }
     * ```
     */
    apiKey?: string;

    /**
     * Browserbase project ID. Required when `env` is `'BROWSERBASE'`.
     */
    projectId?: string;

    /**
     * AI model used by the act(), extract() and observe() operations.
     * Either a string such as "openai/gpt-4.1-mini" or a detailed ModelConfiguration object.
     * @default 'openai/gpt-4.1-mini'
     * @example "openai/gpt-4.1-mini"
     * @example "anthropic/claude-sonnet-4-20250514"
     */
    model?: ModelConfiguration;

    /**
     * How much Stagehand logs.
     * - 0: Minimal logging
     * - 1: Standard logging
     * - 2: Debug logging
     * @default 0
     */
    verbose?: 0 | 1 | 2;

    /**
     * Turns on automatic error recovery for AI operations that fail.
     * @default true
     */
    selfHeal?: boolean;

    /**
     * How long (ms) to wait for the DOM to stabilize before an AI operation runs.
     * @default 30000
     */
    domSettleTimeout?: number;

    /**
     * Custom LLM client to use for AI operations.
     */
    llmClient?: LLMClient;

    /**
     * Custom system prompt to use for AI operations.
     */
    systemPrompt?: string;

    /**
     * Writes AI inference details to a file, for debugging.
     * @default false
     */
    logInferenceToFile?: boolean;

    /**
     * Directory used for observation caching, which improves performance.
     */
    cacheDir?: string;
}
97
/**
 * A Playwright `Page` extended with Stagehand's AI methods.
 */
export interface StagehandPage extends Page {
    /**
     * Carries out an action on the page described in natural language.
     *
     * @param instruction - What to do, in natural language
     * @param options - Optional settings for the action (`page` cannot be supplied)
     * @returns Promise resolving to the result of the action
     *
     * @example
     * ```typescript
     * await page.act('Click the login button');
     * await page.act('Fill in email with test@example.com');
     * await page.act('Scroll down to load more items');
     * ```
     */
    act(instruction: string, options?: Omit<ActOptions, 'page'>): Promise<ActResult>;

    /**
     * Pulls structured data out of the page, guided by a natural language
     * instruction and shaped by a Zod schema.
     *
     * @param instruction - What to extract, in natural language
     * @param schema - Zod schema describing the shape of the result
     * @param options - Optional settings for the extraction (`page` cannot be supplied)
     * @returns Promise resolving to data that matches the schema
     *
     * @example
     * ```typescript
     * const data = await page.extract(
     *     'Get product title and price',
     *     z.object({
     *         title: z.string(),
     *         price: z.number(),
     *     })
     * );
     * ```
     */
    extract<T>(instruction: string, schema: z.ZodSchema<T>, options?: Omit<ExtractOptions, 'page'>): Promise<T>;

    /**
     * Inspects the page and returns the actions the AI suggests.
     *
     * @param options - Optional settings for the observation (`page` cannot be supplied)
     * @returns Promise resolving to the actions available on the page
     *
     * @example
     * ```typescript
     * const suggestions = await page.observe();
     * console.log('Available actions:', suggestions);
     * ```
     */
    observe(options?: Omit<ObserveOptions, 'page'>): Promise<Action[]>;

    /**
     * Builds an autonomous agent for multi-step workflows.
     *
     * Passing `stream: true` yields a streaming agent; omitting `stream`
     * (or passing `false`) yields a non-streaming one.
     *
     * @param config - Agent configuration
     * @returns Agent instance able to run complex workflows
     *
     * @example
     * ```typescript
     * // NOTE(review): confirm the accepted `AgentConfig` fields and the `execute()` arguments against the Stagehand docs.
     * const agent = page.agent({ task: 'Find and add cheapest laptop to cart' });
     * await agent.execute();
     * ```
     */
    agent(config: AgentConfig & { stream: true }): StreamingAgentInstance;
    agent(config?: AgentConfig & { stream?: false }): NonStreamingAgentInstance;
}
168
/**
 * Context object handed to StagehandCrawler handlers; its `page` is the AI-enhanced page.
 */
export interface StagehandCrawlingContext<UserData extends Dictionary = Dictionary>
    extends BrowserCrawlingContext<StagehandCrawler, StagehandPage, Response, StagehandController, UserData> {
    /**
     * Playwright page extended with Stagehand AI methods:
     * page.act(), page.extract(), page.observe() and page.agent().
     */
    page: StagehandPage;

    /**
     * The Stagehand instance itself, for advanced control.
     * Normally the enhanced page methods are enough and this is not needed.
     */
    stagehand: Stagehand;
}
183
/**
 * Signature of a navigation hook used by StagehandCrawler:
 * a `BrowserHook` over {@apilink StagehandCrawlingContext} and {@apilink StagehandGotoOptions}.
 */
export interface StagehandHook extends BrowserHook<StagehandCrawlingContext, StagehandGotoOptions> {}
188
/**
 * Signature of a request handler used by StagehandCrawler:
 * a `BrowserRequestHandler` over a loaded {@apilink StagehandCrawlingContext}.
 */
export interface StagehandRequestHandler extends BrowserRequestHandler<LoadedContext<StagehandCrawlingContext>> {}
193
/**
 * Navigation options for StagehandCrawler: the second parameter of Playwright's
 * `page.goto()`, widened with arbitrary extra keys via `Dictionary`.
 */
export type StagehandGotoOptions = Dictionary & Parameters<Page['goto']>[1];
197
/**
 * Constructor options accepted by StagehandCrawler.
 */
export interface StagehandCrawlerOptions
    extends BrowserCrawlerOptions<StagehandCrawlingContext, { browserPlugins: [StagehandPlugin] }> {
    /**
     * Stagehand-specific settings.
     * They control the AI behavior and the Browserbase integration.
     */
    stagehandOptions?: StagehandOptions;

    /**
     * Launch context carrying Stagehand-specific options.
     */
    launchContext?: StagehandLaunchContext;

    /**
     * Function called to process each request.
     *
     * It receives the {@apilink StagehandCrawlingContext} as its argument, where:
     * - `request` is an instance of the {@apilink Request} object with details about the URL to open, HTTP method etc.
     * - `page` is an enhanced Playwright [`Page`](https://playwright.dev/docs/api/class-page) with AI methods
     * - `browserController` is an instance of {@apilink StagehandController}
     * - `response` is the main resource response as returned by `page.goto(request.url)`
     * - `stagehand` is the Stagehand instance for advanced control
     *
     * AI-powered methods available on the page object:
     * - `page.act(instruction)` - Perform actions using natural language
     * - `page.extract(instruction, schema)` - Extract structured data
     * - `page.observe()` - Get AI-suggested actions
     * - `page.agent(config)` - Create autonomous agents
     *
     * The function must return a promise, which the crawler then awaits.
     *
     * When the function throws, the crawler re-crawls the request later,
     * at most `maxRequestRetries` times.
     *
     * @example
     * ```typescript
     * async requestHandler({ request, page, log }) {
     *     log.info(`Processing ${request.url}`);
     *
     *     // Use AI-powered actions
     *     await page.act('Click the Products menu');
     *
     *     // Extract structured data
     *     const products = await page.extract(
     *         'Get all products',
     *         z.object({
     *             items: z.array(z.object({
     *                 name: z.string(),
     *                 price: z.number(),
     *             })),
     *         })
     *     );
     *
     *     // Mix with standard Playwright methods
     *     await page.screenshot({ path: 'products.png' });
     * }
     * ```
     */
    requestHandler?: StagehandRequestHandler;

    /**
     * Function called once handling of a request has failed after all retries.
     */
    failedRequestHandler?: StagehandRequestHandler;

    /**
     * Async functions evaluated one after another before the navigation.
     */
    preNavigationHooks?: Array<StagehandHook>;

    /**
     * Async functions evaluated one after another after the navigation.
     */
    postNavigationHooks?: Array<StagehandHook>;
}
271
/**
 * AI-powered web crawler built on Browserbase's Stagehand library.
 *
 * Extends {@apilink BrowserCrawler}; the page given to handlers gains natural language methods:
 * - `page.act()` - Perform actions using natural language
 * - `page.extract()` - Extract structured data with AI
 * - `page.observe()` - Get AI-suggested actions
 * - `page.agent()` - Create autonomous agents for complex workflows
 *
 * According to the package authors, anti-blocking features (including browser fingerprinting)
 * are applied automatically, which helps on sites with bot protection such as Cloudflare.
 *
 * @example
 * ```typescript
 * import { StagehandCrawler } from '@crawlee/stagehand';
 * import { z } from 'zod';
 *
 * const crawler = new StagehandCrawler({
 *     stagehandOptions: {
 *         env: 'LOCAL',
 *         model: 'openai/gpt-4.1-mini',
 *         verbose: 1,
 *     },
 *     maxConcurrency: 3,
 *     async requestHandler({ page, request, log }) {
 *         log.info(`Crawling ${request.url}`);
 *
 *         // Use AI to interact with the page
 *         await page.act('Click the Products link');
 *         await page.act('Scroll to load more items');
 *
 *         // Extract structured data
 *         const products = await page.extract(
 *             'Get all product names and prices',
 *             z.object({
 *                 items: z.array(z.object({
 *                     name: z.string(),
 *                     price: z.number(),
 *                 })),
 *             })
 *         );
 *
 *         log.info(`Found ${products.items.length} products`);
 *     },
 * });
 *
 * await crawler.run(['https://example.com']);
 * ```
 */
export declare class StagehandCrawler extends BrowserCrawler<
    { browserPlugins: [StagehandPlugin] },
    LaunchOptions,
    StagehandCrawlingContext
> {
    /** Configuration instance of this crawler (second constructor argument). */
    readonly config: Configuration;

    /** `ow` predicates, one per accepted constructor option. */
    protected static optionsShape: {
        stagehandOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
        browserPoolOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
        handlePageFunction: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
        navigationTimeoutSecs: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
        preNavigationHooks: import("ow").ArrayPredicate<unknown> & import("ow").BasePredicate<unknown[] | undefined>;
        postNavigationHooks: import("ow").ArrayPredicate<unknown> & import("ow").BasePredicate<unknown[] | undefined>;
        launchContext: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
        headless: import("ow").AnyPredicate<string | boolean>;
        sessionPoolOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
        persistCookiesPerSession: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
        useSessionPool: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
        proxyConfiguration: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
        ignoreShadowRoots: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
        ignoreIframes: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
        requestList: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
        requestQueue: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
        requestHandler: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
        handleRequestFunction: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
        requestHandlerTimeoutSecs: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
        handleRequestTimeoutSecs: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
        errorHandler: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
        failedRequestHandler: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
        handleFailedRequestFunction: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
        maxRequestRetries: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
        sameDomainDelaySecs: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
        maxSessionRotations: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
        maxRequestsPerCrawl: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
        maxCrawlDepth: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
        autoscaledPoolOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
        statusMessageLoggingInterval: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
        statusMessageCallback: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
        retryOnBlocked: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
        respectRobotsTxtFile: import("ow").AnyPredicate<boolean | object>;
        onSkippedRequest: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
        httpClient: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
        minConcurrency: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
        maxConcurrency: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
        maxRequestsPerMinute: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
        keepAlive: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
        log: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
        experiments: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
        statisticsOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
    };

    /**
     * Creates a new instance of StagehandCrawler.
     *
     * @param options - Crawler configuration options
     * @param config - Optional `Configuration` instance, exposed as `this.config`
     */
    constructor(options?: StagehandCrawlerOptions, config?: Configuration);

    /**
     * Overrides the request handler so the page is enhanced with Stagehand AI methods first.
     *
     * Sequence:
     * 1. Remember the original userProvidedRequestHandler
     * 2. Swap in a wrapper that enhances the page before anything else
     * 3. Call super (which creates page/browserController, then invokes the wrapper)
     * 4. The wrapper enhances the page and calls the original handler
     * 5. Put the original handler back
     *
     * PlaywrightCrawler adds utility methods in a similar way via registerUtilsToContext,
     * but here the page object itself has to be transformed to carry the Stagehand AI methods.
     */
    protected _runRequestHandler(crawlingContext: StagehandCrawlingContext): Promise<void>;

    /**
     * Navigation handler of the Stagehand crawler.
     * Relies on standard Playwright navigation.
     */
    protected _navigationHandler(
        crawlingContext: StagehandCrawlingContext,
        gotoOptions: StagehandGotoOptions,
    ): Promise<Response | null>;
}
394
/**
 * Builds a router for StagehandCrawler whose route handlers are type-safe.
 *
 * @param routes - Optional initial routes for the router
 * @returns Configured router instance
 *
 * @example
 * ```typescript
 * const router = createStagehandRouter();
 *
 * router.addHandler('product', async ({ page, request, log }) => {
 *     log.info(`Processing product: ${request.url}`);
 *     const data = await page.extract('Get product info', schema);
 * });
 *
 * router.addDefaultHandler(async ({ page, enqueueLinks }) => {
 *     await enqueueLinks({ globs: ['https://example.com/products/*'] });
 * });
 *
 * const crawler = new StagehandCrawler({
 *     requestHandler: router,
 * });
 * ```
 */
export declare function createStagehandRouter<
    Context extends StagehandCrawlingContext = StagehandCrawlingContext,
    UserData extends Dictionary = GetUserDataFromRequest<Context['request']>,
>(routes?: RouterRoutes<Context, UserData>): import("@crawlee/browser").RouterHandler<Context>;
419
+ //# sourceMappingURL=stagehand-crawler.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"stagehand-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/stagehand-crawler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACR,MAAM,EACN,UAAU,EACV,SAAS,EACT,WAAW,EACX,cAAc,EACd,SAAS,EACT,kBAAkB,EAClB,yBAAyB,EACzB,cAAc,EACd,SAAS,EACT,sBAAsB,EACzB,MAAM,0BAA0B,CAAC;AAClC,OAAO,KAAK,EACR,qBAAqB,EACrB,sBAAsB,EACtB,WAAW,EACX,qBAAqB,EACrB,sBAAsB,EACtB,aAAa,EACb,YAAY,EACf,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,cAAc,EAAE,aAAa,EAAU,MAAM,kBAAkB,CAAC;AACzE,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAEjD,OAAO,KAAK,EAAE,aAAa,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAChE,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAE7B,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,wBAAwB,CAAC;AAClE,OAAO,KAAK,EAAE,sBAAsB,EAAE,MAAM,sBAAsB,CAAC;AAEnE,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAG1D;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC7B;;;;;OAKG;IACH,GAAG,CAAC,EAAE,OAAO,GAAG,aAAa,CAAC;IAE9B;;;;;;;;;;;;;;;;;;;;;;;;;;OA0BG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB;;;;;;OAMG;IACH,KAAK,CAAC,EAAE,kBAAkB,CAAC;IAE3B;;;;;;OAMG;IACH,OAAO,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IAEpB;;;OAGG;IACH,QAAQ,CAAC,EAAE,OAAO,CAAC;IAEnB;;;OAGG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAE1B;;OAEG;IACH,SAAS,CAAC,EAAE,SAAS,CAAC;IAEtB;;OAEG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB;;;OAGG;IACH,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAE7B;;OAEG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,aAAc,SAAQ,IAAI;IACvC;;;;;;;;;;;;;OAaG;IACH,GAAG,CAAC,WAAW,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC,GAAG,OAAO,CAAC,SAAS,CAAC,CAAC;IAEjF;;;;;;;;;;;;;;;;;;OAkBG;IACH,OAAO,CAAC,CAAC,EAAE,WAAW,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,EAAE,IAAI,CAAC,cAAc,EAAE,MAAM,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;IAE5G;;;;;;;;;;;OAWG;IACH,OAAO,CAAC,OAAO,CAAC,EAAE,IAAI,CAAC,cAAc,EAAE,MAAM,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IAEnE;;;;;;;;;;;OAWG;IACH,KAAK,CAAC,MAAM,EAAE,WAAW,GAAG;QAAE,MAAM,EAAE,IAAI,CAAA;KAAE,GAAG,sBAAsB,CAAC;IACtE,KAAK,CAAC,MAAM,CAAC,EAAE,WAAW,GAAG;QAAE,MAAM,CAAC,EA
AE,KAAK,CAAA;KAAE,GAAG,yBAAyB,CAAC;CAC/E;AAED;;GAEG;AACH,MAAM,WAAW,wBAAwB,CAAC,QAAQ,SAAS,UAAU,GAAG,UAAU,CAC9E,SAAQ,sBAAsB,CAAC,gBAAgB,EAAE,aAAa,EAAE,QAAQ,EAAE,mBAAmB,EAAE,QAAQ,CAAC;IACxG;;;OAGG;IACH,IAAI,EAAE,aAAa,CAAC;IAEpB;;;OAGG;IACH,SAAS,EAAE,SAAS,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,aAAc,SAAQ,WAAW,CAAC,wBAAwB,EAAE,oBAAoB,CAAC;CAAG;AAErG;;GAEG;AACH,MAAM,WAAW,uBAAwB,SAAQ,qBAAqB,CAAC,aAAa,CAAC,wBAAwB,CAAC,CAAC;CAAG;AAElH;;GAEG;AACH,MAAM,MAAM,oBAAoB,GAAG,UAAU,GAAG,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAE5E;;GAEG;AACH,MAAM,WAAW,uBACb,SAAQ,qBAAqB,CAAC,wBAAwB,EAAE;IAAE,cAAc,EAAE,CAAC,eAAe,CAAC,CAAA;CAAE,CAAC;IAC9F;;;OAGG;IACH,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;IAEpC;;OAEG;IACH,aAAa,CAAC,EAAE,sBAAsB,CAAC;IAEvC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OA4CG;IACH,cAAc,CAAC,EAAE,uBAAuB,CAAC;IAEzC;;OAEG;IACH,oBAAoB,CAAC,EAAE,uBAAuB,CAAC;IAE/C;;OAEG;IACH,kBAAkB,CAAC,EAAE,aAAa,EAAE,CAAC;IAErC;;OAEG;IACH,mBAAmB,CAAC,EAAE,aAAa,EAAE,CAAC;CACzC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgDG;AACH,qBAAa,gBAAiB,SAAQ,cAAc,CAChD;IAAE,cAAc,EAAE,CAAC,eAAe,CAAC,CAAA;CAAE,EACrC,aAAa,EACb,wBAAwB,CAC3B;aAcyB,MAAM;IAb5B,iBAA0B,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;MAIpC;IAEF;;;;OAIG;gBAEC,OAAO,GAAE,uBAA4B,EACnB,MAAM,gBAAkC;IAqC9D;;;;;;;;;;;;OAYG;cACsB,kBAAkB,CAAC,eAAe,EAAE,wBAAwB,GAAG,OAAO,CAAC,IAAI,CAAC;IA0BrG;;;OAGG;cACsB,kBAAkB,CACvC,eAAe,EAAE,wBAAwB,EACzC,WAAW,EAAE,oBAAoB,GAClC,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC;CAI9B;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,qBAAqB,CACjC,OAAO,SAAS,wBAAwB,GAAG,wBAAwB,EACnE,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,qDAEzC"}