@mastra/stagehand 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,590 @@
1
+ import { Stagehand } from '@browserbasehq/stagehand';
2
+ import { ThreadManager, ThreadSession, ThreadManagerConfig, BrowserConfig, MastraBrowser, BrowserToolError, BrowserState, BrowserTabState, ScreencastOptions, ScreencastStream, MouseEventParams, KeyboardEventParams } from '@mastra/core/browser';
3
+ import { Tool } from '@mastra/core/tools';
4
+ import { z } from 'zod';
5
+
6
+ /**
7
+ * Stagehand Tool Schemas
8
+ *
9
+ * AI-powered browser tools using natural language instructions.
10
+ * These are fundamentally different from the deterministic AgentBrowser tools.
11
+ */
12
+
13
+ /**
14
+ * stagehand_act - Perform an action using natural language (`instruction` is required; `variables`, `useVision` and `timeout` are optional)
15
+ */
16
+ declare const actInputSchema: z.ZodObject<{
17
+ instruction: z.ZodString;
18
+ variables: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
19
+ useVision: z.ZodOptional<z.ZodBoolean>;
20
+ timeout: z.ZodOptional<z.ZodNumber>;
21
+ }, z.core.$strip>;
22
+ type ActInput = z.output<typeof actInputSchema>;
23
+ /**
24
+ * stagehand_extract - Extract structured data from a page (`instruction` is required; `schema` and `timeout` are optional)
25
+ */
26
+ declare const extractInputSchema: z.ZodObject<{
27
+ instruction: z.ZodString;
28
+ schema: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
29
+ timeout: z.ZodOptional<z.ZodNumber>;
30
+ }, z.core.$strip>;
31
+ type ExtractInput = z.output<typeof extractInputSchema>;
32
+ /**
33
+ * stagehand_observe - Discover actionable elements on a page (every field - `instruction`, `onlyVisible`, `timeout` - is optional)
34
+ */
35
+ declare const observeInputSchema: z.ZodObject<{
36
+ instruction: z.ZodOptional<z.ZodString>;
37
+ onlyVisible: z.ZodOptional<z.ZodBoolean>;
38
+ timeout: z.ZodOptional<z.ZodNumber>;
39
+ }, z.core.$strip>;
40
+ type ObserveInput = z.output<typeof observeInputSchema>;
41
+ /**
42
+ * stagehand_navigate - Navigate to a URL (`url` is required; optional `waitUntil` is one of 'load', 'domcontentloaded', 'networkidle')
43
+ */
44
+ declare const navigateInputSchema: z.ZodObject<{
45
+ url: z.ZodString;
46
+ waitUntil: z.ZodOptional<z.ZodEnum<{
47
+ load: "load";
48
+ domcontentloaded: "domcontentloaded";
49
+ networkidle: "networkidle";
50
+ }>>;
51
+ }, z.core.$strip>;
52
+ type NavigateInput = z.output<typeof navigateInputSchema>;
53
+ /**
54
+ * stagehand_close - Close the browser (the input object declares no fields)
55
+ */
56
+ declare const closeInputSchema: z.ZodObject<{}, z.core.$strip>;
57
+ type CloseInput = z.output<typeof closeInputSchema>;
58
+ /**
59
+ * stagehand_tabs - Manage browser tabs (`action` is required and is one of 'list', 'new', 'switch', 'close'; `index` and `url` are optional)
60
+ */
61
+ declare const tabsInputSchema: z.ZodObject<{
62
+ action: z.ZodEnum<{
63
+ list: "list";
64
+ new: "new";
65
+ switch: "switch";
66
+ close: "close";
67
+ }>;
68
+ index: z.ZodOptional<z.ZodNumber>;
69
+ url: z.ZodOptional<z.ZodString>;
70
+ }, z.core.$strip>;
71
+ type TabsInput = z.output<typeof tabsInputSchema>;
72
+ declare const stagehandSchemas: { // Tool input schemas grouped by action; each entry is declared with the same shape as the matching standalone *InputSchema constant
73
+ readonly act: z.ZodObject<{
74
+ instruction: z.ZodString;
75
+ variables: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
76
+ useVision: z.ZodOptional<z.ZodBoolean>;
77
+ timeout: z.ZodOptional<z.ZodNumber>;
78
+ }, z.core.$strip>;
79
+ readonly extract: z.ZodObject<{
80
+ instruction: z.ZodString;
81
+ schema: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
82
+ timeout: z.ZodOptional<z.ZodNumber>;
83
+ }, z.core.$strip>;
84
+ readonly observe: z.ZodObject<{
85
+ instruction: z.ZodOptional<z.ZodString>;
86
+ onlyVisible: z.ZodOptional<z.ZodBoolean>;
87
+ timeout: z.ZodOptional<z.ZodNumber>;
88
+ }, z.core.$strip>;
89
+ readonly navigate: z.ZodObject<{
90
+ url: z.ZodString;
91
+ waitUntil: z.ZodOptional<z.ZodEnum<{
92
+ load: "load";
93
+ domcontentloaded: "domcontentloaded";
94
+ networkidle: "networkidle";
95
+ }>>;
96
+ }, z.core.$strip>;
97
+ readonly tabs: z.ZodObject<{
98
+ action: z.ZodEnum<{
99
+ list: "list";
100
+ new: "new";
101
+ switch: "switch";
102
+ close: "close";
103
+ }>;
104
+ index: z.ZodOptional<z.ZodNumber>;
105
+ url: z.ZodOptional<z.ZodString>;
106
+ }, z.core.$strip>;
107
+ readonly close: z.ZodObject<{}, z.core.$strip>;
108
+ };
109
+
110
+ /**
111
+ * StagehandThreadManager - Thread isolation for StagehandBrowser
112
+ *
113
+ * Supports two scope modes:
114
+ * - 'none': All threads share the same Stagehand instance and page
115
+ * - 'browser': Each thread gets its own Stagehand instance (separate browser)
116
+ *
117
+ * @see AgentBrowserThreadManager for the equivalent implementation.
118
+ */
119
+
120
+ type V3 = Stagehand; // Local alias for the Stagehand type imported from '@browserbasehq/stagehand'
121
+ type V3Page$1 = NonNullable<ReturnType<NonNullable<Stagehand['context']>['activePage']>>; // Non-nullable return type of activePage() on Stagehand's (non-null) context
122
+ /**
123
+ * Extended session info for Stagehand threads (adds an optional `stagehand` field to ThreadSession).
124
+ */
125
+ interface StagehandThreadSession extends ThreadSession {
126
+ /** For 'thread' mode (per-thread isolation; NOTE(review): other docs in this file call it 'browser' scope - confirm the canonical name): dedicated Stagehand instance */
127
+ stagehand?: V3;
128
+ }
129
+ /**
130
+ * Configuration for StagehandThreadManager (extends ThreadManagerConfig with two optional hooks).
131
+ */
132
+ interface StagehandThreadManagerConfig extends ThreadManagerConfig {
133
+ /** Function to create a new Stagehand instance (for 'thread' mode, i.e. per-thread isolation; NOTE(review): other docs in this file call it 'browser' scope - confirm the canonical name) */
134
+ createStagehand?: () => Promise<V3>;
135
+ /** Callback when a new browser/Stagehand instance is created for a thread; receives the new instance and the thread ID */
136
+ onBrowserCreated?: (stagehand: V3, threadId: string) => void;
137
+ }
138
+ /**
139
+ * Thread manager for StagehandBrowser.
140
+ *
141
+ * Supports two scope modes (NOTE(review): member docs below call these 'shared' and 'thread' respectively - confirm the canonical names against ThreadManagerConfig):
142
+ * - 'none': All threads share the shared Stagehand instance
143
+ * - 'browser': Each thread gets a dedicated Stagehand instance
144
+ */
145
+ declare class StagehandThreadManager extends ThreadManager<V3Page$1 | V3> {
146
+ private sharedStagehand;
147
+ protected sessions: Map<string, StagehandThreadSession>;
148
+ private createStagehand?;
149
+ private onBrowserCreated?;
150
+ /** Map of thread ID to dedicated Stagehand instance (for 'thread' mode, i.e. per-thread isolation) */
151
+ private readonly threadStagehands;
152
+ constructor(config: StagehandThreadManagerConfig);
153
+ /**
154
+ * Set the shared Stagehand instance (called after browser launch).
155
+ */
156
+ setStagehand(instance: V3): void;
157
+ /**
158
+ * Clear the shared Stagehand instance (called when browser disconnects).
159
+ */
160
+ clearStagehand(): void;
161
+ /**
162
+ * Set the factory function for creating new Stagehand instances.
163
+ * Required for 'browser' scope mode.
164
+ */
165
+ setCreateStagehand(factory: () => Promise<V3>): void;
166
+ /**
167
+ * Get the shared Stagehand instance. NOTE(review): the return type is non-optional even though clearStagehand() exists - confirm what happens when no shared instance is set.
168
+ */
169
+ getSharedStagehand(): V3;
170
+ /**
171
+ * Get the Stagehand instance for a specific thread (declared as possibly undefined).
172
+ * In 'shared' mode, returns the shared instance.
173
+ * In 'thread' mode, returns the thread's dedicated instance.
174
+ */
175
+ getStagehandForThread(threadId: string): V3 | undefined;
176
+ /**
177
+ * Get the Stagehand page for a thread (declared as possibly undefined).
178
+ * Returns the active page from the thread's Stagehand instance.
179
+ */
180
+ getPageForThread(threadId: string): V3Page$1 | undefined;
181
+ /**
182
+ * Get the shared manager - returns the active page or the Stagehand instance.
183
+ */
184
+ protected getSharedManager(): V3Page$1 | V3;
185
+ /**
186
+ * Create a new session for a thread.
187
+ */
188
+ protected createSession(threadId: string): Promise<StagehandThreadSession>;
189
+ /**
190
+ * Restore browser state (multiple tabs) to a Stagehand instance.
191
+ */
192
+ private restoreBrowserState;
193
+ /**
194
+ * Switch to an existing session.
195
+ * For 'thread' mode, no switching needed - each thread has its own instance.
196
+ * For 'shared' mode, nothing to switch.
197
+ */
198
+ protected switchToSession(_session: StagehandThreadSession): Promise<void>;
199
+ /**
200
+ * Get the manager for a specific session (either a page or a Stagehand instance, per the return type).
201
+ */
202
+ protected getManagerForSession(session: StagehandThreadSession): V3Page$1 | V3;
203
+ /**
204
+ * Destroy a session and clean up resources.
205
+ */
206
+ protected doDestroySession(session: StagehandThreadSession): Promise<void>;
207
+ /**
208
+ * Clean up all thread sessions.
209
+ */
210
+ destroyAll(): Promise<void>;
211
+ /**
212
+ * Check if any thread Stagehands are still running.
213
+ */
214
+ hasActiveThreadStagehands(): boolean;
215
+ /**
216
+ * Clear all session tracking without closing browsers.
217
+ * Used when browsers have been externally closed and we just need to reset state.
218
+ */
219
+ clearAllSessions(): void;
220
+ /**
221
+ * Clear a specific thread's session without closing the browser.
222
+ * Used when a thread's browser has been externally closed.
223
+ * Preserves the browser state for potential restoration.
224
+ * @param threadId - The thread ID to clear
225
+ */
226
+ clearSession(threadId: string): void;
227
+ }
228
+
229
+ /**
230
+ * Stagehand Browser Types
231
+ */
232
+
233
+ /**
234
+ * Model configuration for Stagehand AI operations: either a plain string or an object with `modelName` plus optional `apiKey` and `baseURL`
235
+ */
236
+ type ModelConfiguration = string | {
237
+ modelName: string;
238
+ apiKey?: string;
239
+ baseURL?: string;
240
+ };
241
+ /**
242
+ * Configuration for StagehandBrowser (extends the core BrowserConfig; every field declared here is optional)
243
+ */
244
+ interface StagehandBrowserConfig extends BrowserConfig {
245
+ /**
246
+ * Environment to run the browser in
247
+ * - 'LOCAL': Run browser locally
248
+ * - 'BROWSERBASE': Use Browserbase cloud
249
+ * @default 'LOCAL'
250
+ */
251
+ env?: 'LOCAL' | 'BROWSERBASE';
252
+ /**
253
+ * Browserbase API key (required when env = 'BROWSERBASE'; the type itself does not enforce this)
254
+ */
255
+ apiKey?: string;
256
+ /**
257
+ * Browserbase project ID (required when env = 'BROWSERBASE'; the type itself does not enforce this)
258
+ */
259
+ projectId?: string;
260
+ /**
261
+ * Model configuration for AI operations
262
+ * @default 'openai/gpt-4o'
263
+ */
264
+ model?: ModelConfiguration;
265
+ /**
266
+ * Enable self-healing selectors.
267
+ * When enabled, Stagehand uses AI to find elements even when selectors fail.
268
+ * @default true
269
+ */
270
+ selfHeal?: boolean;
271
+ /**
272
+ * Timeout for DOM to settle after actions (ms)
273
+ * @default 5000
274
+ */
275
+ domSettleTimeout?: number;
276
+ /**
277
+ * Logging verbosity level
278
+ * - 0: Silent
279
+ * - 1: Errors only
280
+ * - 2: Verbose
281
+ * @default 1
282
+ */
283
+ verbose?: 0 | 1 | 2;
284
+ /**
285
+ * Custom system prompt for AI operations (act, extract, observe)
286
+ */
287
+ systemPrompt?: string;
288
+ }
289
+ /**
290
+ * Action returned from observe() (the element type of the `actions` arrays declared in this file)
291
+ */
292
+ interface StagehandAction {
293
+ /** XPath selector to locate element */
294
+ selector: string;
295
+ /** Human-readable description */
296
+ description: string;
297
+ /** Suggested action method (optional) */
298
+ method?: string;
299
+ /** Additional action parameters (optional) */
300
+ arguments?: string[];
301
+ }
302
+ /**
303
+ * Result from act(). NOTE(review): StagehandBrowser.act() declares an inline return type instead of referencing this interface - confirm the two are meant to stay in sync.
304
+ */
305
+ interface ActResult {
306
+ success: boolean;
307
+ message?: string;
308
+ action?: string;
309
+ url?: string;
310
+ hint?: string;
311
+ }
312
+ /**
313
+ * Result from extract(); `T` is the type of `data` and defaults to unknown. NOTE(review): StagehandBrowser.extract() declares an inline return type instead of referencing this interface - confirm the two are meant to stay in sync.
314
+ */
315
+ interface ExtractResult<T = unknown> {
316
+ success: boolean;
317
+ data?: T;
318
+ error?: string;
319
+ url?: string;
320
+ hint?: string;
321
+ }
322
+ /**
323
+ * Result from observe(). NOTE(review): StagehandBrowser.observe() declares an inline return type instead of referencing this interface - confirm the two are meant to stay in sync.
324
+ */
325
+ interface ObserveResult {
326
+ success: boolean;
327
+ actions: StagehandAction[];
328
+ url?: string;
329
+ hint?: string;
330
+ }
331
+
332
+ /**
333
+ * StagehandBrowser - AI-powered browser automation using Stagehand v3
334
+ *
335
+ * Uses natural language instructions for browser interactions.
336
+ * Fundamentally different from AgentBrowser's deterministic refs approach.
337
+ *
338
+ * Stagehand v3 is CDP-native and provides direct CDP access for screencast/input injection.
339
+ */
340
+
341
+ type V3Page = NonNullable<ReturnType<NonNullable<Stagehand['context']>['activePage']>>; // Non-nullable return type of activePage() on Stagehand's (non-null) context; same definition as V3Page$1 earlier in this file
342
+ /**
343
+ * StagehandBrowser - AI-powered browser using Stagehand v3
344
+ *
345
+ * Unlike AgentBrowser which uses refs ([ref=e1]), StagehandBrowser uses
346
+ * natural language instructions for all interactions.
347
+ *
348
+ * Supports thread isolation via the scope config (NOTE(review): member docs below call these modes 'shared' and 'thread' respectively - confirm the canonical names against BrowserConfig):
349
+ * - 'none': All threads share the same Stagehand instance
350
+ * - 'browser': Each thread gets its own Stagehand instance (separate browser)
351
+ */
352
+ declare class StagehandBrowser extends MastraBrowser {
353
+ readonly id: string;
354
+ readonly name = "StagehandBrowser";
355
+ readonly provider = "browserbase/stagehand";
356
+ private stagehand;
357
+ private stagehandConfig;
358
+ /** Thread manager - narrowed type from base class */
359
+ protected threadManager: StagehandThreadManager;
360
+ /** Active screencast streams per thread (for reconnection on tab changes) */
361
+ private activeScreencastStreams;
362
+ /** Debounce timers per thread for tab change reconnection */
363
+ private tabChangeDebounceTimers;
364
+ /** Default key for shared scope */
365
+ private static readonly SHARED_STREAM_KEY;
366
+ constructor(config?: StagehandBrowserConfig);
367
+ /**
368
+ * Close a specific thread's browser session.
369
+ * For 'thread' scope, this closes only that thread's Stagehand instance.
370
+ * For 'shared' scope, this is a no-op (use close() to close the shared browser).
371
+ */
372
+ closeThreadSession(threadId: string): Promise<void>;
373
+ /**
374
+ * Ensure browser is ready and thread session exists.
375
+ * For 'thread' scope, this creates a dedicated Stagehand instance for the thread.
376
+ */
377
+ ensureReady(): Promise<void>;
378
+ /**
379
+ * Build Stagehand options from config.
380
+ * Returns the configuration object expected by Stagehand constructor.
381
+ */
382
+ private buildStagehandOptions;
383
+ /**
384
+ * Create a new Stagehand instance with the current config.
385
+ * Used by thread manager for 'browser' isolation mode.
386
+ */
387
+ private createStagehandInstance;
388
+ protected doLaunch(): Promise<void>; // NOTE(review): undocumented here; presumably the launch hook called by the MastraBrowser base class - confirm
389
+ /**
390
+ * Set up close event listener for a Stagehand instance.
391
+ * Listens to both context and page close events for robust detection.
392
+ */
393
+ private setupCloseListener;
394
+ /**
395
+ * Set up close event listener for a thread's Stagehand instance.
396
+ * Uses CDP Target.targetDestroyed events to detect when all pages are gone.
397
+ */
398
+ private setupCloseListenerForThread;
399
+ /**
400
+ * Handle browser disconnection for a specific thread.
401
+ * Called when a thread's browser is closed externally.
402
+ */
403
+ private handleThreadBrowserDisconnected;
404
+ protected doClose(): Promise<void>; // NOTE(review): undocumented here; presumably the close hook called by the MastraBrowser base class - confirm
405
+ /**
406
+ * Check if the browser is still alive by verifying the context and pages exist.
407
+ * Called by base class ensureReady() to detect externally closed browsers.
408
+ */
409
+ protected checkBrowserAlive(): Promise<boolean>;
410
+ /**
411
+ * Handle browser disconnection by clearing internal state.
412
+ * For 'thread' scope, only notifies the specific thread's callbacks.
413
+ * For 'shared' scope, notifies all callbacks.
414
+ */
415
+ handleBrowserDisconnected(): void;
416
+ /**
417
+ * Create an error response from an exception.
418
+ * Extends base class to add Stagehand-specific error handling.
419
+ */
420
+ protected createErrorFromException(error: unknown, context: string): BrowserToolError;
421
+ /**
422
+ * Get the Stagehand instance for a thread, creating it if needed.
423
+ * For 'browser' isolation, this creates a dedicated Stagehand instance.
424
+ * For 'none' isolation, returns the shared instance.
425
+ */
426
+ private getStagehandForThread;
427
+ /**
428
+ * Require a Stagehand instance for the given or current thread.
429
+ * Throws if no instance is available.
430
+ * @param explicitThreadId - Optional thread ID to use instead of getCurrentThread()
431
+ * Use this to avoid race conditions in concurrent tool calls.
432
+ */
433
+ private requireStagehand;
434
+ /**
435
+ * Get the current page from Stagehand v3, respecting thread isolation.
436
+ * @param explicitThreadId - Optional thread ID to use instead of getCurrentThread()
437
+ * Use this to avoid race conditions in concurrent tool calls.
438
+ */
439
+ private getPage;
440
+ /**
441
+ * Get the page for a specific thread, creating session if needed (resolves to the page or null, per the return type).
442
+ */
443
+ getPageForThread(threadId: string): Promise<V3Page | null>;
444
+ /**
445
+ * Get a CDP session for a specific page.
446
+ */
447
+ private getCdpSessionForPage;
448
+ getTools(): Record<string, Tool<any, any>>; // NOTE(review): undocumented here; same return type as createStagehandTools() - confirm whether it delegates to it
449
+ /**
450
+ * Perform an action using natural language instruction
451
+ * @param input - Action input
452
+ * @param threadId - Optional thread ID for thread-safe operation
453
+ */
454
+ act(input: ActInput, threadId?: string): Promise<{
455
+ success: true;
456
+ message?: string;
457
+ action?: string;
458
+ url: string;
459
+ hint: string;
460
+ } | BrowserToolError>;
461
+ /**
462
+ * Extract structured data from a page using natural language
463
+ * @param input - Extract input
464
+ * @param threadId - Optional thread ID for thread-safe operation
465
+ */
466
+ extract(input: ExtractInput, threadId?: string): Promise<{
467
+ success: true;
468
+ data: unknown;
469
+ url: string;
470
+ hint: string;
471
+ } | BrowserToolError>;
472
+ /**
473
+ * Discover actionable elements on a page
474
+ * @param input - Observe input
475
+ * @param threadId - Optional thread ID for thread-safe operation
476
+ */
477
+ observe(input: ObserveInput, threadId?: string): Promise<{
478
+ success: true;
479
+ actions: StagehandAction[];
480
+ url: string;
481
+ hint: string;
482
+ } | BrowserToolError>;
483
+ /**
484
+ * Navigate to a URL
485
+ * @param input - Navigate input
486
+ * @param threadId - Optional thread ID for thread-safe operation
487
+ */
488
+ navigate(input: NavigateInput, threadId?: string): Promise<{
489
+ success: true;
490
+ url: string;
491
+ title: string;
492
+ hint: string;
493
+ } | BrowserToolError>;
494
+ /**
495
+ * Manage browser tabs - list, create, switch, close
496
+ * @param input - Tabs input
497
+ * @param threadId - Optional thread ID for thread-safe operation
498
+ */
499
+ tabs(input: TabsInput, threadId?: string): Promise<{
500
+ success: true;
501
+ tabs?: Array<{
502
+ index: number;
503
+ url: string;
504
+ title: string;
505
+ active: boolean;
506
+ }>;
507
+ hint: string;
508
+ } | {
509
+ success: true;
510
+ index?: number;
511
+ url?: string;
512
+ title?: string;
513
+ remaining?: number;
514
+ hint: string;
515
+ } | BrowserToolError>;
516
+ getCurrentUrl(threadId?: string): Promise<string | null>; // NOTE(review): undocumented here; presumably resolves to null when no page is available - confirm
517
+ /**
518
+ * Navigate to a URL (simple version). Used internally for restoring state on relaunch. NOTE(review): takes no threadId, unlike the other public page-level methods.
519
+ */
520
+ navigateTo(url: string): Promise<void>;
521
+ /**
522
+ * Get the current browser state (all tabs and active tab index); resolves to BrowserState or null per the return type.
523
+ */
524
+ getBrowserState(threadId?: string): Promise<BrowserState | null>;
525
+ /**
526
+ * Get browser state from a specific Stagehand instance.
527
+ */
528
+ private getBrowserStateFromStagehand;
529
+ /**
530
+ * Get all open tabs with their URLs and titles.
531
+ */
532
+ getTabState(threadId?: string): Promise<BrowserTabState[]>;
533
+ /**
534
+ * Get the active tab index.
535
+ */
536
+ getActiveTabIndex(threadId?: string): Promise<number>;
537
+ /**
538
+ * Update the browser state in the thread session.
539
+ * Called on navigation, tab open/close to keep state fresh.
540
+ */
541
+ private updateSessionBrowserState;
542
+ /**
543
+ * Get the stream key for a thread (or shared key for shared scope).
544
+ */
545
+ private getStreamKey;
546
+ startScreencast(options?: ScreencastOptions): Promise<ScreencastStream>; // NOTE(review): undocumented here and takes no threadId, unlike injectMouseEvent/injectKeyboardEvent - confirm how the target thread is chosen
547
+ /**
548
+ * Set up listeners to detect tab changes and reconnect the screencast.
549
+ * Uses CDP Target events since Stagehand doesn't expose page lifecycle events.
550
+ */
551
+ private setupTabChangeDetection;
552
+ /**
553
+ * Reconnect the active screencast for a specific thread.
554
+ */
555
+ private reconnectScreencastForThread;
556
+ /**
557
+ * Reconnect the active screencast for the current thread.
558
+ * Wrapper for reconnectScreencastForThread using getCurrentThread().
559
+ */
560
+ private reconnectScreencast;
561
+ injectMouseEvent(event: MouseEventParams, threadId?: string): Promise<void>; // NOTE(review): undocumented here; presumably forwards the mouse event to the thread's page - confirm
562
+ injectKeyboardEvent(event: KeyboardEventParams, threadId?: string): Promise<void>; // NOTE(review): undocumented here; presumably forwards the keyboard event to the thread's page - confirm
563
+ }
564
+
565
+ /**
566
+ * Stagehand Tool Constants: maps each tool key to its `stagehand_*` tool-name string literal
567
+ */
568
+ declare const STAGEHAND_TOOLS: {
569
+ readonly ACT: "stagehand_act";
570
+ readonly EXTRACT: "stagehand_extract";
571
+ readonly OBSERVE: "stagehand_observe";
572
+ readonly NAVIGATE: "stagehand_navigate";
573
+ readonly TABS: "stagehand_tabs";
574
+ readonly CLOSE: "stagehand_close";
575
+ };
576
+ type StagehandToolName = (typeof STAGEHAND_TOOLS)[keyof typeof STAGEHAND_TOOLS]; // Union of the six tool-name string literals declared in STAGEHAND_TOOLS
577
+
578
+ /**
579
+ * Stagehand Tools
580
+ *
581
+ * Creates AI-powered browser tools bound to a StagehandBrowser instance.
582
+ */
583
+
584
+ /**
585
+ * Creates all Stagehand tools bound to a StagehandBrowser instance, returned as a record keyed by string (NOTE(review): presumably the STAGEHAND_TOOLS names - confirm).
586
+ * The browser is lazily initialized on first tool use.
587
+ */
588
+ declare function createStagehandTools(browser: StagehandBrowser): Record<string, Tool<any, any>>;
589
+
590
+ export { type ActInput, type ActResult, type CloseInput, type ExtractInput, type ExtractResult, type ModelConfiguration, type NavigateInput, type ObserveInput, type ObserveResult, STAGEHAND_TOOLS, type StagehandAction, StagehandBrowser, type StagehandBrowserConfig, type StagehandToolName, type TabsInput, actInputSchema, closeInputSchema, createStagehandTools, extractInputSchema, navigateInputSchema, observeInputSchema, stagehandSchemas, tabsInputSchema };