browser-pilot 0.0.8 → 0.0.9

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in the public registry to which they were published.
package/dist/index.d.cts CHANGED
@@ -1,12 +1,12 @@
1
- export { BatchExecutor, addBatchToPage } from './actions.cjs';
2
- import { R as RequestPattern, a as RequestHandler } from './types-DklIxnbO.cjs';
3
- export { d as ActionOptions, e as ActionResult, A as ActionType, B as BatchOptions, b as BatchResult, Q as ClearCookiesOptions, C as ConsoleHandler, f as ConsoleMessage, g as ConsoleMessageType, z as ContinueRequestOptions, X as Cookie, h as CustomSelectConfig, Y as DeleteCookieOptions, w as DeviceDescriptor, x as DeviceName, D as Dialog, i as DialogHandler, j as DialogType, k as Download, E as ElementInfo, l as ElementNotFoundError, m as EmulationState, n as ErrorHandler, H as FailRequestOptions, F as FileInput, o as FillOptions, J as FulfillRequestOptions, G as GeolocationOptions, I as InteractiveElement, K as InterceptedRequest, N as NavigationError, p as NetworkIdleOptions, P as Page, q as PageError, r as PageSnapshot, L as RequestActions, M as ResourceType, O as RouteOptions, Z as SetCookieOptions, s as SnapshotNode, S as Step, c as StepResult, t as SubmitOptions, T as TimeoutError, u as TypeOptions, U as UserAgentMetadata, v as UserAgentOptions, V as ViewportOptions, W as WaitForOptions, _ as WaitOptions, $ as WaitResult, a0 as WaitState, y as devices, a1 as waitForAnyElement, a2 as waitForElement, a3 as waitForNavigation, a4 as waitForNetworkIdle } from './types-DklIxnbO.cjs';
4
- export { Browser, BrowserOptions, connect } from './browser.cjs';
1
+ export { BatchExecutor, ValidationError, ValidationResult, addBatchToPage, validateSteps } from './actions.cjs';
2
+ import { R as RequestPattern, a as RequestHandler, C as CaptureResult } from './types-CYw-7vx1.cjs';
3
+ export { k as ActionOptions, l as ActionResult, A as ActionType, d as AudioChunk, e as AudioInput, f as AudioInputState, g as AudioOutput, B as BatchOptions, b as BatchResult, h as CaptureOptions, a3 as ClearCookiesOptions, m as ConsoleHandler, n as ConsoleMessage, o as ConsoleMessageType, Y as ContinueRequestOptions, a4 as Cookie, p as CustomSelectConfig, a5 as DeleteCookieOptions, O as DeviceDescriptor, Q as DeviceName, D as Dialog, q as DialogHandler, r as DialogType, s as Download, E as ElementInfo, t as ElementNotFoundError, u as EmulationState, v as ErrorHandler, Z as FailRequestOptions, F as FileInput, w as FillOptions, _ as FulfillRequestOptions, G as GeolocationOptions, I as InteractiveElement, $ as InterceptedRequest, N as NavigationError, x as NetworkIdleOptions, y as Page, z as PageError, H as PageSnapshot, P as PlayOptions, a0 as RequestActions, a1 as ResourceType, i as RoundTripOptions, j as RoundTripResult, a2 as RouteOptions, a6 as SetCookieOptions, J as SnapshotNode, S as Step, c as StepResult, K as SubmitOptions, T as TimeoutError, L as TypeOptions, U as UserAgentMetadata, M as UserAgentOptions, V as ViewportOptions, W as WaitForOptions, a7 as WaitOptions, a8 as WaitResult, a9 as WaitState, X as devices, aa as waitForAnyElement, ab as waitForElement, ac as waitForNavigation, ad as waitForNetworkIdle } from './types-CYw-7vx1.cjs';
5
4
  import { C as CDPClient } from './client-7Nqka5MV.cjs';
6
5
  export { a as CDPClientOptions, c as createCDPClient } from './client-7Nqka5MV.cjs';
6
+ export { Browser, BrowserOptions, connect } from './browser.cjs';
7
7
  export { CDPError } from './cdp.cjs';
8
8
  export { BrowserBaseProvider, BrowserlessProvider, GenericProvider, createProvider, discoverTargets, getBrowserWebSocketUrl } from './providers.cjs';
9
- export { C as ConnectOptions, a as CreateSessionOptions, P as Provider, b as ProviderSession } from './types-D_uDqh0Z.cjs';
9
+ export { b as ConnectOptions, C as CreateSessionOptions, P as Provider, a as ProviderSession } from './types--wXNHUwt.cjs';
10
10
 
11
11
  /**
12
12
  * Request interception implementation
@@ -66,6 +66,142 @@ declare class RequestInterceptor {
66
66
  private failRequest;
67
67
  }
68
68
 
69
+ /**
70
+ * Audio encoding utilities — zero dependencies
71
+ *
72
+ * Handles base64 conversion, PCM/WAV encoding/decoding,
73
+ * RMS calculation, and test signal generation.
74
+ */
75
+
76
+ /**
77
+ * Convert ArrayBuffer or Uint8Array to base64 string.
78
+ */
79
+ declare function bufferToBase64(data: ArrayBuffer | Uint8Array): string;
80
+ /**
81
+ * Calculate RMS (root mean square) of a Float32Array signal.
82
+ * Returns 0 for empty arrays.
83
+ */
84
+ declare function calculateRMS(samples: Float32Array): number;
85
+ /**
86
+ * Generate a WAV file from PCM Float32 data.
87
+ * Encodes as 16-bit PCM WAV.
88
+ */
89
+ declare function pcmToWav(options: {
90
+ left: Float32Array;
91
+ right?: Float32Array;
92
+ sampleRate: number;
93
+ }): ArrayBuffer;
94
+ /**
95
+ * Parse a WAV file header to extract metadata.
96
+ * Throws if the data is not a valid WAV file.
97
+ */
98
+ declare function parseWavHeader(data: ArrayBuffer): {
99
+ sampleRate: number;
100
+ channels: number;
101
+ bitsPerSample: number;
102
+ dataOffset: number;
103
+ dataLength: number;
104
+ };
105
+ /**
106
+ * Generate silence as Float32Array.
107
+ */
108
+ declare function generateSilence(durationMs: number, sampleRate?: number): Float32Array;
109
+ /**
110
+ * Generate a sine wave tone (useful for testing audio pipelines).
111
+ */
112
+ declare function generateTone(frequency: number, durationMs: number, sampleRate?: number, amplitude?: number): Float32Array;
113
+
114
+ /**
115
+ * Chrome launch flags for audio automation.
116
+ *
117
+ * Use these when launching Chrome yourself (not connecting to a remote browser).
118
+ * These provide a complementary path to the JS injection approach — they
119
+ * configure Chrome's built-in fake device support at launch time.
120
+ */
121
+ interface AudioFlagOptions {
122
+ /** Path to WAV file for fake microphone input */
123
+ inputWavPath?: string;
124
+ /** Disable looping of the input file (play once) */
125
+ noLoop?: boolean;
126
+ }
127
+ /**
128
+ * Get Chrome flags needed for audio automation.
129
+ *
130
+ * @example
131
+ * ```typescript
132
+ * import { getAudioChromeFlags } from 'browser-pilot';
133
+ *
134
+ * const flags = getAudioChromeFlags({
135
+ * inputWavPath: '/tmp/prompt.wav',
136
+ * });
137
+ * // Pass to chrome-launcher or similar
138
+ * ```
139
+ */
140
+ declare function getAudioChromeFlags(options?: AudioFlagOptions): string[];
141
+
142
+ /**
143
+ * Audio permission handling via CDP
144
+ *
145
+ * Grants microphone permissions both at the browser level (CDP)
146
+ * and as a JS safety net (for sites that pre-check permissions.query).
147
+ */
148
+
149
+ /**
150
+ * Grant microphone permissions for a page.
151
+ *
152
+ * Uses two layers:
153
+ * 1. CDP Browser.grantPermissions — the primary mechanism
154
+ * 2. JS navigator.permissions.query override — safety net for sites
155
+ * that check permission state before calling getUserMedia
156
+ */
157
+ declare function grantAudioPermissions(cdp: CDPClient, origin?: string): Promise<void>;
158
+
159
+ /**
160
+ * Thin OpenAI Whisper transcription wrapper
161
+ *
162
+ * Zero dependencies — uses only fetch(). Gated on OPENAI_API_KEY env var.
163
+ * Accepts raw PCM/WAV audio and returns transcript text.
164
+ */
165
+
166
+ interface TranscribeOptions {
167
+ /** OpenAI API key. Defaults to OPENAI_API_KEY env var. */
168
+ apiKey?: string;
169
+ /** Whisper model to use. Default: 'whisper-1' */
170
+ model?: string;
171
+ /** Language hint (BCP-47, e.g. 'en'). Optional — Whisper auto-detects. */
172
+ language?: string;
173
+ /** Response format. Default: 'text' */
174
+ responseFormat?: 'text' | 'json' | 'verbose_json' | 'srt' | 'vtt';
175
+ /** Optional prompt to guide the model (e.g. domain terms). */
176
+ prompt?: string;
177
+ }
178
+ interface TranscribeResult {
179
+ /** Transcript text */
180
+ text: string;
181
+ /** Duration of the audio in ms */
182
+ audioDurationMs: number;
183
+ /** Time spent on the API call in ms */
184
+ apiDurationMs: number;
185
+ }
186
+ /**
187
+ * Transcribe a CaptureResult using OpenAI Whisper API.
188
+ *
189
+ * Requires OPENAI_API_KEY env var or apiKey option.
190
+ * Returns the transcript text with timing metadata.
191
+ *
192
+ * @example
193
+ * ```typescript
194
+ * const capture = await page.audioOutput.stop();
195
+ * const result = await transcribe(capture);
196
+ * console.log(result.text);
197
+ * ```
198
+ */
199
+ declare function transcribe(audio: CaptureResult, options?: TranscribeOptions): Promise<TranscribeResult>;
200
+ /**
201
+ * Check if transcription is available (API key is set).
202
+ */
203
+ declare function isTranscriptionAvailable(): boolean;
204
+
69
205
  /**
70
206
  * Execution tracing and logging
71
207
  */
@@ -154,4 +290,4 @@ declare function enableTracing(options?: Partial<Omit<TracerOptions, 'enabled'>>
154
290
  */
155
291
  declare function disableTracing(): void;
156
292
 
157
- export { CDPClient, RequestHandler, RequestInterceptor, RequestPattern, type TraceCategory, type TraceEvent, type TraceLevel, Tracer, type TracerOptions, disableTracing, enableTracing, getTracer };
293
+ export { type AudioFlagOptions, CDPClient, CaptureResult, RequestHandler, RequestInterceptor, RequestPattern, type TraceCategory, type TraceEvent, type TraceLevel, Tracer, type TracerOptions, type TranscribeOptions, type TranscribeResult, bufferToBase64, calculateRMS, disableTracing, enableTracing, generateSilence, generateTone, getAudioChromeFlags, getTracer, grantAudioPermissions, isTranscriptionAvailable, parseWavHeader, pcmToWav, transcribe };
package/dist/index.d.ts CHANGED
@@ -1,12 +1,12 @@
1
- export { BatchExecutor, addBatchToPage } from './actions.js';
2
- import { R as RequestPattern, a as RequestHandler } from './types-Pv8KzZ6l.js';
3
- export { d as ActionOptions, e as ActionResult, A as ActionType, B as BatchOptions, b as BatchResult, Q as ClearCookiesOptions, C as ConsoleHandler, f as ConsoleMessage, g as ConsoleMessageType, z as ContinueRequestOptions, X as Cookie, h as CustomSelectConfig, Y as DeleteCookieOptions, w as DeviceDescriptor, x as DeviceName, D as Dialog, i as DialogHandler, j as DialogType, k as Download, E as ElementInfo, l as ElementNotFoundError, m as EmulationState, n as ErrorHandler, H as FailRequestOptions, F as FileInput, o as FillOptions, J as FulfillRequestOptions, G as GeolocationOptions, I as InteractiveElement, K as InterceptedRequest, N as NavigationError, p as NetworkIdleOptions, P as Page, q as PageError, r as PageSnapshot, L as RequestActions, M as ResourceType, O as RouteOptions, Z as SetCookieOptions, s as SnapshotNode, S as Step, c as StepResult, t as SubmitOptions, T as TimeoutError, u as TypeOptions, U as UserAgentMetadata, v as UserAgentOptions, V as ViewportOptions, W as WaitForOptions, _ as WaitOptions, $ as WaitResult, a0 as WaitState, y as devices, a1 as waitForAnyElement, a2 as waitForElement, a3 as waitForNavigation, a4 as waitForNetworkIdle } from './types-Pv8KzZ6l.js';
4
- export { Browser, BrowserOptions, connect } from './browser.js';
1
+ export { BatchExecutor, ValidationError, ValidationResult, addBatchToPage, validateSteps } from './actions.js';
2
+ import { R as RequestPattern, a as RequestHandler, C as CaptureResult } from './types-DOGsEYQa.js';
3
+ export { k as ActionOptions, l as ActionResult, A as ActionType, d as AudioChunk, e as AudioInput, f as AudioInputState, g as AudioOutput, B as BatchOptions, b as BatchResult, h as CaptureOptions, a3 as ClearCookiesOptions, m as ConsoleHandler, n as ConsoleMessage, o as ConsoleMessageType, Y as ContinueRequestOptions, a4 as Cookie, p as CustomSelectConfig, a5 as DeleteCookieOptions, O as DeviceDescriptor, Q as DeviceName, D as Dialog, q as DialogHandler, r as DialogType, s as Download, E as ElementInfo, t as ElementNotFoundError, u as EmulationState, v as ErrorHandler, Z as FailRequestOptions, F as FileInput, w as FillOptions, _ as FulfillRequestOptions, G as GeolocationOptions, I as InteractiveElement, $ as InterceptedRequest, N as NavigationError, x as NetworkIdleOptions, y as Page, z as PageError, H as PageSnapshot, P as PlayOptions, a0 as RequestActions, a1 as ResourceType, i as RoundTripOptions, j as RoundTripResult, a2 as RouteOptions, a6 as SetCookieOptions, J as SnapshotNode, S as Step, c as StepResult, K as SubmitOptions, T as TimeoutError, L as TypeOptions, U as UserAgentMetadata, M as UserAgentOptions, V as ViewportOptions, W as WaitForOptions, a7 as WaitOptions, a8 as WaitResult, a9 as WaitState, X as devices, aa as waitForAnyElement, ab as waitForElement, ac as waitForNavigation, ad as waitForNetworkIdle } from './types-DOGsEYQa.js';
5
4
  import { C as CDPClient } from './client-7Nqka5MV.js';
6
5
  export { a as CDPClientOptions, c as createCDPClient } from './client-7Nqka5MV.js';
6
+ export { Browser, BrowserOptions, connect } from './browser.js';
7
7
  export { CDPError } from './cdp.js';
8
8
  export { BrowserBaseProvider, BrowserlessProvider, GenericProvider, createProvider, discoverTargets, getBrowserWebSocketUrl } from './providers.js';
9
- export { C as ConnectOptions, a as CreateSessionOptions, P as Provider, b as ProviderSession } from './types-D_uDqh0Z.js';
9
+ export { b as ConnectOptions, C as CreateSessionOptions, P as Provider, a as ProviderSession } from './types--wXNHUwt.js';
10
10
 
11
11
  /**
12
12
  * Request interception implementation
@@ -66,6 +66,142 @@ declare class RequestInterceptor {
66
66
  private failRequest;
67
67
  }
68
68
 
69
+ /**
70
+ * Audio encoding utilities — zero dependencies
71
+ *
72
+ * Handles base64 conversion, PCM/WAV encoding/decoding,
73
+ * RMS calculation, and test signal generation.
74
+ */
75
+
76
+ /**
77
+ * Convert ArrayBuffer or Uint8Array to base64 string.
78
+ */
79
+ declare function bufferToBase64(data: ArrayBuffer | Uint8Array): string;
80
+ /**
81
+ * Calculate RMS (root mean square) of a Float32Array signal.
82
+ * Returns 0 for empty arrays.
83
+ */
84
+ declare function calculateRMS(samples: Float32Array): number;
85
+ /**
86
+ * Generate a WAV file from PCM Float32 data.
87
+ * Encodes as 16-bit PCM WAV.
88
+ */
89
+ declare function pcmToWav(options: {
90
+ left: Float32Array;
91
+ right?: Float32Array;
92
+ sampleRate: number;
93
+ }): ArrayBuffer;
94
+ /**
95
+ * Parse a WAV file header to extract metadata.
96
+ * Throws if the data is not a valid WAV file.
97
+ */
98
+ declare function parseWavHeader(data: ArrayBuffer): {
99
+ sampleRate: number;
100
+ channels: number;
101
+ bitsPerSample: number;
102
+ dataOffset: number;
103
+ dataLength: number;
104
+ };
105
+ /**
106
+ * Generate silence as Float32Array.
107
+ */
108
+ declare function generateSilence(durationMs: number, sampleRate?: number): Float32Array;
109
+ /**
110
+ * Generate a sine wave tone (useful for testing audio pipelines).
111
+ */
112
+ declare function generateTone(frequency: number, durationMs: number, sampleRate?: number, amplitude?: number): Float32Array;
113
+
114
+ /**
115
+ * Chrome launch flags for audio automation.
116
+ *
117
+ * Use these when launching Chrome yourself (not connecting to a remote browser).
118
+ * These provide a complementary path to the JS injection approach — they
119
+ * configure Chrome's built-in fake device support at launch time.
120
+ */
121
+ interface AudioFlagOptions {
122
+ /** Path to WAV file for fake microphone input */
123
+ inputWavPath?: string;
124
+ /** Disable looping of the input file (play once) */
125
+ noLoop?: boolean;
126
+ }
127
+ /**
128
+ * Get Chrome flags needed for audio automation.
129
+ *
130
+ * @example
131
+ * ```typescript
132
+ * import { getAudioChromeFlags } from 'browser-pilot';
133
+ *
134
+ * const flags = getAudioChromeFlags({
135
+ * inputWavPath: '/tmp/prompt.wav',
136
+ * });
137
+ * // Pass to chrome-launcher or similar
138
+ * ```
139
+ */
140
+ declare function getAudioChromeFlags(options?: AudioFlagOptions): string[];
141
+
142
+ /**
143
+ * Audio permission handling via CDP
144
+ *
145
+ * Grants microphone permissions both at the browser level (CDP)
146
+ * and as a JS safety net (for sites that pre-check permissions.query).
147
+ */
148
+
149
+ /**
150
+ * Grant microphone permissions for a page.
151
+ *
152
+ * Uses two layers:
153
+ * 1. CDP Browser.grantPermissions — the primary mechanism
154
+ * 2. JS navigator.permissions.query override — safety net for sites
155
+ * that check permission state before calling getUserMedia
156
+ */
157
+ declare function grantAudioPermissions(cdp: CDPClient, origin?: string): Promise<void>;
158
+
159
+ /**
160
+ * Thin OpenAI Whisper transcription wrapper
161
+ *
162
+ * Zero dependencies — uses only fetch(). Gated on OPENAI_API_KEY env var.
163
+ * Accepts raw PCM/WAV audio and returns transcript text.
164
+ */
165
+
166
+ interface TranscribeOptions {
167
+ /** OpenAI API key. Defaults to OPENAI_API_KEY env var. */
168
+ apiKey?: string;
169
+ /** Whisper model to use. Default: 'whisper-1' */
170
+ model?: string;
171
+ /** Language hint (BCP-47, e.g. 'en'). Optional — Whisper auto-detects. */
172
+ language?: string;
173
+ /** Response format. Default: 'text' */
174
+ responseFormat?: 'text' | 'json' | 'verbose_json' | 'srt' | 'vtt';
175
+ /** Optional prompt to guide the model (e.g. domain terms). */
176
+ prompt?: string;
177
+ }
178
+ interface TranscribeResult {
179
+ /** Transcript text */
180
+ text: string;
181
+ /** Duration of the audio in ms */
182
+ audioDurationMs: number;
183
+ /** Time spent on the API call in ms */
184
+ apiDurationMs: number;
185
+ }
186
+ /**
187
+ * Transcribe a CaptureResult using OpenAI Whisper API.
188
+ *
189
+ * Requires OPENAI_API_KEY env var or apiKey option.
190
+ * Returns the transcript text with timing metadata.
191
+ *
192
+ * @example
193
+ * ```typescript
194
+ * const capture = await page.audioOutput.stop();
195
+ * const result = await transcribe(capture);
196
+ * console.log(result.text);
197
+ * ```
198
+ */
199
+ declare function transcribe(audio: CaptureResult, options?: TranscribeOptions): Promise<TranscribeResult>;
200
+ /**
201
+ * Check if transcription is available (API key is set).
202
+ */
203
+ declare function isTranscriptionAvailable(): boolean;
204
+
69
205
  /**
70
206
  * Execution tracing and logging
71
207
  */
@@ -154,4 +290,4 @@ declare function enableTracing(options?: Partial<Omit<TracerOptions, 'enabled'>>
154
290
  */
155
291
  declare function disableTracing(): void;
156
292
 
157
- export { CDPClient, RequestHandler, RequestInterceptor, RequestPattern, type TraceCategory, type TraceEvent, type TraceLevel, Tracer, type TracerOptions, disableTracing, enableTracing, getTracer };
293
+ export { type AudioFlagOptions, CDPClient, CaptureResult, RequestHandler, RequestInterceptor, RequestPattern, type TraceCategory, type TraceEvent, type TraceLevel, Tracer, type TracerOptions, type TranscribeOptions, type TranscribeResult, bufferToBase64, calculateRMS, disableTracing, enableTracing, generateSilence, generateTone, getAudioChromeFlags, getTracer, grantAudioPermissions, isTranscriptionAvailable, parseWavHeader, pcmToWav, transcribe };