npm - browser-pilot - Versions diffs - 0.0.8 → 0.0.9 - Mend

browser-pilot 0.0.8 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

package/README.md +61 -1
package/dist/actions.cjs +465 -6
package/dist/actions.d.cts +22 -3
package/dist/actions.d.ts +22 -3
package/dist/actions.mjs +5 -3
package/dist/browser.cjs +1350 -14
package/dist/browser.d.cts +3 -3
package/dist/browser.d.ts +3 -3
package/dist/browser.mjs +2 -2
package/dist/{chunk-JN44FHTK.mjs → chunk-7OSR2CAE.mjs} +1429 -14
package/dist/chunk-KKW2SZLV.mjs +741 -0
package/dist/cli.mjs +6150 -103
package/dist/index.cjs +2026 -23
package/dist/index.d.cts +142 -6
package/dist/index.d.ts +142 -6
package/dist/index.mjs +357 -10
package/dist/providers.d.cts +2 -2
package/dist/providers.d.ts +2 -2
package/dist/{types-D_uDqh0Z.d.cts → types--wXNHUwt.d.cts} +1 -1
package/dist/{types-D_uDqh0Z.d.ts → types--wXNHUwt.d.ts} +1 -1
package/dist/{types-DklIxnbO.d.cts → types-CYw-7vx1.d.cts} +244 -1
package/dist/{types-Pv8KzZ6l.d.ts → types-DOGsEYQa.d.ts} +244 -1
package/package.json +3 -3
package/dist/chunk-ZIQA4JOT.mjs +0 -226
package/dist/chunk-ZTQ37YQT.mjs +0 -283
package/dist/cli.cjs +0 -6377
package/dist/cli.d.cts +0 -25
package/dist/cli.d.ts +0 -25

package/dist/index.d.cts CHANGED

@@ -1,12 +1,12 @@
-export { BatchExecutor, addBatchToPage } from './actions.cjs';
-import { R as RequestPattern, a as RequestHandler } from './types-DklIxnbO.cjs';
-export { d as ActionOptions, e as ActionResult, A as ActionType, B as BatchOptions, b as BatchResult, Q as ClearCookiesOptions, C as ConsoleHandler, f as ConsoleMessage, g as ConsoleMessageType, z as ContinueRequestOptions, X as Cookie, h as CustomSelectConfig, Y as DeleteCookieOptions, w as DeviceDescriptor, x as DeviceName, D as Dialog, i as DialogHandler, j as DialogType, k as Download, E as ElementInfo, l as ElementNotFoundError, m as EmulationState, n as ErrorHandler, H as FailRequestOptions, F as FileInput, o as FillOptions, J as FulfillRequestOptions, G as GeolocationOptions, I as InteractiveElement, K as InterceptedRequest, N as NavigationError, p as NetworkIdleOptions, P as Page, q as PageError, r as PageSnapshot, L as RequestActions, M as ResourceType, O as RouteOptions, Z as SetCookieOptions, s as SnapshotNode, S as Step, c as StepResult, t as SubmitOptions, T as TimeoutError, u as TypeOptions, U as UserAgentMetadata, v as UserAgentOptions, V as ViewportOptions, W as WaitForOptions, _ as WaitOptions, $ as WaitResult, a0 as WaitState, y as devices, a1 as waitForAnyElement, a2 as waitForElement, a3 as waitForNavigation, a4 as waitForNetworkIdle } from './types-DklIxnbO.cjs';
-export { Browser, BrowserOptions, connect } from './browser.cjs';
+export { BatchExecutor, ValidationError, ValidationResult, addBatchToPage, validateSteps } from './actions.cjs';
+import { R as RequestPattern, a as RequestHandler, C as CaptureResult } from './types-CYw-7vx1.cjs';
+export { k as ActionOptions, l as ActionResult, A as ActionType, d as AudioChunk, e as AudioInput, f as AudioInputState, g as AudioOutput, B as BatchOptions, b as BatchResult, h as CaptureOptions, a3 as ClearCookiesOptions, m as ConsoleHandler, n as ConsoleMessage, o as ConsoleMessageType, Y as ContinueRequestOptions, a4 as Cookie, p as CustomSelectConfig, a5 as DeleteCookieOptions, O as DeviceDescriptor, Q as DeviceName, D as Dialog, q as DialogHandler, r as DialogType, s as Download, E as ElementInfo, t as ElementNotFoundError, u as EmulationState, v as ErrorHandler, Z as FailRequestOptions, F as FileInput, w as FillOptions, _ as FulfillRequestOptions, G as GeolocationOptions, I as InteractiveElement, $ as InterceptedRequest, N as NavigationError, x as NetworkIdleOptions, y as Page, z as PageError, H as PageSnapshot, P as PlayOptions, a0 as RequestActions, a1 as ResourceType, i as RoundTripOptions, j as RoundTripResult, a2 as RouteOptions, a6 as SetCookieOptions, J as SnapshotNode, S as Step, c as StepResult, K as SubmitOptions, T as TimeoutError, L as TypeOptions, U as UserAgentMetadata, M as UserAgentOptions, V as ViewportOptions, W as WaitForOptions, a7 as WaitOptions, a8 as WaitResult, a9 as WaitState, X as devices, aa as waitForAnyElement, ab as waitForElement, ac as waitForNavigation, ad as waitForNetworkIdle } from './types-CYw-7vx1.cjs';
 import { C as CDPClient } from './client-7Nqka5MV.cjs';
 export { a as CDPClientOptions, c as createCDPClient } from './client-7Nqka5MV.cjs';
+export { Browser, BrowserOptions, connect } from './browser.cjs';
 export { CDPError } from './cdp.cjs';
 export { BrowserBaseProvider, BrowserlessProvider, GenericProvider, createProvider, discoverTargets, getBrowserWebSocketUrl } from './providers.cjs';
-export { C as ConnectOptions, a as CreateSessionOptions, P as Provider, b as ProviderSession } from './types-D_uDqh0Z.cjs';
+export { b as ConnectOptions, C as CreateSessionOptions, P as Provider, a as ProviderSession } from './types--wXNHUwt.cjs';
 /**
  * Request interception implementation
@@ -66,6 +66,142 @@ declare class RequestInterceptor {
     private failRequest;
 }
+/**
+ * Audio encoding utilities — zero dependencies
+ *
+ * Handles base64 conversion, PCM/WAV encoding/decoding,
+ * RMS calculation, and test signal generation.
+ */
+/**
+ * Convert ArrayBuffer or Uint8Array to base64 string.
+ */
+declare function bufferToBase64(data: ArrayBuffer | Uint8Array): string;
+/**
+ * Calculate RMS (root mean square) of a Float32Array signal.
+ * Returns 0 for empty arrays.
+ */
+declare function calculateRMS(samples: Float32Array): number;
+/**
+ * Generate a WAV file from PCM Float32 data.
+ * Encodes as 16-bit PCM WAV.
+ */
+declare function pcmToWav(options: {
+    left: Float32Array;
+    right?: Float32Array;
+    sampleRate: number;
+}): ArrayBuffer;
+/**
+ * Parse a WAV file header to extract metadata.
+ * Throws if the data is not a valid WAV file.
+ */
+declare function parseWavHeader(data: ArrayBuffer): {
+    sampleRate: number;
+    channels: number;
+    bitsPerSample: number;
+    dataOffset: number;
+    dataLength: number;
+};
+/**
+ * Generate silence as Float32Array.
+ */
+declare function generateSilence(durationMs: number, sampleRate?: number): Float32Array;
+/**
+ * Generate a sine wave tone (useful for testing audio pipelines).
+ */
+declare function generateTone(frequency: number, durationMs: number, sampleRate?: number, amplitude?: number): Float32Array;
+/**
+ * Chrome launch flags for audio automation.
+ *
+ * Use these when launching Chrome yourself (not connecting to a remote browser).
+ * These provide a complementary path to the JS injection approach — they
+ * configure Chrome's built-in fake device support at launch time.
+ */
+interface AudioFlagOptions {
+    /** Path to WAV file for fake microphone input */
+    inputWavPath?: string;
+    /** Disable looping of the input file (play once) */
+    noLoop?: boolean;
+}
+/**
+ * Get Chrome flags needed for audio automation.
+ *
+ * @example
+ * ```typescript
+ * import { getAudioChromeFlags } from 'browser-pilot';
+ *
+ * const flags = getAudioChromeFlags({
+ *   inputWavPath: '/tmp/prompt.wav',
+ * });
+ * // Pass to chrome-launcher or similar
+ * ```
+ */
+declare function getAudioChromeFlags(options?: AudioFlagOptions): string[];
+/**
+ * Audio permission handling via CDP
+ *
+ * Grants microphone permissions both at the browser level (CDP)
+ * and as a JS safety net (for sites that pre-check permissions.query).
+ */
+/**
+ * Grant microphone permissions for a page.
+ *
+ * Uses two layers:
+ * 1. CDP Browser.grantPermissions — the primary mechanism
+ * 2. JS navigator.permissions.query override — safety net for sites
+ *    that check permission state before calling getUserMedia
+ */
+declare function grantAudioPermissions(cdp: CDPClient, origin?: string): Promise<void>;
+/**
+ * Thin OpenAI Whisper transcription wrapper
+ *
+ * Zero dependencies — uses only fetch(). Gated on OPENAI_API_KEY env var.
+ * Accepts raw PCM/WAV audio and returns transcript text.
+ */
+interface TranscribeOptions {
+    /** OpenAI API key. Defaults to OPENAI_API_KEY env var. */
+    apiKey?: string;
+    /** Whisper model to use. Default: 'whisper-1' */
+    model?: string;
+    /** Language hint (BCP-47, e.g. 'en'). Optional — Whisper auto-detects. */
+    language?: string;
+    /** Response format. Default: 'text' */
+    responseFormat?: 'text' | 'json' | 'verbose_json' | 'srt' | 'vtt';
+    /** Optional prompt to guide the model (e.g. domain terms). */
+    prompt?: string;
+}
+interface TranscribeResult {
+    /** Transcript text */
+    text: string;
+    /** Duration of the audio in ms */
+    audioDurationMs: number;
+    /** Time spent on the API call in ms */
+    apiDurationMs: number;
+}
+/**
+ * Transcribe a CaptureResult using OpenAI Whisper API.
+ *
+ * Requires OPENAI_API_KEY env var or apiKey option.
+ * Returns the transcript text with timing metadata.
+ *
+ * @example
+ * ```typescript
+ * const capture = await page.audioOutput.stop();
+ * const result = await transcribe(capture);
+ * console.log(result.text);
+ * ```
+ */
+declare function transcribe(audio: CaptureResult, options?: TranscribeOptions): Promise<TranscribeResult>;
+/**
+ * Check if transcription is available (API key is set).
+ */
+declare function isTranscriptionAvailable(): boolean;
 /**
  * Execution tracing and logging
  */
@@ -154,4 +290,4 @@ declare function enableTracing(options?: Partial<Omit<TracerOptions, 'enabled'>>
  */
 declare function disableTracing(): void;
-export { CDPClient, RequestHandler, RequestInterceptor, RequestPattern, type TraceCategory, type TraceEvent, type TraceLevel, Tracer, type TracerOptions, disableTracing, enableTracing, getTracer };
+export { type AudioFlagOptions, CDPClient, CaptureResult, RequestHandler, RequestInterceptor, RequestPattern, type TraceCategory, type TraceEvent, type TraceLevel, Tracer, type TracerOptions, type TranscribeOptions, type TranscribeResult, bufferToBase64, calculateRMS, disableTracing, enableTracing, generateSilence, generateTone, getAudioChromeFlags, getTracer, grantAudioPermissions, isTranscriptionAvailable, parseWavHeader, pcmToWav, transcribe };

package/dist/index.d.ts CHANGED

@@ -1,12 +1,12 @@
-export { BatchExecutor, addBatchToPage } from './actions.js';
-import { R as RequestPattern, a as RequestHandler } from './types-Pv8KzZ6l.js';
-export { d as ActionOptions, e as ActionResult, A as ActionType, B as BatchOptions, b as BatchResult, Q as ClearCookiesOptions, C as ConsoleHandler, f as ConsoleMessage, g as ConsoleMessageType, z as ContinueRequestOptions, X as Cookie, h as CustomSelectConfig, Y as DeleteCookieOptions, w as DeviceDescriptor, x as DeviceName, D as Dialog, i as DialogHandler, j as DialogType, k as Download, E as ElementInfo, l as ElementNotFoundError, m as EmulationState, n as ErrorHandler, H as FailRequestOptions, F as FileInput, o as FillOptions, J as FulfillRequestOptions, G as GeolocationOptions, I as InteractiveElement, K as InterceptedRequest, N as NavigationError, p as NetworkIdleOptions, P as Page, q as PageError, r as PageSnapshot, L as RequestActions, M as ResourceType, O as RouteOptions, Z as SetCookieOptions, s as SnapshotNode, S as Step, c as StepResult, t as SubmitOptions, T as TimeoutError, u as TypeOptions, U as UserAgentMetadata, v as UserAgentOptions, V as ViewportOptions, W as WaitForOptions, _ as WaitOptions, $ as WaitResult, a0 as WaitState, y as devices, a1 as waitForAnyElement, a2 as waitForElement, a3 as waitForNavigation, a4 as waitForNetworkIdle } from './types-Pv8KzZ6l.js';
-export { Browser, BrowserOptions, connect } from './browser.js';
+export { BatchExecutor, ValidationError, ValidationResult, addBatchToPage, validateSteps } from './actions.js';
+import { R as RequestPattern, a as RequestHandler, C as CaptureResult } from './types-DOGsEYQa.js';
+export { k as ActionOptions, l as ActionResult, A as ActionType, d as AudioChunk, e as AudioInput, f as AudioInputState, g as AudioOutput, B as BatchOptions, b as BatchResult, h as CaptureOptions, a3 as ClearCookiesOptions, m as ConsoleHandler, n as ConsoleMessage, o as ConsoleMessageType, Y as ContinueRequestOptions, a4 as Cookie, p as CustomSelectConfig, a5 as DeleteCookieOptions, O as DeviceDescriptor, Q as DeviceName, D as Dialog, q as DialogHandler, r as DialogType, s as Download, E as ElementInfo, t as ElementNotFoundError, u as EmulationState, v as ErrorHandler, Z as FailRequestOptions, F as FileInput, w as FillOptions, _ as FulfillRequestOptions, G as GeolocationOptions, I as InteractiveElement, $ as InterceptedRequest, N as NavigationError, x as NetworkIdleOptions, y as Page, z as PageError, H as PageSnapshot, P as PlayOptions, a0 as RequestActions, a1 as ResourceType, i as RoundTripOptions, j as RoundTripResult, a2 as RouteOptions, a6 as SetCookieOptions, J as SnapshotNode, S as Step, c as StepResult, K as SubmitOptions, T as TimeoutError, L as TypeOptions, U as UserAgentMetadata, M as UserAgentOptions, V as ViewportOptions, W as WaitForOptions, a7 as WaitOptions, a8 as WaitResult, a9 as WaitState, X as devices, aa as waitForAnyElement, ab as waitForElement, ac as waitForNavigation, ad as waitForNetworkIdle } from './types-DOGsEYQa.js';
 import { C as CDPClient } from './client-7Nqka5MV.js';
 export { a as CDPClientOptions, c as createCDPClient } from './client-7Nqka5MV.js';
+export { Browser, BrowserOptions, connect } from './browser.js';
 export { CDPError } from './cdp.js';
 export { BrowserBaseProvider, BrowserlessProvider, GenericProvider, createProvider, discoverTargets, getBrowserWebSocketUrl } from './providers.js';
-export { C as ConnectOptions, a as CreateSessionOptions, P as Provider, b as ProviderSession } from './types-D_uDqh0Z.js';
+export { b as ConnectOptions, C as CreateSessionOptions, P as Provider, a as ProviderSession } from './types--wXNHUwt.js';
 /**
  * Request interception implementation
@@ -66,6 +66,142 @@ declare class RequestInterceptor {
     private failRequest;
 }
+/**
+ * Audio encoding utilities — zero dependencies
+ *
+ * Handles base64 conversion, PCM/WAV encoding/decoding,
+ * RMS calculation, and test signal generation.
+ */
+/**
+ * Convert ArrayBuffer or Uint8Array to base64 string.
+ */
+declare function bufferToBase64(data: ArrayBuffer | Uint8Array): string;
+/**
+ * Calculate RMS (root mean square) of a Float32Array signal.
+ * Returns 0 for empty arrays.
+ */
+declare function calculateRMS(samples: Float32Array): number;
+/**
+ * Generate a WAV file from PCM Float32 data.
+ * Encodes as 16-bit PCM WAV.
+ */
+declare function pcmToWav(options: {
+    left: Float32Array;
+    right?: Float32Array;
+    sampleRate: number;
+}): ArrayBuffer;
+/**
+ * Parse a WAV file header to extract metadata.
+ * Throws if the data is not a valid WAV file.
+ */
+declare function parseWavHeader(data: ArrayBuffer): {
+    sampleRate: number;
+    channels: number;
+    bitsPerSample: number;
+    dataOffset: number;
+    dataLength: number;
+};
+/**
+ * Generate silence as Float32Array.
+ */
+declare function generateSilence(durationMs: number, sampleRate?: number): Float32Array;
+/**
+ * Generate a sine wave tone (useful for testing audio pipelines).
+ */
+declare function generateTone(frequency: number, durationMs: number, sampleRate?: number, amplitude?: number): Float32Array;
+/**
+ * Chrome launch flags for audio automation.
+ *
+ * Use these when launching Chrome yourself (not connecting to a remote browser).
+ * These provide a complementary path to the JS injection approach — they
+ * configure Chrome's built-in fake device support at launch time.
+ */
+interface AudioFlagOptions {
+    /** Path to WAV file for fake microphone input */
+    inputWavPath?: string;
+    /** Disable looping of the input file (play once) */
+    noLoop?: boolean;
+}
+/**
+ * Get Chrome flags needed for audio automation.
+ *
+ * @example
+ * ```typescript
+ * import { getAudioChromeFlags } from 'browser-pilot';
+ *
+ * const flags = getAudioChromeFlags({
+ *   inputWavPath: '/tmp/prompt.wav',
+ * });
+ * // Pass to chrome-launcher or similar
+ * ```
+ */
+declare function getAudioChromeFlags(options?: AudioFlagOptions): string[];
+/**
+ * Audio permission handling via CDP
+ *
+ * Grants microphone permissions both at the browser level (CDP)
+ * and as a JS safety net (for sites that pre-check permissions.query).
+ */
+/**
+ * Grant microphone permissions for a page.
+ *
+ * Uses two layers:
+ * 1. CDP Browser.grantPermissions — the primary mechanism
+ * 2. JS navigator.permissions.query override — safety net for sites
+ *    that check permission state before calling getUserMedia
+ */
+declare function grantAudioPermissions(cdp: CDPClient, origin?: string): Promise<void>;
+/**
+ * Thin OpenAI Whisper transcription wrapper
+ *
+ * Zero dependencies — uses only fetch(). Gated on OPENAI_API_KEY env var.
+ * Accepts raw PCM/WAV audio and returns transcript text.
+ */
+interface TranscribeOptions {
+    /** OpenAI API key. Defaults to OPENAI_API_KEY env var. */
+    apiKey?: string;
+    /** Whisper model to use. Default: 'whisper-1' */
+    model?: string;
+    /** Language hint (BCP-47, e.g. 'en'). Optional — Whisper auto-detects. */
+    language?: string;
+    /** Response format. Default: 'text' */
+    responseFormat?: 'text' | 'json' | 'verbose_json' | 'srt' | 'vtt';
+    /** Optional prompt to guide the model (e.g. domain terms). */
+    prompt?: string;
+}
+interface TranscribeResult {
+    /** Transcript text */
+    text: string;
+    /** Duration of the audio in ms */
+    audioDurationMs: number;
+    /** Time spent on the API call in ms */
+    apiDurationMs: number;
+}
+/**
+ * Transcribe a CaptureResult using OpenAI Whisper API.
+ *
+ * Requires OPENAI_API_KEY env var or apiKey option.
+ * Returns the transcript text with timing metadata.
+ *
+ * @example
+ * ```typescript
+ * const capture = await page.audioOutput.stop();
+ * const result = await transcribe(capture);
+ * console.log(result.text);
+ * ```
+ */
+declare function transcribe(audio: CaptureResult, options?: TranscribeOptions): Promise<TranscribeResult>;
+/**
+ * Check if transcription is available (API key is set).
+ */
+declare function isTranscriptionAvailable(): boolean;
 /**
  * Execution tracing and logging
  */
@@ -154,4 +290,4 @@ declare function enableTracing(options?: Partial<Omit<TracerOptions, 'enabled'>>
  */
 declare function disableTracing(): void;
-export { CDPClient, RequestHandler, RequestInterceptor, RequestPattern, type TraceCategory, type TraceEvent, type TraceLevel, Tracer, type TracerOptions, disableTracing, enableTracing, getTracer };
+export { type AudioFlagOptions, CDPClient, CaptureResult, RequestHandler, RequestInterceptor, RequestPattern, type TraceCategory, type TraceEvent, type TraceLevel, Tracer, type TracerOptions, type TranscribeOptions, type TranscribeResult, bufferToBase64, calculateRMS, disableTracing, enableTracing, generateSilence, generateTone, getAudioChromeFlags, getTracer, grantAudioPermissions, isTranscriptionAvailable, parseWavHeader, pcmToWav, transcribe };