npm - @oh-my-pi/pi-web-ui - Versions diffs - 3.15.1 → 3.20.0 - Mend

@oh-my-pi/pi-web-ui 3.15.1 → 3.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/CHANGELOG.md +10 -1
package/README.md +2 -1
package/example/src/main.ts +3 -3
package/example/tsconfig.json +1 -1
package/package.json +4 -4
package/src/components/AgentInterface.ts +326 -3
package/src/utils/voice.ts +272 -0

package/CHANGELOG.md CHANGED

@@ -2,6 +2,15 @@
 ## [Unreleased]
+## [3.20.0] - 2026-01-06
+### Added
+- Added voice input and output support using OpenAI Whisper for transcription and TTS for speech synthesis
+- Added `enableVoice` property to toggle voice features in the agent interface
+- Added Caps Lock key as push-to-talk trigger for voice capture
+- Added automatic voice summarization and playback of assistant responses
+- Added silence detection to automatically stop voice recording after speech ends
 ## [3.15.1] - 2026-01-05
 ## [3.15.0] - 2026-01-05
@@ -148,4 +157,4 @@ declare module "@oh-my-pi/pi-agent-core" {
 		"my-message": MyMessage;
 	}
 }
-```
+```

package/README.md CHANGED

@@ -209,7 +209,8 @@ agent.abort();
 agent.setModel(newModel);
 agent.setThinkingLevel('medium');
 agent.setTools([...]);
-agent.queueMessage(customMessage);
+agent.steer(customMessage);
+agent.followUp(customMessage);
 ```
 ## Message Types

package/example/src/main.ts CHANGED

@@ -343,11 +343,11 @@ const renderApp = () => {
 						size: "sm",
 						children: icon(Bell, "sm"),
 						onClick: () => {
-							// Demo: Inject custom message (will appear on next agent run)
+							// Demo: Queue custom message for the next turn
 							if (agent) {
-								agent.queueMessage(
+								agent.followUp(
 									createSystemNotification(
-										"This is a custom message! It appears in the UI but is never sent to the LLM.",
+										"This is a custom message! It appears in the UI and is sent to the LLM.",
 									),
 								);
 							}

package/example/tsconfig.json CHANGED

@@ -1,7 +1,7 @@
 {
 	"compilerOptions": {
 		"target": "ES2022",
-		"module": "ES2022",
+		"module": "ESNext",
 		"lib": ["ES2022", "DOM", "DOM.Iterable"],
 		"moduleResolution": "bundler",
 		"paths": {

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
 	"name": "@oh-my-pi/pi-web-ui",
-	"version": "3.15.1",
+	"version": "3.20.0",
 	"description": "Reusable web UI components for AI chat interfaces powered by @oh-my-pi/pi-ai",
 	"type": "module",
 	"main": "src/index.ts",
@@ -19,9 +19,9 @@
 	},
 	"dependencies": {
 		"@lmstudio/sdk": "^1.5.0",
-		"@oh-my-pi/pi-agent-core": "3.15.1",
-		"@oh-my-pi/pi-ai": "3.15.1",
-		"@oh-my-pi/pi-tui": "3.15.1",
+		"@oh-my-pi/pi-agent-core": "3.20.0",
+		"@oh-my-pi/pi-ai": "3.20.0",
+		"@oh-my-pi/pi-tui": "3.20.0",
 		"docx-preview": "^0.3.7",
 		"highlight.js": "^11.11.1",
 		"jszip": "^3.10.1",

package/src/components/AgentInterface.ts CHANGED

@@ -1,6 +1,15 @@
-import { streamSimple, type ToolResultMessage, type Usage } from "@oh-my-pi/pi-ai";
+import {
+	type AssistantMessage,
+	completeSimple,
+	getModels,
+	type Model,
+	streamSimple,
+	type TextContent,
+	type ToolResultMessage,
+	type Usage,
+} from "@oh-my-pi/pi-ai";
 import { html, LitElement } from "lit";
-import { customElement, property, query } from "lit/decorators.js";
+import { customElement, property, query, state } from "lit/decorators.js";
 import { ModelSelector } from "../dialogs/ModelSelector";
 import type { MessageEditor } from "./MessageEditor";
 import "./MessageEditor.js";
@@ -13,6 +22,7 @@ import type { Attachment } from "../utils/attachment-utils";
 import { formatUsage } from "../utils/format";
 import { i18n } from "../utils/i18n";
 import { createStreamFn } from "../utils/proxy-utils";
+import { synthesizeOpenAI, transcribeOpenAI, VoiceCapture, type VoiceCaptureResult } from "../utils/voice";
 import type { UserMessageWithAttachments } from "./Messages";
 import type { StreamingMessageContainer } from "./StreamingMessageContainer";
@@ -24,6 +34,7 @@ export class AgentInterface extends LitElement {
 	@property({ type: Boolean }) enableModelSelector = true;
 	@property({ type: Boolean }) enableThinkingSelector = true;
 	@property({ type: Boolean }) showThemeToggle = false;
+	@property({ type: Boolean }) enableVoice = true;
 	// Optional custom API key prompt handler - if not provided, uses default dialog
 	@property({ attribute: false }) onApiKeyRequired?: (provider: string) => Promise<boolean>;
 	// Optional callback called before sending a message
@@ -37,12 +48,21 @@ export class AgentInterface extends LitElement {
 	@query("message-editor") private _messageEditor!: MessageEditor;
 	@query("streaming-message-container") private _streamingContainer!: StreamingMessageContainer;
+	@state() private voiceStatus: "idle" | "listening" | "transcribing" | "speaking" | "error" = "idle";
 	private _autoScroll = true;
 	private _lastScrollTop = 0;
 	private _lastClientHeight = 0;
 	private _scrollContainer?: HTMLElement;
 	private _resizeObserver?: ResizeObserver;
 	private _unsubscribeSession?: () => void;
+	private voiceCapture?: VoiceCapture;
+	private voiceAudio?: HTMLAudioElement;
+	private voiceSummaryAbort?: AbortController;
+	private voiceLastSpokenTimestamp = 0;
+	private voiceKeyListener = (event: KeyboardEvent) => {
+		void this.handleVoiceKey(event);
+	};
 	public setInput(text: string, attachments?: Attachment[]) {
 		const update = () => {
@@ -104,6 +124,8 @@ export class AgentInterface extends LitElement {
 		// Subscribe to external session if provided
 		this.setupSessionSubscription();
+		window.addEventListener("keydown", this.voiceKeyListener);
 	}
 	override disconnectedCallback() {
@@ -123,6 +145,11 @@ export class AgentInterface extends LitElement {
 			this._unsubscribeSession();
 			this._unsubscribeSession = undefined;
 		}
+		window.removeEventListener("keydown", this.voiceKeyListener);
+		void this.stopVoiceCapture("manual");
+		this.stopVoicePlayback();
+		this.voiceSummaryAbort?.abort();
 	}
 	private setupSessionSubscription() {
@@ -164,6 +191,7 @@ export class AgentInterface extends LitElement {
 						this._streamingContainer.setMessage(null, true);
 					}
 					this.requestUpdate();
+					void this.handleAgentEnd();
 					break;
 				case "message_update":
 					if (this._streamingContainer) {
@@ -203,6 +231,287 @@ export class AgentInterface extends LitElement {
 		this._lastClientHeight = clientHeight;
 	};
+	private async handleVoiceKey(event: KeyboardEvent): Promise<void> {
+		if (!this.enableVoice) return;
+		if (event.code !== "CapsLock" && event.key !== "CapsLock") return;
+		if (event.repeat) return;
+		event.preventDefault();
+		if (this.voiceStatus === "transcribing" || this.voiceStatus === "speaking") {
+			return;
+		}
+		if (this.voiceCapture?.isRecording) {
+			await this.stopVoiceCapture("manual");
+		} else {
+			await this.startVoiceCapture();
+		}
+	}
+	private async startVoiceCapture(): Promise<void> {
+		if (!this.enableVoice || this.voiceCapture?.isRecording) return;
+		const apiKey = await this.resolveApiKey("openai");
+		if (!apiKey) {
+			this.voiceStatus = "error";
+			console.error("Voice capture requires an OpenAI API key.");
+			return;
+		}
+		this.voiceStatus = "listening";
+		this.voiceCapture = new VoiceCapture();
+		const resultPromise = this.voiceCapture.start();
+		void resultPromise
+			.then((result) => this.handleVoiceCaptureResult(result))
+			.catch((error) => {
+				this.voiceStatus = "error";
+				this.voiceCapture = undefined;
+				console.error("Failed to start voice capture:", error);
+			});
+	}
+	private async stopVoiceCapture(reason: "manual" | "silence" | "max" | "error"): Promise<void> {
+		if (!this.voiceCapture) return;
+		await this.voiceCapture.stop(reason);
+	}
+	private async handleVoiceCaptureResult(result: VoiceCaptureResult): Promise<void> {
+		if (this.voiceStatus === "transcribing" || this.voiceStatus === "speaking") return;
+		if (!result.blob) {
+			this.voiceStatus = "idle";
+			this.voiceCapture = undefined;
+			return;
+		}
+		this.voiceStatus = "transcribing";
+		try {
+			const transcript = await this.transcribeAudio(result.blob);
+			if (transcript.trim()) {
+				await this.sendVoiceMessage(transcript);
+			}
+			this.voiceStatus = "idle";
+			this.voiceCapture = undefined;
+		} catch (error) {
+			this.voiceStatus = "error";
+			this.voiceCapture = undefined;
+			console.error("Voice transcription failed:", error);
+		}
+	}
+	private async transcribeAudio(blob: Blob): Promise<string> {
+		const apiKey = await this.resolveApiKey("openai");
+		if (!apiKey) {
+			throw new Error("OpenAI API key required for transcription.");
+		}
+		return transcribeOpenAI(blob, {
+			apiKey,
+			model: "whisper-1",
+			prompt: "Short voice command or question.",
+		});
+	}
+	private async sendVoiceMessage(input: string): Promise<void> {
+		const session = this.session;
+		if (!session) return;
+		// Design choice: keep the agent's full response for the UI, and generate short voice summaries separately.
+		const text = input.trim();
+		if (!text) return;
+		if (!(await this.ensureApiKeyForProvider(session.state.model?.provider))) {
+			return;
+		}
+		if (session.state.isStreaming) {
+			if (this.onBeforeSend) {
+				await this.onBeforeSend();
+			}
+			session.steer({ role: "user", content: text, timestamp: Date.now() });
+			return;
+		}
+		await this.sendMessage(text);
+	}
+	private async handleAgentEnd(): Promise<void> {
+		if (!this.enableVoice) return;
+		const session = this.session;
+		if (!session) return;
+		const lastAssistant = this.getLastAssistantMessage(session.state.messages);
+		if (!lastAssistant) return;
+		if (lastAssistant.timestamp <= this.voiceLastSpokenTimestamp) return;
+		const assistantText = this.extractAssistantText(lastAssistant);
+		if (!assistantText.trim()) return;
+		this.voiceLastSpokenTimestamp = lastAssistant.timestamp;
+		try {
+			const summary = await this.summarizeForVoice(assistantText);
+			if (summary.trim()) {
+				await this.speak(summary);
+			}
+		} catch (error) {
+			this.voiceStatus = "error";
+			console.error("Voice summary failed:", error);
+		}
+	}
+	private async summarizeForVoice(text: string): Promise<string> {
+		const model = this.resolveFastModel() || this.session?.state.model;
+		if (!model) {
+			return this.fallbackSummary(text);
+		}
+		const apiKey = await this.resolveApiKey(model.provider);
+		if (!apiKey) {
+			return this.fallbackSummary(text);
+		}
+		this.voiceSummaryAbort?.abort();
+		this.voiceSummaryAbort = new AbortController();
+		const prompt = [
+			"Summarize the assistant response for voice playback.",
+			"Keep it to 1-3 short sentences, conversational tone.",
+			"Preserve any question the assistant asked.",
+			'Keep uncertainty if present (e.g. "hmm... maybe...").',
+			"Do not use bullet points.",
+			"",
+			"Assistant response:",
+			text,
+		].join("\n");
+		const result = await completeSimple(
+			model,
+			{
+				messages: [
+					{
+						role: "user",
+						content: prompt,
+						timestamp: Date.now(),
+					},
+				],
+			},
+			{
+				apiKey,
+				maxTokens: 200,
+				temperature: 0.2,
+				signal: this.voiceSummaryAbort.signal,
+			},
+		);
+		const summary = this.extractAssistantText(result);
+		return summary.trim() || this.fallbackSummary(text);
+	}
+	private async speak(text: string): Promise<void> {
+		const apiKey = await this.resolveApiKey("openai");
+		if (!apiKey) {
+			console.error("OpenAI API key required for speech synthesis.");
+			return;
+		}
+		this.stopVoicePlayback();
+		this.voiceStatus = "speaking";
+		const audioBlob = await synthesizeOpenAI(text, {
+			apiKey,
+			model: "tts-1",
+			voice: "alloy",
+			responseFormat: "mp3",
+		});
+		const audioUrl = URL.createObjectURL(audioBlob);
+		const audio = new Audio(audioUrl);
+		this.voiceAudio = audio;
+		try {
+			await audio.play();
+			await new Promise<void>((resolve, reject) => {
+				audio.onended = () => resolve();
+				audio.onerror = () => reject(new Error("Audio playback failed"));
+			});
+		} finally {
+			URL.revokeObjectURL(audioUrl);
+			if (this.voiceAudio === audio) {
+				this.voiceAudio = undefined;
+			}
+			this.voiceStatus = "idle";
+		}
+	}
+	private stopVoicePlayback(): void {
+		if (this.voiceAudio) {
+			this.voiceAudio.pause();
+			this.voiceAudio.currentTime = 0;
+			this.voiceAudio = undefined;
+		}
+	}
+	private resolveFastModel(): Model<any> | undefined {
+		const openAiModels = getModels("openai");
+		const preferred = ["gpt-5-mini", "gpt-4o-mini", "gpt-4.1-mini"];
+		for (const id of preferred) {
+			const found = openAiModels.find((m) => m.id === id);
+			if (found) return found;
+		}
+		return openAiModels.find((m) => m.id.includes("mini")) || undefined;
+	}
+	private async resolveApiKey(provider: string): Promise<string | undefined> {
+		if (this.session?.getApiKey) {
+			const key = await this.session.getApiKey(provider);
+			if (key) return key;
+		}
+		const stored = await getAppStorage().providerKeys.get(provider);
+		return stored ?? undefined;
+	}
+	private async ensureApiKeyForProvider(provider?: string): Promise<boolean> {
+		if (!provider) return false;
+		const apiKey = await getAppStorage().providerKeys.get(provider);
+		if (apiKey) return true;
+		if (!this.onApiKeyRequired) {
+			console.error("No API key configured and no onApiKeyRequired handler set");
+			return false;
+		}
+		const success = await this.onApiKeyRequired(provider);
+		return success;
+	}
+	private getLastAssistantMessage(messages: readonly unknown[]): AssistantMessage | undefined {
+		for (let i = messages.length - 1; i >= 0; i -= 1) {
+			const message = messages[i];
+			if (this.isAssistantMessage(message)) {
+				return message;
+			}
+		}
+		return undefined;
+	}
+	private isAssistantMessage(message: unknown): message is AssistantMessage {
+		if (!message || typeof message !== "object") return false;
+		return (message as AssistantMessage).role === "assistant";
+	}
+	private extractAssistantText(message: AssistantMessage): string {
+		return message.content
+			.filter((part): part is TextContent => part.type === "text")
+			.map((part) => part.text)
+			.join("\n");
+	}
+	private fallbackSummary(text: string): string {
+		const cleaned = text.replace(/\s+/g, " ").trim();
+		const matches = cleaned.match(/[^.!?]+[.!?]+/g);
+		if (matches && matches.length > 0) {
+			return matches.slice(0, 2).join(" ").trim();
+		}
+		return cleaned.slice(0, 240);
+	}
 	public async sendMessage(input: string, attachments?: Attachment[]) {
 		if ((!input.trim() && attachments?.length === 0) || this.session?.state.isStreaming) return;
 		const session = this.session;
@@ -320,8 +629,9 @@ export class AgentInterface extends LitElement {
 		return html`
 			<div class="text-xs text-muted-foreground flex justify-between items-center h-5">
-				<div class="flex items-center gap-1">
+				<div class="flex items-center gap-2">
 					${this.showThemeToggle ? html`<theme-toggle></theme-toggle>` : html``}
+					${this.renderVoiceStatus()}
 				</div>
 				<div class="flex ml-auto items-center gap-3">
 					${
@@ -338,6 +648,19 @@ export class AgentInterface extends LitElement {
 		`;
 	}
+	private renderVoiceStatus() {
+		if (!this.enableVoice || this.voiceStatus === "idle") return html``;
+		const statusText =
+			this.voiceStatus === "listening"
+				? "Listening... (Caps Lock to stop)"
+				: this.voiceStatus === "transcribing"
+					? "Transcribing..."
+					: this.voiceStatus === "speaking"
+						? "Speaking..."
+						: "Voice error";
+		return html`<span>${statusText}</span>`;
+	}
 	override render() {
 		if (!this.session)
 			return html`<div class="p-4 text-center text-muted-foreground">${i18n("No session set")}</div>`;

package/src/utils/voice.ts ADDED

@@ -0,0 +1,272 @@
+export type VoiceStopReason = "manual" | "silence" | "max" | "error";
+export interface VoiceCaptureOptions {
+	silenceMs: number;
+	maxDurationMs: number;
+	minSpeechMs: number;
+	levelThreshold: number;
+	chunkMs: number;
+	preferredMimeType?: string;
+}
+export interface VoiceCaptureResult {
+	blob: Blob | null;
+	reason: VoiceStopReason;
+	durationMs: number;
+	mimeType: string | null;
+}
+const DEFAULT_CAPTURE_OPTIONS: VoiceCaptureOptions = {
+	silenceMs: 1200,
+	maxDurationMs: 20000,
+	minSpeechMs: 500,
+	levelThreshold: 0.02,
+	chunkMs: 250,
+	preferredMimeType: "audio/webm;codecs=opus",
+};
+export class VoiceCapture {
+	private options: VoiceCaptureOptions;
+	private stream?: MediaStream;
+	private recorder?: MediaRecorder;
+	private audioContext?: AudioContext;
+	private analyser?: AnalyserNode;
+	private data?: Float32Array<ArrayBuffer>;
+	private chunks: Blob[] = [];
+	private startedAt = 0;
+	private speechDetectedAt?: number;
+	private silenceStartedAt?: number;
+	private stopResolver?: (result: VoiceCaptureResult) => void;
+	private stopPromise?: Promise<VoiceCaptureResult>;
+	private stopReason: VoiceStopReason = "manual";
+	private monitorTimer?: number;
+	private isStopping = false;
+	private _isRecording = false;
+	public get isRecording(): boolean {
+		return this._isRecording;
+	}
+	constructor(options?: Partial<VoiceCaptureOptions>) {
+		this.options = { ...DEFAULT_CAPTURE_OPTIONS, ...options };
+	}
+	async start(): Promise<VoiceCaptureResult> {
+		if (this._isRecording && this.stopPromise) return this.stopPromise;
+		if (!navigator.mediaDevices?.getUserMedia) {
+			throw new Error("Media devices not available.");
+		}
+		if (typeof MediaRecorder === "undefined") {
+			throw new Error("MediaRecorder not supported.");
+		}
+		this.stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+		const mimeType = pickSupportedMimeType(this.options.preferredMimeType);
+		this.recorder = mimeType ? new MediaRecorder(this.stream, { mimeType }) : new MediaRecorder(this.stream);
+		this.chunks = [];
+		this.startedAt = performance.now();
+		this.speechDetectedAt = undefined;
+		this.silenceStartedAt = undefined;
+		this.stopReason = "manual";
+		this.isStopping = false;
+		this._isRecording = true;
+		this.stopPromise = new Promise((resolve) => {
+			this.stopResolver = resolve;
+		});
+		this.recorder.ondataavailable = (event: BlobEvent) => {
+			if (event.data && event.data.size > 0) {
+				this.chunks.push(event.data);
+			}
+		};
+		this.recorder.onstop = () => {
+			this.finish();
+		};
+		this.recorder.start(this.options.chunkMs);
+		this.setupAnalyzer();
+		this.startMonitor();
+		return this.stopPromise;
+	}
+	async stop(reason: VoiceStopReason = "manual"): Promise<VoiceCaptureResult> {
+		if (!this.recorder || !this._isRecording) {
+			return { blob: null, reason, durationMs: 0, mimeType: null };
+		}
+		if (this.isStopping && this.stopPromise) {
+			return this.stopPromise;
+		}
+		this.isStopping = true;
+		this.stopReason = reason;
+		this.recorder.stop();
+		return this.stopPromise!;
+	}
+	private finish(): void {
+		this._isRecording = false;
+		const durationMs = Math.max(0, performance.now() - this.startedAt);
+		const mimeType = this.recorder?.mimeType || null;
+		const hasSpeech = this.speechDetectedAt !== undefined;
+		const blob =
+			hasSpeech && this.chunks.length > 0 ? new Blob(this.chunks, { type: mimeType || "audio/webm" }) : null;
+		const result: VoiceCaptureResult = {
+			blob,
+			reason: this.stopReason,
+			durationMs,
+			mimeType,
+		};
+		this.cleanup();
+		this.stopResolver?.(result);
+	}
+	private startMonitor(): void {
+		this.monitorTimer = window.setInterval(() => {
+			if (!this.analyser || !this.data || this.isStopping) return;
+			this.analyser.getFloatTimeDomainData(this.data);
+			const rms = getRms(this.data);
+			const now = performance.now();
+			if (rms >= this.options.levelThreshold) {
+				if (!this.speechDetectedAt) {
+					this.speechDetectedAt = now;
+				}
+				this.silenceStartedAt = undefined;
+			} else if (this.speechDetectedAt) {
+				if (!this.silenceStartedAt) {
+					this.silenceStartedAt = now;
+				} else if (
+					now - this.silenceStartedAt >= this.options.silenceMs &&
+					now - this.speechDetectedAt >= this.options.minSpeechMs
+				) {
+					void this.stop("silence");
+					return;
+				}
+			}
+			if (now - this.startedAt >= this.options.maxDurationMs) {
+				void this.stop("max");
+			}
+		}, 100);
+	}
+	private setupAnalyzer(): void {
+		if (!this.stream) return;
+		this.audioContext = new AudioContext();
+		this.analyser = this.audioContext.createAnalyser();
+		this.analyser.fftSize = 2048;
+		this.data = new Float32Array(this.analyser.fftSize);
+		const source = this.audioContext.createMediaStreamSource(this.stream);
+		source.connect(this.analyser);
+	}
+	private cleanup(): void {
+		if (this.monitorTimer !== undefined) {
+			clearInterval(this.monitorTimer);
+			this.monitorTimer = undefined;
+		}
+		if (this.stream) {
+			for (const track of this.stream.getTracks()) {
+				track.stop();
+			}
+			this.stream = undefined;
+		}
+		if (this.audioContext) {
+			void this.audioContext.close();
+			this.audioContext = undefined;
+		}
+		this.analyser = undefined;
+		this.data = undefined;
+		this.recorder = undefined;
+		this.chunks = [];
+		this.isStopping = false;
+	}
+}
+export interface TranscribeOptions {
+	apiKey: string;
+	model?: string;
+	language?: string;
+	prompt?: string;
+}
+export async function transcribeOpenAI(blob: Blob, options: TranscribeOptions): Promise<string> {
+	const formData = new FormData();
+	const filename = blob.type.includes("ogg") ? "voice.ogg" : "voice.webm";
+	const file = new File([blob], filename, { type: blob.type || "audio/webm" });
+	formData.append("file", file);
+	formData.append("model", options.model || "whisper-1");
+	if (options.language) formData.append("language", options.language);
+	if (options.prompt) formData.append("prompt", options.prompt);
+	const response = await fetch("https://api.openai.com/v1/audio/transcriptions", {
+		method: "POST",
+		headers: {
+			Authorization: `Bearer ${options.apiKey}`,
+		},
+		body: formData,
+	});
+	if (!response.ok) {
+		throw new Error(`Transcription failed (${response.status})`);
+	}
+	const data = (await response.json()) as { text?: string };
+	return data.text?.trim() || "";
+}
+export interface SpeechOptions {
+	apiKey: string;
+	model?: string;
+	voice?: string;
+	responseFormat?: "mp3" | "wav" | "opus";
+	speed?: number;
+}
+export async function synthesizeOpenAI(text: string, options: SpeechOptions): Promise<Blob> {
+	const response = await fetch("https://api.openai.com/v1/audio/speech", {
+		method: "POST",
+		headers: {
+			Authorization: `Bearer ${options.apiKey}`,
+			"Content-Type": "application/json",
+		},
+		body: JSON.stringify({
+			model: options.model || "tts-1",
+			voice: options.voice || "alloy",
+			input: text,
+			response_format: options.responseFormat || "mp3",
+			speed: options.speed,
+		}),
+	});
+	if (!response.ok) {
+		throw new Error(`Speech synthesis failed (${response.status})`);
+	}
+	return response.blob();
+}
+function pickSupportedMimeType(preferred?: string): string | undefined {
+	const candidates = [preferred, "audio/webm;codecs=opus", "audio/webm", "audio/ogg;codecs=opus", "audio/ogg"].filter(
+		(value): value is string => typeof value === "string",
+	);
+	for (const candidate of candidates) {
+		if (MediaRecorder.isTypeSupported(candidate)) {
+			return candidate;
+		}
+	}
+	return undefined;
+}
+function getRms(data: Float32Array): number {
+	let sum = 0;
+	for (let i = 0; i < data.length; i += 1) {
+		const sample = data[i] ?? 0;
+		sum += sample * sample;
+	}
+	return Math.sqrt(sum / data.length);
+}