npm - opencode-telegram-mirror - Versions diffs - 0.3.0 → 0.4.2 - Mend

opencode-telegram-mirror 0.3.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/README.md CHANGED Viewed

@@ -2,6 +2,19 @@
 A standalone bot that mirrors OpenCode sessions to Telegram topics, enabling collaborative AI-assisted coding conversations in Telegram.
+## ✨ Features
+| Feature | Description |
+|---------|-------------|
+| **📱 Real-time Streaming** | Live responses with typing indicators, markdown, code blocks, and inline diffs |
+| **🎯 Interactive Controls** | Buttons for questions, permissions, mode switching, and session control |
+| **📋 Slash Commands** | `/interrupt`, `/plan`, `/build`, `/review`, `/rename` for quick actions |
+| **🔍 Diff Viewer** | Automatic diff generation with syntax highlighting and shareable links |
+| **📸 Media Support** | Send images and voice messages (transcribed via Whisper) as prompts |
+| **🧵 Thread Support** | Telegram forum threads with automatic title sync from OpenCode sessions |
+| **💾 Session Persistence** | Resume sessions across devices and restarts |
+| **🔄 Multi-instance** | Run multiple mirrors for different sessions/channels |
 ## Installation
 ```bash
@@ -19,19 +32,19 @@ npm install -g opencode-telegram-mirror
    - Message [@userinfobot](https://t.me/userinfobot)
    - Copy your chat ID
-3. **Run the mirror**:
-   ```bash
-   opencode-telegram-mirror
-   ```
-4. **Configure environment variables**:
+3. **Configure environment variables**:
    ```bash
    export TELEGRAM_BOT_TOKEN="your-bot-token"
    export TELEGRAM_CHAT_ID="your-chat-id"
    # Optional: export TELEGRAM_THREAD_ID="your-thread-id"
    ```
-That's it! Your OpenCode sessions will now be mirrored to Telegram.
+4. **Run the mirror in your project**:
+   ```bash
+   opencode-telegram-mirror .
+   ```
+That's it! Your OpenCode session will now be mirrored to Telegram.
 ## How it works
@@ -138,6 +151,7 @@ opencode-telegram-mirror [directory] [session-id]
 | `TELEGRAM_UPDATES_URL` | Central updates endpoint for multi-instance deployments | No |
 | `TELEGRAM_SEND_URL` | Custom Telegram API endpoint (defaults to api.telegram.org) | No |
 | `OPENCODE_URL` | External OpenCode server URL (if not set, spawns local server) | No |
+| `OPENAI_API_KEY` | OpenAI API key for voice message transcription (Whisper) | No |
 ### Configuration Files
@@ -165,8 +179,31 @@ Example config file:
 Send messages in Telegram to interact with OpenCode:
 - **Text messages**: Sent as prompts to OpenCode
 - **Photos**: Attached as image files to prompts
+- **Voice messages**: Transcribed via OpenAI Whisper and sent as text prompts
 - **"x"**: Interrupt the current session
-- **"/connect"**: Get the OpenCode server URL
+- **/connect**: Get the OpenCode server URL
+- **/interrupt**: Stop the current operation
+- **/plan**: Switch to plan mode
+- **/build**: Switch to build mode
+- **/review**: Review changes (accepts optional argument: commit, branch, or pr)
+- **/rename `<title>`**: Rename the session and sync to Telegram thread
+### Title Sync
+Session titles are automatically synchronized between OpenCode and Telegram:
+- **On startup**: If resuming an existing session, the thread title syncs from the session
+- **On auto-title**: When OpenCode generates a title, it updates the Telegram thread
+- **On /rename**: Manually set a title that updates both OpenCode and Telegram
+### Voice Messages
+Voice messages are transcribed using OpenAI's Whisper API. To enable:
+1. Get an API key from [OpenAI Platform](https://platform.openai.com/api-keys)
+2. Set `OPENAI_API_KEY` in your environment
+3. Send voice messages to the bot - they'll be transcribed and sent to OpenCode
+If `OPENAI_API_KEY` is not set, the bot will respond with setup instructions when a voice message is received.
 ### Interactive Controls

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "opencode-telegram-mirror",
-	"version": "0.3.0",
+	"version": "0.4.2",
 	"description": "Standalone bot that mirrors OpenCode sessions to Telegram topics",
 	"type": "module",
 	"main": "src/main.ts",

package/src/main.ts CHANGED Viewed

@@ -20,7 +20,7 @@ import {
 	getServer,
 	type OpenCodeServer,
 } from "./opencode"
-import { TelegramClient } from "./telegram"
+import { TelegramClient, type TelegramVoice } from "./telegram"
 import { loadConfig } from "./config"
 import { createLogger } from "./log"
 import {
@@ -49,6 +49,11 @@ import {
 	createDiffFromEdit,
 	generateInlineDiffPreview,
 } from "./diff-service"
+import {
+	isVoiceTranscriptionAvailable,
+	transcribeVoice,
+	getVoiceNotSupportedMessage,
+} from "./voice"
 const log = createLogger()
@@ -93,6 +98,10 @@ interface BotState {
 	assistantMessageIds: Set<string>;
 	pendingParts: Map<string, Part[]>;
 	sentPartIds: Set<string>;
+	typingIndicators: Map<
+		string,
+		{ stop: () => void; timeout: ReturnType<typeof setTimeout> | null; mode: "idle" | "tool" }
+	>;
 }
 async function main() {
@@ -206,12 +215,12 @@ async function main() {
 		botId: botInfo.id,
 	})
-	// Register bot commands (menu)
 	const commandsResult = await telegram.setMyCommands([
 		{ command: "interrupt", description: "Stop the current operation" },
 		{ command: "plan", description: "Switch to plan mode" },
 		{ command: "build", description: "Switch to build mode" },
 		{ command: "review", description: "Review changes [commit|branch|pr]" },
+		{ command: "rename", description: "Rename the session" },
 	])
 	if (commandsResult.status === "error") {
 		log("warn", "Failed to set bot commands", { error: commandsResult.error.message })
@@ -221,10 +230,11 @@ async function main() {
 	log("info", "Checking for existing session...")
 	let sessionId: string | null = sessionIdArg || getSessionId(log)
+	let initialThreadTitle: string | null = null
 	if (sessionId) {
 		log("info", "Found existing session ID, validating...", { sessionId })
 		const sessionCheck = await server.client.session.get({
-			path: { id: sessionId },
+			sessionID: sessionId,
 		})
 		if (!sessionCheck.data) {
 			log("warn", "Stored session not found on server, will create new", {
@@ -233,6 +243,7 @@ async function main() {
 			sessionId = null
 		} else {
 			log("info", "Session validated successfully", { sessionId })
+			initialThreadTitle = sessionCheck.data.title || null
 		}
 	} else {
 		log("info", "No existing session found, will create on first message")
@@ -245,13 +256,23 @@ async function main() {
 		directory,
 		chatId: config.chatId,
 		threadId: config.threadId ?? null,
-		threadTitle: null,
+		threadTitle: initialThreadTitle,
 		updatesUrl: config.updatesUrl || null,
 		botUserId: botInfo.id,
 		sessionId,
 		assistantMessageIds: new Set(),
 		pendingParts: new Map(),
 		sentPartIds: new Set(),
+		typingIndicators: new Map(),
+	}
+	if (initialThreadTitle && config.threadId) {
+		const renameResult = await telegram.editForumTopic(config.threadId, initialThreadTitle)
+		if (renameResult.status === "ok") {
+			log("info", "Synced thread title from session", { title: initialThreadTitle })
+		} else {
+			log("warn", "Failed to sync thread title", { error: renameResult.error.message })
+		}
 	}
 	log("info", "Bot state initialized", {
@@ -263,7 +284,6 @@ async function main() {
 		pollSource: state.updatesUrl ? "Cloudflare DO" : "Telegram API",
 	})
-	// Start polling for updates
 	log("info", "Starting updates poller...")
 	startUpdatesPoller(state)
@@ -357,10 +377,9 @@ async function main() {
 Do not start implementing until you have clarity on what needs to be done.`
-		// Create session and send prompt
 		try {
 			const sessionResult = await state.server.client.session.create({
-				body: { title: `Telegram: ${branchName || "session"}` },
+				title: `Telegram: ${branchName || "session"}`,
 			})
 			if (sessionResult.data?.id) {
@@ -368,10 +387,9 @@ Do not start implementing until you have clarity on what needs to be done.`
 				setSessionId(sessionResult.data.id, log)
 				log("info", "Created OpenCode session", { sessionId: state.sessionId })
-				// Send the initial prompt
 				await state.server.client.session.prompt({
-					path: { id: state.sessionId },
-					body: { parts: [{ type: "text", text: prompt }] },
+					sessionID: state.sessionId,
+					parts: [{ type: "text", text: prompt }],
 				})
 				log("info", "Sent initial prompt to OpenCode")
 			}
@@ -401,6 +419,13 @@ interface TelegramUpdate {
 			width: number
 			height: number
 		}>
+		voice?: {
+			file_id: string
+			file_unique_id: string
+			duration: number
+			mime_type?: string
+			file_size?: number
+		}
 		from?: { id: number; username?: string }
 		chat: { id: number }
 	}
@@ -599,7 +624,7 @@ async function handleTelegramMessage(
 	msg: NonNullable<TelegramUpdate["message"]>,
 ) {
 	const messageText = msg.text || msg.caption
-	if (!messageText && !msg.photo) return
+	if (!messageText && !msg.photo && !msg.voice) return
 	// Ignore all bot messages - context is sent directly via OpenCode API
 	if (msg.from?.id === state.botUserId) {
@@ -619,12 +644,12 @@ async function handleTelegramMessage(
 	if (messageText?.trim().toLowerCase() === "x") {
 		log("info", "Received interrupt command 'x'")
 		if (state.sessionId) {
-			const abortResult = await state.server.clientV2.session.abort({
+			const abortResult = await state.server.client.session.abort({
 				sessionID: state.sessionId,
 				directory: state.directory,
 			})
 			if (abortResult.data) {
-				await state.telegram.sendMessage("Interrupted.")
+				log("info", "Abort request sent", { sessionId: state.sessionId })
 			} else {
 				log("error", "Failed to abort session", {
 					sessionId: state.sessionId,
@@ -665,12 +690,12 @@ async function handleTelegramMessage(
 	if (messageText?.trim() === "/interrupt") {
 		log("info", "Received /interrupt command")
 		if (state.sessionId) {
-			const abortResult = await state.server.clientV2.session.abort({
+			const abortResult = await state.server.client.session.abort({
 				sessionID: state.sessionId,
 				directory: state.directory,
 			})
 			if (abortResult.data) {
-				await state.telegram.sendMessage("Interrupted.")
+				log("info", "Abort request sent", { sessionId: state.sessionId })
 			} else {
 				log("error", "Failed to abort session", {
 					sessionId: state.sessionId,
@@ -684,6 +709,34 @@ async function handleTelegramMessage(
 		return
 	}
+	const renameMatch = messageText?.trim().match(/^\/rename(?:\s+(.+))?$/)
+	if (renameMatch) {
+		const newTitle = renameMatch[1]?.trim()
+		if (!newTitle) {
+			await state.telegram.sendMessage("Usage: /rename <new title>")
+			return
+		}
+		if (!state.sessionId) {
+			await state.telegram.sendMessage("No active session to rename.")
+			return
+		}
+		const updateResult = await state.server.client.session.update({
+			sessionID: state.sessionId,
+			title: newTitle,
+		})
+		if (updateResult.data) {
+			state.threadTitle = newTitle
+			if (state.threadId) {
+				await state.telegram.editForumTopic(state.threadId, newTitle)
+			}
+			await state.telegram.sendMessage(`Session renamed to: ${newTitle}`)
+		} else {
+			await state.telegram.sendMessage("Failed to rename session.")
+		}
+		return
+	}
 	const commandMatch = messageText?.trim().match(/^\/(build|plan|review)(?:\s+(.*))?$/)
 	if (commandMatch) {
 		const [, command, args] = commandMatch
@@ -691,7 +744,7 @@ async function handleTelegramMessage(
 		if (!state.sessionId) {
 			const result = await state.server.client.session.create({
-				body: { title: "Telegram" },
+				title: "Telegram",
 			})
 			if (result.data) {
 				state.sessionId = result.data.id
@@ -704,7 +757,7 @@ async function handleTelegramMessage(
 			}
 		}
-		state.server.clientV2.session
+		state.server.client.session
 			.command({
 				sessionID: state.sessionId,
 				directory: state.directory,
@@ -721,7 +774,7 @@ async function handleTelegramMessage(
 	log("info", "Received message", {
 		from: msg.from?.username,
-		preview: messageText?.slice(0, 50) ?? "[photo]",
+		preview: messageText?.slice(0, 50) ?? (msg.voice ? "[voice]" : "[photo]"),
 	})
 	// Check for freetext answer
@@ -737,7 +790,7 @@ async function handleTelegramMessage(
 		})
 		if (result) {
-			await state.server.clientV2.question.reply({
+			await state.server.client.question.reply({
 				requestID: result.requestId,
 				answers: result.answers,
 			})
@@ -748,23 +801,22 @@ async function handleTelegramMessage(
 	// Cancel pending questions/permissions
 	const cancelledQ = cancelPendingQuestion(msg.chat.id, threadId)
 	if (cancelledQ) {
-		await state.server.clientV2.question.reject({
+		await state.server.client.question.reject({
 			requestID: cancelledQ.requestId,
 		})
 	}
 	const cancelledP = cancelPendingPermission(msg.chat.id, threadId)
 	if (cancelledP) {
-		await state.server.clientV2.permission.reply({
+		await state.server.client.permission.reply({
 			requestID: cancelledP.requestId,
 			reply: "reject",
 		})
 	}
-	// Create session if needed
 	if (!state.sessionId) {
 		const result = await state.server.client.session.create({
-			body: { title: "Telegram" },
+			title: "Telegram",
 		})
 		if (result.data) {
@@ -804,6 +856,51 @@ async function handleTelegramMessage(
 		}
 	}
+	if (msg.voice) {
+		if (!isVoiceTranscriptionAvailable()) {
+			await state.telegram.sendMessage(getVoiceNotSupportedMessage())
+			return
+		}
+		log("info", "Processing voice message", {
+			duration: msg.voice.duration,
+			fileId: msg.voice.file_id,
+		})
+		const fileUrlResult = await state.telegram.getFileUrl(msg.voice.file_id)
+		if (fileUrlResult.status === "error") {
+			log("error", "Failed to get voice file URL", {
+				error: fileUrlResult.error.message,
+			})
+			await state.telegram.sendMessage("Failed to download voice message.")
+			return
+		}
+		const audioResponse = await fetch(fileUrlResult.value)
+		if (!audioResponse.ok) {
+			log("error", "Failed to download voice file", { status: audioResponse.status })
+			await state.telegram.sendMessage("Failed to download voice message.")
+			return
+		}
+		const audioBuffer = await audioResponse.arrayBuffer()
+		const transcriptionResult = await transcribeVoice(audioBuffer, log)
+		if (transcriptionResult.status === "error") {
+			log("error", "Voice transcription failed", {
+				error: transcriptionResult.error.message,
+			})
+			await state.telegram.sendMessage(
+				`Failed to transcribe voice message: ${transcriptionResult.error.message}`
+			)
+			return
+		}
+		const transcribedText = transcriptionResult.value
+		log("info", "Voice transcribed", { preview: transcribedText.slice(0, 50) })
+		parts.push({ type: "text", text: transcribedText })
+	}
 	if (messageText) {
 		parts.push({ type: "text", text: messageText })
 	}
@@ -811,7 +908,7 @@ async function handleTelegramMessage(
 	if (parts.length === 0) return
 	// Send to OpenCode
-	state.server.clientV2.session
+	state.server.client.session
 		.prompt({
 			sessionID: state.sessionId,
 			directory: state.directory,
@@ -846,7 +943,7 @@ async function handleTelegramCallback(
 	if (questionResult) {
 		if ("awaitingFreetext" in questionResult) return
-		await state.server.clientV2.question.reply({
+		await state.server.client.question.reply({
 			requestID: questionResult.requestId,
 			answers: questionResult.answers,
 		})
@@ -860,7 +957,7 @@ async function handleTelegramCallback(
 	})
 	if (permResult) {
-		await state.server.clientV2.permission.reply({
+		await state.server.client.permission.reply({
 			requestID: permResult.requestId,
 			reply: permResult.reply,
 		})
@@ -888,7 +985,7 @@ async function subscribeToEvents(state: BotState) {
 	log("info", "Subscribing to OpenCode events")
 	try {
-		const eventsResult = await state.server.clientV2.event.subscribe(
+		const eventsResult = await state.server.client.event.subscribe(
 			{ directory: state.directory },
 			{}
 		)
@@ -922,16 +1019,36 @@ async function handleOpenCodeEvent(state: BotState, ev: OpenCodeEvent) {
 	const sessionId =
 		ev.properties?.sessionID ??
 		ev.properties?.info?.sessionID ??
-		ev.properties?.part?.sessionID
+		ev.properties?.part?.sessionID ??
+		ev.properties?.session?.id
 	const sessionTitle = ev.properties?.session?.title
 	// Log errors in full and send to Telegram
 	if (ev.type === "session.error") {
 		const errorMsg = JSON.stringify(ev.properties, null, 2)
+		const error = ev.properties?.error as
+			| { name?: string; data?: { message?: string } }
+			| undefined
+		const errorName = error?.name
+		const errorText = error?.data?.message
+		const isInterrupted =
+			errorName === "MessageAbortedError" || errorText === "The operation was aborted."
 		log("error", "OpenCode session error", {
 			sessionId,
 			error: ev.properties,
 		})
+		if (isInterrupted) {
+			const sendResult = await state.telegram.sendMessage("Interrupted.")
+			if (sendResult.status === "error") {
+				log("error", "Failed to send interrupt message", {
+					error: sendResult.error.message,
+				})
+			}
+			return
+		}
 		// Send error to Telegram for visibility
 		const sendResult = await state.telegram.sendMessage(
 			`OpenCode Error:\n${errorMsg.slice(0, 3500)}`
@@ -984,6 +1101,12 @@ async function handleOpenCodeEvent(state: BotState, ev: OpenCodeEvent) {
 			const key = `${info.sessionID}:${info.id}`
 			state.assistantMessageIds.add(key)
 			log("debug", "Registered assistant message", { key })
+			const entry = state.typingIndicators.get(key)
+			if (entry && entry.mode === "tool") {
+				if (entry.timeout) clearTimeout(entry.timeout)
+				entry.stop()
+				state.typingIndicators.delete(key)
+			}
 		}
 	}
@@ -1001,6 +1124,39 @@ async function handleOpenCodeEvent(state: BotState, ev: OpenCodeEvent) {
 			return
 		}
+		const stopTypingIndicator = (targetKey: string) => {
+			const entry = state.typingIndicators.get(targetKey)
+			if (!entry) return
+			if (entry.timeout) clearTimeout(entry.timeout)
+			entry.stop()
+			state.typingIndicators.delete(targetKey)
+		}
+		const startTypingIndicator = (targetKey: string, mode: "idle" | "tool") => {
+			const existing = state.typingIndicators.get(targetKey)
+			if (existing && existing.mode === mode) return
+			if (existing) {
+				if (existing.timeout) clearTimeout(existing.timeout)
+				existing.stop()
+			}
+			const stop = state.telegram.startTyping(mode === "tool" ? 2000 : 4000)
+			state.typingIndicators.set(targetKey, { stop, timeout: null, mode })
+		}
+		const bumpTypingIndicator = (targetKey: string, mode: "idle" | "tool") => {
+			const existing = state.typingIndicators.get(targetKey)
+			if (!existing || existing.mode !== mode) {
+				startTypingIndicator(targetKey, mode)
+				return
+			}
+			if (existing.timeout) clearTimeout(existing.timeout)
+			existing.timeout = setTimeout(() => {
+				stopTypingIndicator(targetKey)
+			}, 12000)
+		}
 		log("debug", "Processing message part", {
 			key,
 			partType: part.type,
@@ -1014,12 +1170,11 @@ async function handleOpenCodeEvent(state: BotState, ev: OpenCodeEvent) {
 		state.pendingParts.set(key, existing)
 		if (part.type !== "step-finish") {
-			const typingResult = await state.telegram.sendTypingAction()
-			if (typingResult.status === "error") {
-				log("debug", "Typing action failed", {
-					error: typingResult.error.message,
-				})
-			}
+			const typingMode =
+				part.type === "tool" && (part.tool === "edit" || part.tool === "write")
+					? "tool"
+					: "idle"
+			bumpTypingIndicator(key, typingMode)
 		}
 		// Send tools/reasoning immediately (except edit/write tools - wait for completion to get diff data)
@@ -1047,6 +1202,7 @@ async function handleOpenCodeEvent(state: BotState, ev: OpenCodeEvent) {
 		// On step-finish, send remaining parts
 		if (part.type === "step-finish") {
+			stopTypingIndicator(key)
 			for (const p of existing) {
 				if (p.type === "step-start" || p.type === "step-finish") continue
 				if (state.sentPartIds.has(p.id)) continue
@@ -1149,6 +1305,24 @@ async function handleOpenCodeEvent(state: BotState, ev: OpenCodeEvent) {
 		}
 	}
+	if (ev.type === "message.updated") {
+		const info = ev.properties.info
+		if (info?.role === "assistant") {
+			const key = `${info.sessionID}:${info.id}`
+			const entry = state.typingIndicators.get(key)
+			if (entry && entry.mode === "tool") {
+				const stopTypingIndicator = (targetKey: string) => {
+					const existing = state.typingIndicators.get(targetKey)
+					if (!existing) return
+					if (existing.timeout) clearTimeout(existing.timeout)
+					existing.stop()
+					state.typingIndicators.delete(targetKey)
+				}
+				stopTypingIndicator(key)
+			}
+		}
+	}
 	const threadId = state.threadId ?? 0
 	if (ev.type === "question.asked") {

package/src/opencode.ts CHANGED Viewed

@@ -10,10 +10,6 @@ import {
   createOpencodeClient,
   type OpencodeClient,
   type Config,
-} from "@opencode-ai/sdk"
-import {
-  createOpencodeClient as createOpencodeClientV2,
-  type OpencodeClient as OpencodeClientV2,
 } from "@opencode-ai/sdk/v2"
 import { Result, TaggedError } from "better-result"
 import { createLogger } from "./log"
@@ -23,7 +19,6 @@ const log = createLogger()
 export interface OpenCodeServer {
   process: ChildProcess | null  // null when connecting to external server
   client: OpencodeClient
-  clientV2: OpencodeClientV2
   port: number
   directory: string
   baseUrl: string
@@ -114,6 +109,22 @@ async function waitForServer(
   )
 }
+/**
+ * Build the HTTP Basic auth header for the OpenCode server, if credentials are configured.
+ *
+ * Reads OPENCODE_SERVER_USERNAME and OPENCODE_SERVER_PASSWORD from the environment.
+ * If only the password is set, the username defaults to "opencode".
+ *
+ * @returns `{ Authorization: "Basic <base64>" }` when a password is set,
+ *   otherwise an empty object (no auth header is sent).
+ */
+function getAuthHeaders(): Record<string, string> {
+  const password = process.env.OPENCODE_SERVER_PASSWORD
+  if (!password) {
+    return {}
+  }
+  const username = process.env.OPENCODE_SERVER_USERNAME || "opencode"
+  // Base64-encode the UTF-8 bytes rather than calling btoa(): btoa() throws on
+  // any character outside Latin-1, so a password containing e.g. "✓" or an
+  // emoji would make this function throw instead of producing a header.
+  // NOTE(review): assumes the server decodes credentials as UTF-8 — confirm.
+  const credentials = Buffer.from(`${username}:${password}`, "utf8").toString("base64")
+  return { Authorization: `Basic ${credentials}` }
+}
 /**
  * Connect to an already-running OpenCode server
  */
@@ -141,6 +152,14 @@ export async function connectToServer(
   log("info", "External server ready", { baseUrl })
+  const authHeaders = getAuthHeaders()
+  const hasAuth = Object.keys(authHeaders).length > 0
+  if (hasAuth) {
+    log("info", "Using basic auth for OpenCode server", {
+      username: process.env.OPENCODE_SERVER_USERNAME || "opencode",
+    })
+  }
   const fetchWithTimeout = (request: Request) =>
     fetch(request, {
       // @ts-ignore - bun supports timeout
@@ -148,19 +167,14 @@ export async function connectToServer(
     })
   const client = createOpencodeClient({
-    baseUrl,
-    fetch: fetchWithTimeout,
-  })
-  const clientV2 = createOpencodeClientV2({
     baseUrl,
     fetch: fetchWithTimeout as typeof fetch,
+    headers: authHeaders,
   })
   server = {
     process: null,  // No process - external server
     client,
-    clientV2,
     port,
     directory,
     baseUrl,
@@ -261,11 +275,6 @@ export async function startServer(
     })
   const client = createOpencodeClient({
-    baseUrl,
-    fetch: fetchWithTimeout,
-  })
-  const clientV2 = createOpencodeClientV2({
     baseUrl,
     fetch: fetchWithTimeout as typeof fetch,
   })
@@ -273,7 +282,6 @@ export async function startServer(
   server = {
     process: serverProcess,
     client,
-    clientV2,
     port,
     directory,
     baseUrl,

package/src/telegram.ts CHANGED Viewed

@@ -21,6 +21,14 @@ export interface TelegramPhotoSize {
   file_size?: number
 }
+/**
+ * Shape of the `voice` payload attached to an incoming Telegram message.
+ * NOTE(review): field meanings below follow the Telegram Bot API `Voice`
+ * object as far as this file can tell — confirm against the API reference.
+ */
+export interface TelegramVoice {
+  // Identifier used to look up the file for download.
+  file_id: string
+  // Presumably a stable identifier that cannot be used for download — TODO confirm.
+  file_unique_id: string
+  // Length of the recording; presumably in seconds — TODO confirm.
+  duration: number
+  // Optional MIME type reported for the recording.
+  mime_type?: string
+  // Optional file size; presumably in bytes — TODO confirm.
+  file_size?: number
+}
 export interface TelegramMessage {
   message_id: number
   from?: {
@@ -38,6 +46,7 @@ export interface TelegramMessage {
   text?: string
   caption?: string
   photo?: TelegramPhotoSize[]
+  voice?: TelegramVoice
   reply_to_message?: TelegramMessage
 }

package/src/voice.ts ADDED Viewed

@@ -0,0 +1,81 @@
+import { Result, TaggedError } from "better-result"
+import type { LogFn } from "./log"
+/**
+ * Tagged error describing a failed transcription attempt.
+ * `message` is the human-readable description; `cause` optionally carries the
+ * underlying thrown value when the failure came from an exception.
+ */
+export class VoiceTranscriptionError extends TaggedError("VoiceTranscriptionError")<{
+  message: string
+  cause?: unknown
+}>() {}
+/**
+ * Tagged error used when no OpenAI API key is configured.
+ * Takes no constructor arguments; the message is always "No OPENAI_API_KEY set".
+ */
+export class NoApiKeyError extends TaggedError("NoApiKeyError")<{
+  message: string
+}>() {
+  constructor() {
+    super({ message: "No OPENAI_API_KEY set" })
+  }
+}
+/** Outcome of a transcription: the transcribed text on success, or one of the two error types. */
+export type TranscriptionResult = Result<string, VoiceTranscriptionError | NoApiKeyError>
+/**
+ * Report whether voice transcription can be attempted.
+ *
+ * @returns `true` when the OPENAI_API_KEY environment variable holds a
+ *   non-empty value, `false` when it is unset or empty.
+ */
+export function isVoiceTranscriptionAvailable(): boolean {
+  const apiKey = process.env.OPENAI_API_KEY
+  return Boolean(apiKey)
+}
+/**
+ * Transcribe a voice recording with OpenAI's Whisper API.
+ *
+ * The audio is uploaded as a multipart form file named "voice.ogg" with MIME
+ * type "audio/ogg", using the "whisper-1" model.
+ *
+ * @param audioBuffer - Raw bytes of the recording.
+ * @param log - Optional logger; when omitted nothing is logged.
+ * @returns `Result.ok(text)` on success. `Result.err(NoApiKeyError)` when
+ *   OPENAI_API_KEY is unset. `Result.err(VoiceTranscriptionError)` when the
+ *   request throws, the API answers with a non-OK status, or the response
+ *   body has no string `text` field.
+ */
+export async function transcribeVoice(
+  audioBuffer: ArrayBuffer,
+  log?: LogFn
+): Promise<TranscriptionResult> {
+  const apiKey = process.env.OPENAI_API_KEY
+  if (!apiKey) {
+    return Result.err(new NoApiKeyError())
+  }
+  log?.("debug", "Transcribing voice message", { size: audioBuffer.byteLength })
+  try {
+    const formData = new FormData()
+    const blob = new Blob([audioBuffer], { type: "audio/ogg" })
+    formData.append("file", blob, "voice.ogg")
+    formData.append("model", "whisper-1")
+    const response = await fetch("https://api.openai.com/v1/audio/transcriptions", {
+      method: "POST",
+      headers: {
+        Authorization: `Bearer ${apiKey}`,
+      },
+      body: formData,
+    })
+    if (!response.ok) {
+      const errorText = await response.text()
+      log?.("error", "Whisper API error", { status: response.status, error: errorText })
+      return Result.err(
+        new VoiceTranscriptionError({
+          message: `Whisper API error: ${response.status} - ${errorText}`,
+        })
+      )
+    }
+    // The body comes from the network, so narrow it from `unknown` instead of
+    // asserting its shape; a body without `text` gets a clear error rather
+    // than a TypeError from reading `.length` of undefined.
+    const data: unknown = await response.json()
+    const text =
+      typeof data === "object" && data !== null
+        ? (data as { text?: unknown }).text
+        : undefined
+    if (typeof text !== "string") {
+      log?.("error", "Whisper API returned an unexpected response", {
+        status: response.status,
+      })
+      return Result.err(
+        new VoiceTranscriptionError({
+          message: "Whisper API returned a response without a text field",
+        })
+      )
+    }
+    log?.("info", "Voice transcription complete", { textLength: text.length })
+    return Result.ok(text)
+  } catch (error) {
+    // Network failures and malformed JSON end up here; report them through
+    // the Result and keep the original value as `cause`.
+    log?.("error", "Voice transcription failed", { error: String(error) })
+    return Result.err(
+      new VoiceTranscriptionError({
+        message: `Transcription failed: ${String(error)}`,
+        cause: error,
+      })
+    )
+  }
+}
+/**
+ * Build the user-facing reply for a voice message received while no
+ * OPENAI_API_KEY is configured: a short explanation followed by setup steps.
+ *
+ * @returns The multi-line instruction text.
+ */
+export function getVoiceNotSupportedMessage(): string {
+  return `Cannot transcribe voice message - no OPENAI_API_KEY set.
+To enable voice message support:
+1. Get an API key from https://platform.openai.com/api-keys
+2. Add OPENAI_API_KEY to opencode-telegram-mirror's environment
+3. Restart the bot and try again`
+}