npm - @ozaiya/openclaw-channel - Versions diffs - 0.10.8 → 0.10.11 - Mend

@ozaiya/openclaw-channel 0.10.8 → 0.10.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/dist/src/actionConfirmation.d.ts +37 -0
package/dist/src/actionConfirmation.js +128 -0
package/dist/src/actionConfirmation.js.map +1 -0
package/dist/src/channel.js +402 -110
package/dist/src/channel.js.map +1 -1
package/dist/src/phoneCall.js +0 -2
package/dist/src/phoneCall.js.map +1 -1
package/dist/src/voiceCall.d.ts +0 -1
package/dist/src/voiceCall.js +0 -2
package/dist/src/voiceCall.js.map +1 -1
package/package.json +1 -1

package/dist/src/channel.js CHANGED

@@ -26,6 +26,7 @@ import { summarizeWithYuanbao } from "./yuanbao.js";
 import { summarizeWithDoubao } from "./doubao.js";
 import { fetchXueqiuPost, searchXueqiuPosts } from "./xueqiu.js";
 import { fetchSocialMediaPost, searchSocialMedia, extractSocialMediaContent } from "./socialMedia.js";
+import { requestConfirmation, parseActionCallback, resolveConfirmation } from "./actionConfirmation.js";
 const DEFAULT_API_BASE_URL = "https://api.ozai.dev";
 const DEFAULT_WEBHOOK_PATH = "/ozaiya/webhook";
 const DEFAULT_ACCOUNT_ID = "default";
@@ -39,8 +40,65 @@ const RICH_MESSAGE_GUIDANCE = "Prefer plain text for normal prose, code, markdow
 const unwrappedKeys = new Map();
 // Track which bot account handles each group (groupId → botAccountId) — used for gateway mode
 const groupToBotAccountId = new Map();
+// Track the originating groupId for each bot account's current dispatch
+// (accountId → groupId). Used by tools that don't receive groupId
+// (message_user, create_group) to know which group to send confirmation cards to.
+const accountToOriginGroupId = new Map();
 // Active voice call sessions keyed by callId
 const activeVoiceCalls = new Map();
+// --- Task Progress Tracking ---
+// Tools that are read-only and should not produce progress steps
+const READ_ONLY_TOOLS = new Set([
+    "search_users",
+    "list_groups",
+    "fetch_xueqiu_post",
+    "search_xueqiu",
+    "scrape_social_media_post",
+    "summarize_url",
+]);
+// Active dispatches keyed by accountId (one dispatch at a time per account)
+const activeDispatches = new Map();
+function buildTaskProgressContent(dispatch, completed) {
+    return {
+        taskProgress: {
+            taskId: dispatch.taskId,
+            steps: dispatch.steps.map((s) => ({ toolName: s.toolName, label: s.label, status: s.status })),
+            completed,
+        },
+    };
+}
+async function sendOrEditProgressMessage(dispatch, completed) {
+    const content = buildTaskProgressContent(dispatch, completed);
+    const groupKey = await getGroupKeyOrThrow(dispatch.account, dispatch.groupId);
+    const encrypted = encryptMessage(content, groupKey);
+    if (!dispatch.progressMessageId) {
+        const result = await sendMessage(dispatch.account.apiBaseUrl, dispatch.account.botToken, dispatch.groupId, encrypted);
+        dispatch.progressMessageId = result.message.id;
+    }
+    else {
+        await editMessage(dispatch.account.apiBaseUrl, dispatch.account.botToken, dispatch.progressMessageId, encrypted);
+    }
+}
+function onToolCallStart(dispatch, toolName, toolLabel) {
+    dispatch.steps.push({ toolName, label: toolLabel, status: "in_progress" });
+    sendOrEditProgressMessage(dispatch, false).catch(() => { });
+}
+function onToolCallComplete(dispatch, toolName, success) {
+    const step = dispatch.steps.find((s) => s.toolName === toolName && s.status === "in_progress");
+    if (step) {
+        step.status = success ? "completed" : "failed";
+    }
+    sendOrEditProgressMessage(dispatch, false).catch(() => { });
+}
+async function finalizeTaskProgress(dispatch) {
+    for (const step of dispatch.steps) {
+        if (step.status === "in_progress")
+            step.status = "completed";
+    }
+    if (dispatch.steps.length > 0) {
+        await sendOrEditProgressMessage(dispatch, true).catch(() => { });
+    }
+}
 /**
  * Report TTS/STT usage to the server for billing (fire-and-forget).
  * Bot owner is charged based on usage type and amount.
@@ -656,6 +714,17 @@ async function sendEncryptedChatContent(params) {
     recordState(account.accountId, { lastOutboundAt: Date.now() });
     return result;
 }
+/** Build ConfirmationDeps for the action confirmation module. */
+function makeConfirmationDeps(account) {
+    return {
+        sendEncrypted: (groupId, content) => sendEncryptedChatContent({ account, groupId, content }),
+        editEncrypted: async (groupId, messageId, content) => {
+            const groupKey = await getGroupKeyOrThrow(account, groupId);
+            const encrypted = encryptMessage(content, groupKey);
+            await editMessage(account.apiBaseUrl, account.botToken, messageId, encrypted);
+        },
+    };
+}
 async function prepareOutboundAttachment(account, groupId, attachment) {
     if (attachment.url) {
         const mediaRes = await fetch(attachment.url, { signal: AbortSignal.timeout(60_000) });
@@ -714,10 +783,124 @@ async function stageInboundAttachmentsForAgent(params) {
     }
     return staged;
 }
+/**
+ * Auto-discover an OpenAI-compatible provider (e.g. clider) and a vision-capable
+ * model from the OpenClaw model config to use for native PDF reading. No extra
+ * channel config required — it reuses whatever chat provider the bot already has.
+ */
+function resolveNativeDocReader(cfg) {
+    const providers = cfg?.models?.providers;
+    if (!providers)
+        return null;
+    for (const provider of Object.values(providers)) {
+        if (!provider?.baseUrl?.trim() || !provider?.apiKey?.trim())
+            continue;
+        if (!(provider.api ?? "").toLowerCase().includes("openai"))
+            continue;
+        const vision = (provider.models ?? []).find((m) => m?.id?.trim() && Array.isArray(m.input) && m.input.includes("image"));
+        if (vision?.id?.trim()) {
+            return {
+                baseUrl: provider.baseUrl.trim().replace(/\/+$/, ""),
+                apiKey: provider.apiKey.trim(),
+                model: vision.id.trim(),
+            };
+        }
+    }
+    return null;
+}
+const NATIVE_DOC_MAX_BYTES = 15 * 1024 * 1024;
+const NATIVE_DOC_PROMPT = "请完整、逐字地提取这个文档的全部文字内容，保留标题、段落、列表和表格结构，用 Markdown 输出。" +
+    "这可能是扫描件，请用视觉识别（OCR）。直接输出文档内容本身，不要添加任何前言或说明。";
+/** Read a PDF natively via an OpenAI-compatible model's `input_file` content block. */
+async function readDocumentNatively(params) {
+    const { reader, buffer, filename, mimeType } = params;
+    const dataUrl = `data:${mimeType};base64,${buffer.toString("base64")}`;
+    const res = await fetch(`${reader.baseUrl}/chat/completions`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json", Authorization: `Bearer ${reader.apiKey}` },
+        body: JSON.stringify({
+            model: reader.model,
+            max_tokens: 16000,
+            messages: [
+                {
+                    role: "user",
+                    content: [
+                        { type: "text", text: NATIVE_DOC_PROMPT },
+                        { type: "file", file: { filename, file_data: dataUrl } },
+                    ],
+                },
+            ],
+        }),
+        signal: AbortSignal.timeout(params.timeoutMs ?? 120_000),
+    });
+    if (!res.ok) {
+        const body = await res.text().catch(() => "");
+        throw new Error(`document read failed (${res.status})${body ? `: ${body.slice(0, 200)}` : ""}`);
+    }
+    const json = (await res.json().catch(() => null));
+    const content = json?.choices?.[0]?.message?.content;
+    const text = typeof content === "string"
+        ? content
+        : Array.isArray(content)
+            ? content
+                .map((b) => b && typeof b === "object" && "text" in b
+                ? String(b.text ?? "")
+                : "")
+                .join("")
+            : "";
+    if (!text.trim())
+        throw new Error("document read returned empty content");
+    return text.trim();
+}
+/**
+ * For each staged PDF, read it natively and return a text block to append to the
+ * agent's turn so it can answer inline without a tool round-trip or file output.
+ * Returns "" when there are no PDFs, no usable provider, or all reads fail (in
+ * which case the agent falls back to its normal pdf-tool path).
+ */
+async function buildNativeDocumentContext(params) {
+    const pdfs = params.staged.filter((a) => a.kind === "pdf" || a.mime === "application/pdf");
+    if (pdfs.length === 0)
+        return "";
+    const reader = resolveNativeDocReader(params.cfg);
+    if (!reader)
+        return "";
+    const blocks = [];
+    for (const att of pdfs) {
+        try {
+            const buffer = await fs.readFile(att.stagedPath);
+            if (buffer.byteLength > NATIVE_DOC_MAX_BYTES) {
+                params.log?.warn?.(`ozaiya: skipping native read of ${att.name} (too large: ${buffer.byteLength} bytes)`);
+                continue;
+            }
+            const content = await readDocumentNatively({
+                reader,
+                buffer,
+                filename: att.name || "document.pdf",
+                mimeType: "application/pdf",
+            });
+            params.log?.info?.(`ozaiya: natively read PDF "${att.name}" via ${reader.model} (${content.length} chars)`);
+            blocks.push(`【系统已自动读取附件「${att.name}」的完整内容（已用视觉模型识别，扫描件也已 OCR）】：\n\n${content}`);
+        }
+        catch (err) {
+            params.log?.warn?.(`ozaiya: native read of ${att.name} failed: ${err instanceof Error ? err.message : String(err)}`);
+        }
+    }
+    if (blocks.length === 0)
+        return "";
+    return ("\n\n" +
+        blocks.join("\n\n---\n\n") +
+        "\n\n[以上为附件的完整原文，已替你读取完毕。请据此直接在聊天里用文字回答用户的请求；" +
+        "不要再调用 pdf 或任何文件读取工具，也不要用 exec/python 生成或发送任何文件" +
+        "（.doc/.docx/.rtf/.txt 等）——把内容或结果直接发在消息里即可。]");
+}
 /**
  * Build the full set of channel agent tools for a given bot account.
  * Used by both the plugin's agentTools factory (for OpenClaw tool registration)
  * and the text-based tool call fallback in deliver().
+ *
+ * Non-read-only tools are wrapped to report progress steps to the active
+ * dispatch's task progress card (when one exists for this account).
  */
 function buildChannelTools(account, cfg) {
     const resolveForGroup = (groupId) => resolveAccountForGroup(cfg, groupId) ?? account;
@@ -753,6 +936,36 @@ function buildChannelTools(account, cfg) {
     const summarizeTool = createSummarizeUrlTool(cfg);
     if (summarizeTool)
         tools.push(summarizeTool);
+    // Wrap non-read-only tools with progress tracking.
+    // When the tool executes, it looks up the current active dispatch for this account.
+    const accountId = account.accountId;
+    for (let i = 0; i < tools.length; i++) {
+        const tool = tools[i];
+        if (READ_ONLY_TOOLS.has(tool.name))
+            continue;
+        const originalExecute = tool.execute;
+        tools[i] = {
+            ...tool,
+            execute: async (toolCallId, rawArgs) => {
+                const dispatch = activeDispatches.get(accountId);
+                if (dispatch)
+                    onToolCallStart(dispatch, tool.name, tool.label);
+                try {
+                    const result = await originalExecute(toolCallId, rawArgs);
+                    const dispatchAfter = activeDispatches.get(accountId);
+                    if (dispatchAfter)
+                        onToolCallComplete(dispatchAfter, tool.name, true);
+                    return result;
+                }
+                catch (err) {
+                    const dispatchAfter = activeDispatches.get(accountId);
+                    if (dispatchAfter)
+                        onToolCallComplete(dispatchAfter, tool.name, false);
+                    throw err;
+                }
+            },
+        };
+    }
     return tools;
 }
 export const ozaiyaPlugin = {
@@ -1351,6 +1564,18 @@ function createSendDirectMessageTool(account) {
             try {
                 const args = rawArgs;
                 const { userId, message } = args;
+                // Request user confirmation before sending a DM
+                const originGroupId = accountToOriginGroupId.get(account.accountId);
+                if (originGroupId) {
+                    const deps = makeConfirmationDeps(account);
+                    const approved = await requestConfirmation(deps, originGroupId, "message_user", {
+                        userId,
+                        message: message.length > 100 ? message.slice(0, 100) + "…" : message,
+                    });
+                    if (!approved) {
+                        return { content: [{ type: "text", text: "Direct message was not approved by the user." }] };
+                    }
+                }
                 // Try to resolve as ozaiyaId first, fall back to treating as internal accountId
                 let accountId = userId;
                 const users = await searchUsers(account.apiBaseUrl, account.botToken, userId).catch(() => []);
@@ -1566,6 +1791,18 @@ function createCreateGroupTool(account) {
                 if (!groupName) {
                     return { content: [{ type: "text", text: "Error: group name is required." }] };
                 }
+                // Request user confirmation before creating a group
+                const originGroupId = accountToOriginGroupId.get(account.accountId);
+                if (originGroupId) {
+                    const deps = makeConfirmationDeps(account);
+                    const approved = await requestConfirmation(deps, originGroupId, "create_group", {
+                        name: groupName,
+                        ...(args.memberIds?.length ? { members: args.memberIds.join(", ") } : {}),
+                    });
+                    if (!approved) {
+                        return { content: [{ type: "text", text: "Group creation was not approved by the user." }] };
+                    }
+                }
                 // Resolve ozaiyaIds to account IDs
                 let accountIds;
                 if (args.memberIds && args.memberIds.length > 0) {
@@ -1934,6 +2171,15 @@ function createStartInAppCallTool(account, cfg) {
         execute: async (_toolCallId, rawArgs) => {
             const args = rawArgs;
             try {
+                // Request user confirmation before starting the call
+                const deps = makeConfirmationDeps(account);
+                const approved = await requestConfirmation(deps, args.groupId, "start_in_app_call", {
+                    groupId: args.groupId,
+                    type: args.type ?? "voice",
+                });
+                if (!approved) {
+                    return { content: [{ type: "text", text: "Call was not approved by the user." }] };
+                }
                 const result = await startCall(account.apiBaseUrl, account.botToken, args.groupId, args.type ?? "voice");
                 if (!result) {
                     return { content: [{ type: "text", text: "Failed to start call. The bot may not have permission or is not a member of the group." }] };
@@ -2039,6 +2285,16 @@ function createMakePhoneCallTool(account, cfg) {
         execute: async (_toolCallId, rawArgs) => {
             const args = rawArgs;
             try {
+                // Request user confirmation before making the phone call
+                const deps = makeConfirmationDeps(account);
+                const approved = await requestConfirmation(deps, args.groupId, "make_phone_call", {
+                    phoneNumber: args.phoneNumber,
+                    mode: args.mode ?? "auto",
+                    ...(args.purpose ? { purpose: args.purpose } : {}),
+                });
+                if (!approved) {
+                    return { content: [{ type: "text", text: "Phone call was not approved by the user." }] };
+                }
                 const mode = args.mode ?? "auto";
                 const result = await startPhoneCall(account.apiBaseUrl, account.botToken, args.groupId, args.phoneNumber, mode, args.purpose);
                 // Report connected status
@@ -2637,6 +2893,14 @@ ctx) {
     if (inboundAttachments.length > 0) {
         ctx.log?.info?.(`ozaiya: staged ${stagedInboundAttachments.length}/${inboundAttachments.length} inbound attachments for OpenClaw media context`);
     }
+    // Read inbound PDFs natively (feed straight to a vision model) and inject their
+    // content into the agent's turn, so it answers inline without a pdf-tool round
+    // trip or file generation. Empty string when there are no PDFs / no provider.
+    const nativeDocContext = await buildNativeDocumentContext({
+        cfg: ctx.cfg,
+        staged: stagedInboundAttachments,
+        log: ctx.log,
+    });
     const transcriptionResult = await maybeTranscribeInboundAudio({
         stt: account.stt,
         messageText,
@@ -2685,6 +2949,9 @@ ctx) {
             id: groupId,
         },
     });
+    // Track originating group for this account so tools without groupId
+    // (message_user, create_group) can send confirmation cards to the right group.
+    accountToOriginGroupId.set(account.accountId, groupId);
     const fromAddress = `ozaiya:group:${groupId}`;
     const conversationLabel = `group:${groupId}`;
     // Build inbound session envelope context
@@ -2702,9 +2969,10 @@ ctx) {
         "Rules: respond concisely (1-3 sentences), use natural spoken language, " +
         "never use markdown/code blocks/bullet lists/URLs/emojis. " +
         'Do not say "sure" or "of course" — just answer directly.';
+    const agentInputWithDocs = nativeDocContext ? `${agentInput}${nativeDocContext}` : agentInput;
     const effectiveAgentInput = voiceReply
-        ? `${voiceReplyPrompt || DEFAULT_VOICE_REPLY_PROMPT}\n\n${agentInput}`
-        : agentInput;
+        ? `${voiceReplyPrompt || DEFAULT_VOICE_REPLY_PROMPT}\n\n${agentInputWithDocs}`
+        : agentInputWithDocs;
     const body = ch.reply.formatAgentEnvelope({
         channel: "Ozaiya",
         from: `${message.senderName} (${conversationLabel})`,
@@ -2788,6 +3056,15 @@ ctx) {
     }).catch((err) => {
         ctx.log?.warn?.(`ozaiya: failed recording session: ${String(err)}`);
     });
+    // Create dispatch tracking for task progress card
+    const dispatch = {
+        taskId: `${account.accountId}:${groupId}:${Date.now()}`,
+        groupId,
+        account,
+        steps: [],
+        progressMessageId: null,
+    };
+    activeDispatches.set(account.accountId, dispatch);
     // Build channel tools map for text-based tool call fallback.
     // When a model outputs tool calls as plain text instead of structured API tool_calls,
     // we match against registered tool names and execute via their .execute() method.
@@ -2796,136 +3073,142 @@ ctx) {
     const channelToolsByName = new Map(channelTools.map((t) => [t.name, t]));
     ctx.log?.info?.(`ozaiya: text fallback tools loaded: ${channelToolsByName.size} tools [${[...channelToolsByName.keys()].join(", ")}]`);
     // Dispatch to agent with buffered block dispatcher
-    await ch.reply.dispatchReplyWithBufferedBlockDispatcher({
-        ctx: msgCtx,
-        cfg: ctx.cfg,
-        dispatcherOptions: {
-            deliver: async (replyPayload, _info) => {
-                let replyText = replyPayload.text;
-                ctx.log?.info?.(`ozaiya: deliver called, text length=${replyText?.length ?? 0}, empty=${!replyText?.trim()}, voiceReply=${voiceReply}, voiceReplyVoice=${voiceReplyVoice ?? 'none'}`);
-                if (!replyText?.trim())
-                    return;
-                // Generic fallback: intercept tool calls that models output as text
-                // instead of structured API tool_calls. Supports two formats:
-                // 1. JSON function syntax: tool_name({"arg":"value"}) or tool_name({arg: "value"})
-                // 2. XML: <function_calls><invoke name="tool_name"><parameter name="arg">value</parameter></invoke></function_calls>
-                let textToolsExecuted = false;
-                // --- Format 1: JSON function syntax  tool_name({"key":"val"}) ---
-                if (channelToolsByName.size > 0) {
-                    const toolNames = [...channelToolsByName.keys()].map((n) => n.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
-                    const jsonFnRegex = new RegExp(`\\b(${toolNames.join("|")})\\s*\\(\\s*(\\{[\\s\\S]*?\\})\\s*\\)`, "g");
-                    let jsonMatch;
-                    while ((jsonMatch = jsonFnRegex.exec(replyText)) !== null) {
-                        const toolName = jsonMatch[1];
-                        const tool = channelToolsByName.get(toolName);
-                        if (!tool)
-                            continue;
-                        // Always strip the tool call text from the message — users should never
-                        // see raw tool_name({...}) syntax even if execution fails.
-                        textToolsExecuted = true;
-                        try {
-                            // Try strict JSON first, then lenient (unquoted keys, trailing commas)
-                            let args;
+    try {
+        await ch.reply.dispatchReplyWithBufferedBlockDispatcher({
+            ctx: msgCtx,
+            cfg: ctx.cfg,
+            dispatcherOptions: {
+                deliver: async (replyPayload, _info) => {
+                    let replyText = replyPayload.text;
+                    ctx.log?.info?.(`ozaiya: deliver called, text length=${replyText?.length ?? 0}, empty=${!replyText?.trim()}, voiceReply=${voiceReply}, voiceReplyVoice=${voiceReplyVoice ?? 'none'}`);
+                    if (!replyText?.trim())
+                        return;
+                    // Generic fallback: intercept tool calls that models output as text
+                    // instead of structured API tool_calls. Supports two formats:
+                    // 1. JSON function syntax: tool_name({"arg":"value"}) or tool_name({arg: "value"})
+                    // 2. XML: <function_calls><invoke name="tool_name"><parameter name="arg">value</parameter></invoke></function_calls>
+                    let textToolsExecuted = false;
+                    // --- Format 1: JSON function syntax  tool_name({"key":"val"}) ---
+                    if (channelToolsByName.size > 0) {
+                        const toolNames = [...channelToolsByName.keys()].map((n) => n.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
+                        const jsonFnRegex = new RegExp(`\\b(${toolNames.join("|")})\\s*\\(\\s*(\\{[\\s\\S]*?\\})\\s*\\)`, "g");
+                        let jsonMatch;
+                        while ((jsonMatch = jsonFnRegex.exec(replyText)) !== null) {
+                            const toolName = jsonMatch[1];
+                            const tool = channelToolsByName.get(toolName);
+                            if (!tool)
+                                continue;
+                            // Always strip the tool call text from the message — users should never
+                            // see raw tool_name({...}) syntax even if execution fails.
+                            textToolsExecuted = true;
                             try {
-                                args = JSON.parse(jsonMatch[2]);
+                                // Try strict JSON first, then lenient (unquoted keys, trailing commas)
+                                let args;
+                                try {
+                                    args = JSON.parse(jsonMatch[2]);
+                                }
+                                catch {
+                                    // Handle JS-style object literals: unquoted keys, trailing commas
+                                    const lenient = jsonMatch[2]
+                                        .replace(/([{,]\s*)([a-zA-Z_]\w*)\s*:/g, '$1"$2":')
+                                        .replace(/,\s*}/g, "}");
+                                    args = JSON.parse(lenient);
+                                }
+                                ctx.log?.info?.(`ozaiya: text fallback — executing ${toolName}(${JSON.stringify(args)})`);
+                                await tool.execute(`text-fallback-${Date.now()}`, args);
                             }
-                            catch {
-                                // Handle JS-style object literals: unquoted keys, trailing commas
-                                const lenient = jsonMatch[2]
-                                    .replace(/([{,]\s*)([a-zA-Z_]\w*)\s*:/g, '$1"$2":')
-                                    .replace(/,\s*}/g, "}");
-                                args = JSON.parse(lenient);
+                            catch (err) {
+                                ctx.log?.warn?.(`ozaiya: text fallback — ${toolName} failed: ${String(err)}`);
                             }
-                            ctx.log?.info?.(`ozaiya: text fallback — executing ${toolName}(${JSON.stringify(args)})`);
-                            await tool.execute(`text-fallback-${Date.now()}`, args);
                         }
-                        catch (err) {
-                            ctx.log?.warn?.(`ozaiya: text fallback — ${toolName} failed: ${String(err)}`);
+                        if (textToolsExecuted) {
+                            replyText = replyText.replace(jsonFnRegex, "").trim();
+                            if (!replyText)
+                                return;
                         }
                     }
-                    if (textToolsExecuted) {
-                        replyText = replyText.replace(jsonFnRegex, "").trim();
+                    // --- Format 2: XML <function_calls> ---
+                    if (replyText.includes("<function_calls>") && replyText.includes("<invoke")) {
+                        const invokeRegex = /<invoke\s+name="([^"]+)">([\s\S]*?)<\/invoke>/g;
+                        let match;
+                        while ((match = invokeRegex.exec(replyText)) !== null) {
+                            const toolName = match[1];
+                            const tool = channelToolsByName.get(toolName);
+                            if (!tool)
+                                continue;
+                            const paramsXml = match[2];
+                            const paramRegex = /<parameter\s+name="([^"]+)">([^<]*)<\/parameter>/g;
+                            const args = {};
+                            let pm;
+                            while ((pm = paramRegex.exec(paramsXml)) !== null) {
+                                args[pm[1]] = pm[2];
+                            }
+                            ctx.log?.info?.(`ozaiya: text fallback (XML) — executing ${toolName}(${JSON.stringify(args)})`);
+                            try {
+                                await tool.execute(`text-fallback-xml-${Date.now()}`, args);
+                                textToolsExecuted = true;
+                            }
+                            catch (err) {
+                                ctx.log?.warn?.(`ozaiya: text fallback (XML) — ${toolName} failed: ${String(err)}`);
+                            }
+                        }
+                        replyText = replyText
+                            .replace(/<function_calls>[\s\S]*?<\/function_calls>/g, "")
+                            .replace(/<function_results>[\s\S]*?<\/function_results>/g, "")
+                            .replace(/\[\[reply_to_current\]\]/g, "")
+                            .replace(/NO_REPLY/g, "")
+                            .trim();
+                        if (!replyText && textToolsExecuted)
+                            return;
                         if (!replyText)
                             return;
                     }
-                }
-                // --- Format 2: XML <function_calls> ---
-                if (replyText.includes("<function_calls>") && replyText.includes("<invoke")) {
-                    const invokeRegex = /<invoke\s+name="([^"]+)">([\s\S]*?)<\/invoke>/g;
-                    let match;
-                    while ((match = invokeRegex.exec(replyText)) !== null) {
-                        const toolName = match[1];
-                        const tool = channelToolsByName.get(toolName);
-                        if (!tool)
-                            continue;
-                        const paramsXml = match[2];
-                        const paramRegex = /<parameter\s+name="([^"]+)">([^<]*)<\/parameter>/g;
-                        const args = {};
-                        let pm;
-                        while ((pm = paramRegex.exec(paramsXml)) !== null) {
-                            args[pm[1]] = pm[2];
-                        }
-                        ctx.log?.info?.(`ozaiya: text fallback (XML) — executing ${toolName}(${JSON.stringify(args)})`);
-                        try {
-                            await tool.execute(`text-fallback-xml-${Date.now()}`, args);
-                            textToolsExecuted = true;
-                        }
-                        catch (err) {
-                            ctx.log?.warn?.(`ozaiya: text fallback (XML) — ${toolName} failed: ${String(err)}`);
+                    // Voice reply: synthesize TTS audio and send as voice message
+                    if (voiceReply) {
+                        ctx.log?.info?.(`ozaiya: voice reply — synthesizing TTS for group ${groupId}`);
+                        const audioBuffer = await synthesizeVoiceReply(replyText, ctx, voiceReplyVoice ?? undefined);
+                        if (audioBuffer) {
+                            const ext = audioBuffer.ext;
+                            const mime = ext === ".mp3" ? "audio/mpeg" : ext === ".opus" ? "audio/ogg" : ext === ".wav" ? "audio/wav" : "audio/mpeg";
+                            const fileInfo = await uploadFile(account.apiBaseUrl, account.botToken, groupId, `voice${ext}`, mime, audioBuffer.data);
+                            await sendEncryptedChatContent({
+                                account,
+                                groupId,
+                                content: { text: replyText, files: [fileInfo] },
+                                log: ctx.log,
+                            });
+                            ctx.log?.info?.(`ozaiya: voice reply sent successfully (${ext}, ${audioBuffer.data.length} bytes)`);
+                            reportUsage(account, "tts", { provider: "voice-reply" });
+                            return;
                         }
-                    }
-                    replyText = replyText
-                        .replace(/<function_calls>[\s\S]*?<\/function_calls>/g, "")
-                        .replace(/<function_results>[\s\S]*?<\/function_results>/g, "")
-                        .replace(/\[\[reply_to_current\]\]/g, "")
-                        .replace(/NO_REPLY/g, "")
-                        .trim();
-                    if (!replyText && textToolsExecuted)
-                        return;
-                    if (!replyText)
-                        return;
-                }
-                // Voice reply: synthesize TTS audio and send as voice message
-                if (voiceReply) {
-                    ctx.log?.info?.(`ozaiya: voice reply — synthesizing TTS for group ${groupId}`);
-                    const audioBuffer = await synthesizeVoiceReply(replyText, ctx, voiceReplyVoice ?? undefined);
-                    if (audioBuffer) {
-                        const ext = audioBuffer.ext;
-                        const mime = ext === ".mp3" ? "audio/mpeg" : ext === ".opus" ? "audio/ogg" : ext === ".wav" ? "audio/wav" : "audio/mpeg";
-                        const fileInfo = await uploadFile(account.apiBaseUrl, account.botToken, groupId, `voice${ext}`, mime, audioBuffer.data);
+                        ctx.log?.warn?.(`ozaiya: TTS failed, falling back to text reply`);
                         await sendEncryptedChatContent({
                             account,
                             groupId,
-                            content: { text: replyText, files: [fileInfo] },
+                            content: { text: "⚠️ Voice synthesis failed, falling back to text." },
                             log: ctx.log,
                         });
-                        ctx.log?.info?.(`ozaiya: voice reply sent successfully (${ext}, ${audioBuffer.data.length} bytes)`);
-                        reportUsage(account, "tts", { provider: "voice-reply" });
-                        return;
                     }
-                    ctx.log?.warn?.(`ozaiya: TTS failed, falling back to text reply`);
+                    ctx.log?.info?.(`ozaiya: sending reply to group ${groupId}`);
                     await sendEncryptedChatContent({
                         account,
                         groupId,
-                        content: { text: "⚠️ Voice synthesis failed, falling back to text." },
+                        content: { text: replyText },
                         log: ctx.log,
                     });
-                }
-                ctx.log?.info?.(`ozaiya: sending reply to group ${groupId}`);
-                await sendEncryptedChatContent({
-                    account,
-                    groupId,
-                    content: { text: replyText },
-                    log: ctx.log,
-                });
-                ctx.log?.info?.(`ozaiya: reply sent successfully`);
-            },
-            onError: (err) => {
-                ctx.log?.warn?.(`ozaiya: reply dispatch error: ${String(err)}`);
+                    ctx.log?.info?.(`ozaiya: reply sent successfully`);
+                },
+                onError: (err) => {
+                    ctx.log?.warn?.(`ozaiya: reply dispatch error: ${String(err)}`);
+                },
             },
-        },
-        replyOptions: account.model ? { isHeartbeat: true, heartbeatModelOverride: account.model } : undefined,
-    });
+            replyOptions: account.model ? { isHeartbeat: true, heartbeatModelOverride: account.model } : undefined,
+        });
+    }
+    finally {
+        await finalizeTaskProgress(dispatch);
+        activeDispatches.delete(account.accountId);
+    }
 }
 async function resetRouteSession(route, ctx) {
     const runtime = getOzaiyaRuntime();
@@ -2983,6 +3266,15 @@ async function handleCallbackQuery(payload,
 ctx) {
     const { groupId, messageId, callbackData, buttonText, buttonRowIndex, buttonIndex, from } = payload;
     const account = ctx.account;
+    // Intercept action confirmation callbacks (oz_ac: prefix) — resolve the
+    // pending confirmation promise and don't dispatch to the agent.
+    const actionCallback = parseActionCallback(callbackData);
+    if (actionCallback) {
+        const deps = makeConfirmationDeps(account);
+        const resolved = await resolveConfirmation(deps, actionCallback.actionId, actionCallback.approved);
+        if (resolved)
+            return; // intercepted — don't dispatch to agent
+    }
     const runtime = getOzaiyaRuntime();
     const ch = runtime.channel;
     const { callbackBody, callbackCommand, callbackLabel, callbackOptionIndex } = normalizeCallbackQueryPayload({