@ozaiya/openclaw-channel 0.10.8 → 0.10.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,6 +26,7 @@ import { summarizeWithYuanbao } from "./yuanbao.js";
26
26
  import { summarizeWithDoubao } from "./doubao.js";
27
27
  import { fetchXueqiuPost, searchXueqiuPosts } from "./xueqiu.js";
28
28
  import { fetchSocialMediaPost, searchSocialMedia, extractSocialMediaContent } from "./socialMedia.js";
29
+ import { requestConfirmation, parseActionCallback, resolveConfirmation } from "./actionConfirmation.js";
29
30
  const DEFAULT_API_BASE_URL = "https://api.ozai.dev";
30
31
  const DEFAULT_WEBHOOK_PATH = "/ozaiya/webhook";
31
32
  const DEFAULT_ACCOUNT_ID = "default";
@@ -39,8 +40,65 @@ const RICH_MESSAGE_GUIDANCE = "Prefer plain text for normal prose, code, markdow
39
40
  const unwrappedKeys = new Map();
40
41
  // Track which bot account handles each group (groupId → botAccountId) — used for gateway mode
41
42
  const groupToBotAccountId = new Map();
43
+ // Track the originating groupId for each bot account's current dispatch
44
+ // (accountId → groupId). Used by tools that don't receive groupId
45
+ // (message_user, create_group) to know which group to send confirmation cards to.
46
+ const accountToOriginGroupId = new Map();
42
47
  // Active voice call sessions keyed by callId
43
48
  const activeVoiceCalls = new Map();
49
// --- Task Progress Tracking ---

/**
 * Names of tools that are read-only and should not produce progress steps.
 * Tools listed here are left unwrapped when progress tracking is attached.
 */
const READ_ONLY_TOOLS = new Set([
    "search_users",
    "list_groups",
    "fetch_xueqiu_post",
    "search_xueqiu",
    "scrape_social_media_post",
    "summarize_url",
]);

/** Active dispatches keyed by accountId (one dispatch at a time per account). */
const activeDispatches = new Map();
61
/**
 * Build the message content object for a task progress card.
 *
 * Only `toolName`, `label` and `status` are copied from each step, so any other
 * bookkeeping stored on a step never reaches the outgoing message.
 *
 * @param {{ taskId: string, steps: Array<{ toolName: string, label: string, status: string }> }} dispatch
 *   Dispatch whose steps should be rendered.
 * @param {boolean} completed Whether the whole task is finished.
 * @returns {{ taskProgress: { taskId: string, steps: Array<object>, completed: boolean } }}
 */
function buildTaskProgressContent(dispatch, completed) {
    const steps = dispatch.steps.map(({ toolName, label, status }) => ({ toolName, label, status }));
    return {
        taskProgress: {
            taskId: dispatch.taskId,
            steps,
            completed,
        },
    };
}
70
/**
 * Publish the current task progress card for a dispatch: send a new message the
 * first time, edit that same message on every later call.
 *
 * Updates are serialised per dispatch through `dispatch.progressQueue`. Callers
 * fire this without awaiting, so without the queue a second call could start
 * before the first send had stored `progressMessageId`, creating a duplicate
 * card and letting edits land out of order.
 *
 * The card content is built when the queued update actually runs, so it always
 * reflects the latest step state at that moment.
 *
 * @param {object} dispatch Dispatch record; reads `account`, `groupId`, `steps`,
 *   `progressMessageId`, and reads/writes `progressQueue`.
 * @param {boolean} completed Whether the card should be marked as finished.
 * @returns {Promise<void>} Settles when this particular update was published;
 *   rejects if key lookup, encryption, send or edit fails.
 */
async function sendOrEditProgressMessage(dispatch, completed) {
    const publish = async () => {
        const content = buildTaskProgressContent(dispatch, completed);
        const groupKey = await getGroupKeyOrThrow(dispatch.account, dispatch.groupId);
        const encrypted = encryptMessage(content, groupKey);
        const { apiBaseUrl, botToken } = dispatch.account;
        if (!dispatch.progressMessageId) {
            const result = await sendMessage(apiBaseUrl, botToken, dispatch.groupId, encrypted);
            dispatch.progressMessageId = result.message.id;
        }
        else {
            await editMessage(apiBaseUrl, botToken, dispatch.progressMessageId, encrypted);
        }
    };
    const pending = (dispatch.progressQueue ?? Promise.resolve()).then(publish);
    // The stored tail never rejects, so one failed update does not block later ones.
    dispatch.progressQueue = pending.catch(() => { });
    return pending;
}
82
/**
 * Record that a tool call has started and refresh the progress card.
 *
 * @param {object} dispatch Dispatch whose `steps` array receives the new step.
 * @param {string} toolName Name of the tool being executed.
 * @param {string} toolLabel Label stored on the step for display.
 */
function onToolCallStart(dispatch, toolName, toolLabel) {
    dispatch.steps.push({ toolName, label: toolLabel, status: "in_progress" });
    // Best-effort: a failed progress update must never break the tool call itself,
    // so the rejection is intentionally dropped here.
    sendOrEditProgressMessage(dispatch, false).catch(() => { });
}
86
/**
 * Mark the first still-running step for `toolName` as finished and refresh the
 * progress card. If no matching in-progress step exists, the steps are left
 * untouched but the card is still refreshed.
 *
 * @param {object} dispatch Dispatch whose `steps` are updated in place.
 * @param {string} toolName Name of the tool that finished.
 * @param {boolean} success `true` marks the step "completed", `false` marks it "failed".
 */
function onToolCallComplete(dispatch, toolName, success) {
    const running = dispatch.steps.find((step) => step.toolName === toolName && step.status === "in_progress");
    if (running) {
        running.status = success ? "completed" : "failed";
    }
    // Best-effort: progress reporting failures are intentionally ignored.
    sendOrEditProgressMessage(dispatch, false).catch(() => { });
}
93
/**
 * Close out a dispatch's progress card: every step still "in_progress" is
 * marked "completed", and — if at least one step was recorded — a final update
 * with `completed = true` is published.
 *
 * Publishing is best-effort; a failure is ignored and this function does not reject
 * because of it.
 *
 * @param {object} dispatch Dispatch whose `steps` are updated in place.
 * @returns {Promise<void>}
 */
async function finalizeTaskProgress(dispatch) {
    for (const step of dispatch.steps) {
        if (step.status === "in_progress") {
            step.status = "completed";
        }
    }
    if (dispatch.steps.length === 0) {
        // Nothing was ever reported, so there is no card to finish.
        return;
    }
    await sendOrEditProgressMessage(dispatch, true).catch(() => { });
}
44
102
  /**
45
103
  * Report TTS/STT usage to the server for billing (fire-and-forget).
46
104
  * Bot owner is charged based on usage type and amount.
@@ -656,6 +714,17 @@ async function sendEncryptedChatContent(params) {
656
714
  recordState(account.accountId, { lastOutboundAt: Date.now() });
657
715
  return result;
658
716
  }
717
/**
 * Build ConfirmationDeps for the action confirmation module.
 *
 * @param {object} account Bot account; `apiBaseUrl` and `botToken` are used for edits,
 *   and the whole account is forwarded when sending.
 * @returns {{
 *   sendEncrypted: (groupId: string, content: object) => Promise<unknown>,
 *   editEncrypted: (groupId: string, messageId: string, content: object) => Promise<void>
 * }} `sendEncrypted` delegates to `sendEncryptedChatContent`; `editEncrypted`
 *   encrypts `content` with the group's key and edits the given message.
 */
function makeConfirmationDeps(account) {
    const sendEncrypted = (groupId, content) => sendEncryptedChatContent({ account, groupId, content });
    const editEncrypted = async (groupId, messageId, content) => {
        const groupKey = await getGroupKeyOrThrow(account, groupId);
        const encrypted = encryptMessage(content, groupKey);
        await editMessage(account.apiBaseUrl, account.botToken, messageId, encrypted);
    };
    return { sendEncrypted, editEncrypted };
}
659
728
  async function prepareOutboundAttachment(account, groupId, attachment) {
660
729
  if (attachment.url) {
661
730
  const mediaRes = await fetch(attachment.url, { signal: AbortSignal.timeout(60_000) });
@@ -714,10 +783,124 @@ async function stageInboundAttachmentsForAgent(params) {
714
783
  }
715
784
  return staged;
716
785
  }
786
/**
 * Auto-discover an OpenAI-compatible provider (e.g. clider) and a vision-capable
 * model from the OpenClaw model config to use for native PDF reading. No extra
 * channel config required — it reuses whatever chat provider the bot already has.
 *
 * A provider qualifies when it has a non-blank `baseUrl` and `apiKey`, its `api`
 * string contains "openai" (case-insensitive), and one of its `models` has a
 * non-blank `id` and lists "image" in its `input` array. The first qualifying
 * provider (in `Object.values` order) wins.
 *
 * Malformed entries (non-string fields, non-array `models`) are skipped instead
 * of throwing, since this runs on user-supplied configuration.
 *
 * @param {object} [cfg] OpenClaw config; only `cfg.models.providers` is read.
 * @returns {{ baseUrl: string, apiKey: string, model: string } | null}
 *   Trimmed connection details (trailing slashes removed from `baseUrl`), or
 *   `null` when no provider qualifies.
 */
function resolveNativeDocReader(cfg) {
    const providers = cfg?.models?.providers;
    if (!providers || typeof providers !== "object")
        return null;
    const trimmed = (value) => (typeof value === "string" ? value.trim() : "");
    for (const provider of Object.values(providers)) {
        const baseUrl = trimmed(provider?.baseUrl);
        const apiKey = trimmed(provider?.apiKey);
        if (!baseUrl || !apiKey)
            continue;
        if (!trimmed(provider.api).toLowerCase().includes("openai"))
            continue;
        const models = Array.isArray(provider.models) ? provider.models : [];
        const vision = models.find((m) => trimmed(m?.id) && Array.isArray(m.input) && m.input.includes("image"));
        if (vision) {
            return {
                baseUrl: baseUrl.replace(/\/+$/, ""),
                apiKey,
                model: trimmed(vision.id),
            };
        }
    }
    return null;
}
811
/** Upper bound (15 MiB) on the size of a PDF that will be read natively. */
const NATIVE_DOC_MAX_BYTES = 15 * 1024 * 1024;
/**
 * Instruction sent alongside the document. It asks the model (in Chinese) to
 * extract the full text verbatim as Markdown, preserving headings, paragraphs,
 * lists and tables, to use OCR for scans, and to output no preamble.
 */
const NATIVE_DOC_PROMPT = "请完整、逐字地提取这个文档的全部文字内容,保留标题、段落、列表和表格结构,用 Markdown 输出。" +
    "这可能是扫描件,请用视觉识别(OCR)。直接输出文档内容本身,不要添加任何前言或说明。";
814
/**
 * Read a document natively by sending it to an OpenAI-compatible
 * `/chat/completions` endpoint as a `file` content part (base64 data URL)
 * next to the extraction prompt.
 *
 * @param {object} params
 * @param {{ baseUrl: string, apiKey: string, model: string }} params.reader
 *   Endpoint, bearer token and model id to use.
 * @param {Buffer} params.buffer Raw document bytes.
 * @param {string} params.filename File name reported to the model.
 * @param {string} params.mimeType MIME type placed in the data URL.
 * @param {number} [params.timeoutMs=120000] Abort the request after this many milliseconds.
 * @returns {Promise<string>} The extracted text, trimmed.
 * @throws {Error} When the response status is not OK (message includes the status
 *   and up to 200 characters of the body), or when no text could be extracted
 *   (including an unparseable JSON body).
 */
async function readDocumentNatively(params) {
    const { reader, buffer, filename, mimeType } = params;
    const dataUrl = `data:${mimeType};base64,${buffer.toString("base64")}`;
    const requestBody = {
        model: reader.model,
        max_tokens: 16000,
        messages: [
            {
                role: "user",
                content: [
                    { type: "text", text: NATIVE_DOC_PROMPT },
                    { type: "file", file: { filename, file_data: dataUrl } },
                ],
            },
        ],
    };
    const res = await fetch(`${reader.baseUrl}/chat/completions`, {
        method: "POST",
        headers: { "Content-Type": "application/json", Authorization: `Bearer ${reader.apiKey}` },
        body: JSON.stringify(requestBody),
        signal: AbortSignal.timeout(params.timeoutMs ?? 120_000),
    });
    if (!res.ok) {
        // The body is only used to enrich the error; failing to read it is not fatal.
        const body = await res.text().catch(() => "");
        throw new Error(`document read failed (${res.status})${body ? `: ${body.slice(0, 200)}` : ""}`);
    }
    // A malformed JSON body is treated like an empty answer and reported below.
    const json = await res.json().catch(() => null);
    const content = json?.choices?.[0]?.message?.content;
    let text = "";
    if (typeof content === "string") {
        text = content;
    }
    else if (Array.isArray(content)) {
        // Content may arrive as an array of parts; concatenate the `text` of each
        // object part and ignore everything else.
        text = content
            .map((part) => (part && typeof part === "object" && "text" in part ? String(part.text ?? "") : ""))
            .join("");
    }
    const trimmedText = text.trim();
    if (!trimmedText)
        throw new Error("document read returned empty content");
    return trimmedText;
}
855
/**
 * For each staged PDF, read it natively and return a text block to append to the
 * agent's turn so it can answer inline without a tool round-trip or file output.
 * Returns "" when there are no PDFs, no usable provider, or all reads fail (in
 * which case the agent falls back to its normal pdf-tool path).
 *
 * PDFs are processed one after another; a failure or an oversized file only
 * skips that attachment (with a warning) and never rejects the whole call.
 *
 * @param {object} params
 * @param {object} params.cfg OpenClaw config used to discover the reader.
 * @param {Array<{ kind?: string, mime?: string, name?: string, stagedPath: string }>} params.staged
 *   Staged inbound attachments; only those with `kind === "pdf"` or
 *   `mime === "application/pdf"` are considered.
 * @param {{ info?: Function, warn?: Function }} [params.log] Optional logger.
 * @returns {Promise<string>} Text to append to the agent input, or "".
 */
async function buildNativeDocumentContext(params) {
    const pdfs = params.staged.filter((a) => a.kind === "pdf" || a.mime === "application/pdf");
    if (pdfs.length === 0)
        return "";
    const reader = resolveNativeDocReader(params.cfg);
    if (!reader)
        return "";
    const blocks = [];
    for (const att of pdfs) {
        // Use one fallback name everywhere so an unnamed attachment never shows up
        // as "undefined" in logs or in the text handed to the agent.
        const displayName = att.name || "document.pdf";
        try {
            const buffer = await fs.readFile(att.stagedPath);
            if (buffer.byteLength > NATIVE_DOC_MAX_BYTES) {
                params.log?.warn?.(`ozaiya: skipping native read of ${displayName} (too large: ${buffer.byteLength} bytes)`);
                continue;
            }
            const content = await readDocumentNatively({
                reader,
                buffer,
                filename: displayName,
                mimeType: "application/pdf",
            });
            params.log?.info?.(`ozaiya: natively read PDF "${displayName}" via ${reader.model} (${content.length} chars)`);
            blocks.push(`【系统已自动读取附件「${displayName}」的完整内容(已用视觉模型识别,扫描件也已 OCR)】:\n\n${content}`);
        }
        catch (err) {
            params.log?.warn?.(`ozaiya: native read of ${displayName} failed: ${err instanceof Error ? err.message : String(err)}`);
        }
    }
    if (blocks.length === 0)
        return "";
    return ("\n\n" +
        blocks.join("\n\n---\n\n") +
        "\n\n[以上为附件的完整原文,已替你读取完毕。请据此直接在聊天里用文字回答用户的请求;" +
        "不要再调用 pdf 或任何文件读取工具,也不要用 exec/python 生成或发送任何文件" +
        "(.doc/.docx/.rtf/.txt 等)——把内容或结果直接发在消息里即可。]");
}
717
897
  /**
718
898
  * Build the full set of channel agent tools for a given bot account.
719
899
  * Used by both the plugin's agentTools factory (for OpenClaw tool registration)
720
900
  * and the text-based tool call fallback in deliver().
901
+ *
902
+ * Non-read-only tools are wrapped to report progress steps to the active
903
+ * dispatch's task progress card (when one exists for this account).
721
904
  */
722
905
  function buildChannelTools(account, cfg) {
723
906
  const resolveForGroup = (groupId) => resolveAccountForGroup(cfg, groupId) ?? account;
@@ -753,6 +936,36 @@ function buildChannelTools(account, cfg) {
753
936
  const summarizeTool = createSummarizeUrlTool(cfg);
754
937
  if (summarizeTool)
755
938
  tools.push(summarizeTool);
939
+ // Wrap non-read-only tools with progress tracking.
940
+ // When the tool executes, it looks up the current active dispatch for this account.
941
+ const accountId = account.accountId;
942
+ for (let i = 0; i < tools.length; i++) {
943
+ const tool = tools[i];
944
+ if (READ_ONLY_TOOLS.has(tool.name))
945
+ continue;
946
+ const originalExecute = tool.execute;
947
+ tools[i] = {
948
+ ...tool,
949
+ execute: async (toolCallId, rawArgs) => {
950
+ const dispatch = activeDispatches.get(accountId);
951
+ if (dispatch)
952
+ onToolCallStart(dispatch, tool.name, tool.label);
953
+ try {
954
+ const result = await originalExecute(toolCallId, rawArgs);
955
+ const dispatchAfter = activeDispatches.get(accountId);
956
+ if (dispatchAfter)
957
+ onToolCallComplete(dispatchAfter, tool.name, true);
958
+ return result;
959
+ }
960
+ catch (err) {
961
+ const dispatchAfter = activeDispatches.get(accountId);
962
+ if (dispatchAfter)
963
+ onToolCallComplete(dispatchAfter, tool.name, false);
964
+ throw err;
965
+ }
966
+ },
967
+ };
968
+ }
756
969
  return tools;
757
970
  }
758
971
  export const ozaiyaPlugin = {
@@ -1351,6 +1564,18 @@ function createSendDirectMessageTool(account) {
1351
1564
  try {
1352
1565
  const args = rawArgs;
1353
1566
  const { userId, message } = args;
1567
+ // Request user confirmation before sending a DM
1568
+ const originGroupId = accountToOriginGroupId.get(account.accountId);
1569
+ if (originGroupId) {
1570
+ const deps = makeConfirmationDeps(account);
1571
+ const approved = await requestConfirmation(deps, originGroupId, "message_user", {
1572
+ userId,
1573
+ message: message.length > 100 ? message.slice(0, 100) + "…" : message,
1574
+ });
1575
+ if (!approved) {
1576
+ return { content: [{ type: "text", text: "Direct message was not approved by the user." }] };
1577
+ }
1578
+ }
1354
1579
  // Try to resolve as ozaiyaId first, fall back to treating as internal accountId
1355
1580
  let accountId = userId;
1356
1581
  const users = await searchUsers(account.apiBaseUrl, account.botToken, userId).catch(() => []);
@@ -1566,6 +1791,18 @@ function createCreateGroupTool(account) {
1566
1791
  if (!groupName) {
1567
1792
  return { content: [{ type: "text", text: "Error: group name is required." }] };
1568
1793
  }
1794
+ // Request user confirmation before creating a group
1795
+ const originGroupId = accountToOriginGroupId.get(account.accountId);
1796
+ if (originGroupId) {
1797
+ const deps = makeConfirmationDeps(account);
1798
+ const approved = await requestConfirmation(deps, originGroupId, "create_group", {
1799
+ name: groupName,
1800
+ ...(args.memberIds?.length ? { members: args.memberIds.join(", ") } : {}),
1801
+ });
1802
+ if (!approved) {
1803
+ return { content: [{ type: "text", text: "Group creation was not approved by the user." }] };
1804
+ }
1805
+ }
1569
1806
  // Resolve ozaiyaIds to account IDs
1570
1807
  let accountIds;
1571
1808
  if (args.memberIds && args.memberIds.length > 0) {
@@ -1934,6 +2171,15 @@ function createStartInAppCallTool(account, cfg) {
1934
2171
  execute: async (_toolCallId, rawArgs) => {
1935
2172
  const args = rawArgs;
1936
2173
  try {
2174
+ // Request user confirmation before starting the call
2175
+ const deps = makeConfirmationDeps(account);
2176
+ const approved = await requestConfirmation(deps, args.groupId, "start_in_app_call", {
2177
+ groupId: args.groupId,
2178
+ type: args.type ?? "voice",
2179
+ });
2180
+ if (!approved) {
2181
+ return { content: [{ type: "text", text: "Call was not approved by the user." }] };
2182
+ }
1937
2183
  const result = await startCall(account.apiBaseUrl, account.botToken, args.groupId, args.type ?? "voice");
1938
2184
  if (!result) {
1939
2185
  return { content: [{ type: "text", text: "Failed to start call. The bot may not have permission or is not a member of the group." }] };
@@ -2039,6 +2285,16 @@ function createMakePhoneCallTool(account, cfg) {
2039
2285
  execute: async (_toolCallId, rawArgs) => {
2040
2286
  const args = rawArgs;
2041
2287
  try {
2288
+ // Request user confirmation before making the phone call
2289
+ const deps = makeConfirmationDeps(account);
2290
+ const approved = await requestConfirmation(deps, args.groupId, "make_phone_call", {
2291
+ phoneNumber: args.phoneNumber,
2292
+ mode: args.mode ?? "auto",
2293
+ ...(args.purpose ? { purpose: args.purpose } : {}),
2294
+ });
2295
+ if (!approved) {
2296
+ return { content: [{ type: "text", text: "Phone call was not approved by the user." }] };
2297
+ }
2042
2298
  const mode = args.mode ?? "auto";
2043
2299
  const result = await startPhoneCall(account.apiBaseUrl, account.botToken, args.groupId, args.phoneNumber, mode, args.purpose);
2044
2300
  // Report connected status
@@ -2637,6 +2893,14 @@ ctx) {
2637
2893
  if (inboundAttachments.length > 0) {
2638
2894
  ctx.log?.info?.(`ozaiya: staged ${stagedInboundAttachments.length}/${inboundAttachments.length} inbound attachments for OpenClaw media context`);
2639
2895
  }
2896
+ // Read inbound PDFs natively (feed straight to a vision model) and inject their
2897
+ // content into the agent's turn, so it answers inline without a pdf-tool round
2898
+ // trip or file generation. Empty string when there are no PDFs / no provider.
2899
+ const nativeDocContext = await buildNativeDocumentContext({
2900
+ cfg: ctx.cfg,
2901
+ staged: stagedInboundAttachments,
2902
+ log: ctx.log,
2903
+ });
2640
2904
  const transcriptionResult = await maybeTranscribeInboundAudio({
2641
2905
  stt: account.stt,
2642
2906
  messageText,
@@ -2685,6 +2949,9 @@ ctx) {
2685
2949
  id: groupId,
2686
2950
  },
2687
2951
  });
2952
+ // Track originating group for this account so tools without groupId
2953
+ // (message_user, create_group) can send confirmation cards to the right group.
2954
+ accountToOriginGroupId.set(account.accountId, groupId);
2688
2955
  const fromAddress = `ozaiya:group:${groupId}`;
2689
2956
  const conversationLabel = `group:${groupId}`;
2690
2957
  // Build inbound session envelope context
@@ -2702,9 +2969,10 @@ ctx) {
2702
2969
  "Rules: respond concisely (1-3 sentences), use natural spoken language, " +
2703
2970
  "never use markdown/code blocks/bullet lists/URLs/emojis. " +
2704
2971
  'Do not say "sure" or "of course" — just answer directly.';
2972
+ const agentInputWithDocs = nativeDocContext ? `${agentInput}${nativeDocContext}` : agentInput;
2705
2973
  const effectiveAgentInput = voiceReply
2706
- ? `${voiceReplyPrompt || DEFAULT_VOICE_REPLY_PROMPT}\n\n${agentInput}`
2707
- : agentInput;
2974
+ ? `${voiceReplyPrompt || DEFAULT_VOICE_REPLY_PROMPT}\n\n${agentInputWithDocs}`
2975
+ : agentInputWithDocs;
2708
2976
  const body = ch.reply.formatAgentEnvelope({
2709
2977
  channel: "Ozaiya",
2710
2978
  from: `${message.senderName} (${conversationLabel})`,
@@ -2788,6 +3056,15 @@ ctx) {
2788
3056
  }).catch((err) => {
2789
3057
  ctx.log?.warn?.(`ozaiya: failed recording session: ${String(err)}`);
2790
3058
  });
3059
+ // Create dispatch tracking for task progress card
3060
+ const dispatch = {
3061
+ taskId: `${account.accountId}:${groupId}:${Date.now()}`,
3062
+ groupId,
3063
+ account,
3064
+ steps: [],
3065
+ progressMessageId: null,
3066
+ };
3067
+ activeDispatches.set(account.accountId, dispatch);
2791
3068
  // Build channel tools map for text-based tool call fallback.
2792
3069
  // When a model outputs tool calls as plain text instead of structured API tool_calls,
2793
3070
  // we match against registered tool names and execute via their .execute() method.
@@ -2796,136 +3073,142 @@ ctx) {
2796
3073
  const channelToolsByName = new Map(channelTools.map((t) => [t.name, t]));
2797
3074
  ctx.log?.info?.(`ozaiya: text fallback tools loaded: ${channelToolsByName.size} tools [${[...channelToolsByName.keys()].join(", ")}]`);
2798
3075
  // Dispatch to agent with buffered block dispatcher
2799
- await ch.reply.dispatchReplyWithBufferedBlockDispatcher({
2800
- ctx: msgCtx,
2801
- cfg: ctx.cfg,
2802
- dispatcherOptions: {
2803
- deliver: async (replyPayload, _info) => {
2804
- let replyText = replyPayload.text;
2805
- ctx.log?.info?.(`ozaiya: deliver called, text length=${replyText?.length ?? 0}, empty=${!replyText?.trim()}, voiceReply=${voiceReply}, voiceReplyVoice=${voiceReplyVoice ?? 'none'}`);
2806
- if (!replyText?.trim())
2807
- return;
2808
- // Generic fallback: intercept tool calls that models output as text
2809
- // instead of structured API tool_calls. Supports two formats:
2810
- // 1. JSON function syntax: tool_name({"arg":"value"}) or tool_name({arg: "value"})
2811
- // 2. XML: <function_calls><invoke name="tool_name"><parameter name="arg">value</parameter></invoke></function_calls>
2812
- let textToolsExecuted = false;
2813
- // --- Format 1: JSON function syntax tool_name({"key":"val"}) ---
2814
- if (channelToolsByName.size > 0) {
2815
- const toolNames = [...channelToolsByName.keys()].map((n) => n.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
2816
- const jsonFnRegex = new RegExp(`\\b(${toolNames.join("|")})\\s*\\(\\s*(\\{[\\s\\S]*?\\})\\s*\\)`, "g");
2817
- let jsonMatch;
2818
- while ((jsonMatch = jsonFnRegex.exec(replyText)) !== null) {
2819
- const toolName = jsonMatch[1];
2820
- const tool = channelToolsByName.get(toolName);
2821
- if (!tool)
2822
- continue;
2823
- // Always strip the tool call text from the message — users should never
2824
- // see raw tool_name({...}) syntax even if execution fails.
2825
- textToolsExecuted = true;
2826
- try {
2827
- // Try strict JSON first, then lenient (unquoted keys, trailing commas)
2828
- let args;
3076
+ try {
3077
+ await ch.reply.dispatchReplyWithBufferedBlockDispatcher({
3078
+ ctx: msgCtx,
3079
+ cfg: ctx.cfg,
3080
+ dispatcherOptions: {
3081
+ deliver: async (replyPayload, _info) => {
3082
+ let replyText = replyPayload.text;
3083
+ ctx.log?.info?.(`ozaiya: deliver called, text length=${replyText?.length ?? 0}, empty=${!replyText?.trim()}, voiceReply=${voiceReply}, voiceReplyVoice=${voiceReplyVoice ?? 'none'}`);
3084
+ if (!replyText?.trim())
3085
+ return;
3086
+ // Generic fallback: intercept tool calls that models output as text
3087
+ // instead of structured API tool_calls. Supports two formats:
3088
+ // 1. JSON function syntax: tool_name({"arg":"value"}) or tool_name({arg: "value"})
3089
+ // 2. XML: <function_calls><invoke name="tool_name"><parameter name="arg">value</parameter></invoke></function_calls>
3090
+ let textToolsExecuted = false;
3091
+ // --- Format 1: JSON function syntax tool_name({"key":"val"}) ---
3092
+ if (channelToolsByName.size > 0) {
3093
+ const toolNames = [...channelToolsByName.keys()].map((n) => n.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
3094
+ const jsonFnRegex = new RegExp(`\\b(${toolNames.join("|")})\\s*\\(\\s*(\\{[\\s\\S]*?\\})\\s*\\)`, "g");
3095
+ let jsonMatch;
3096
+ while ((jsonMatch = jsonFnRegex.exec(replyText)) !== null) {
3097
+ const toolName = jsonMatch[1];
3098
+ const tool = channelToolsByName.get(toolName);
3099
+ if (!tool)
3100
+ continue;
3101
+ // Always strip the tool call text from the message — users should never
3102
+ // see raw tool_name({...}) syntax even if execution fails.
3103
+ textToolsExecuted = true;
2829
3104
  try {
2830
- args = JSON.parse(jsonMatch[2]);
3105
+ // Try strict JSON first, then lenient (unquoted keys, trailing commas)
3106
+ let args;
3107
+ try {
3108
+ args = JSON.parse(jsonMatch[2]);
3109
+ }
3110
+ catch {
3111
+ // Handle JS-style object literals: unquoted keys, trailing commas
3112
+ const lenient = jsonMatch[2]
3113
+ .replace(/([{,]\s*)([a-zA-Z_]\w*)\s*:/g, '$1"$2":')
3114
+ .replace(/,\s*}/g, "}");
3115
+ args = JSON.parse(lenient);
3116
+ }
3117
+ ctx.log?.info?.(`ozaiya: text fallback — executing ${toolName}(${JSON.stringify(args)})`);
3118
+ await tool.execute(`text-fallback-${Date.now()}`, args);
2831
3119
  }
2832
- catch {
2833
- // Handle JS-style object literals: unquoted keys, trailing commas
2834
- const lenient = jsonMatch[2]
2835
- .replace(/([{,]\s*)([a-zA-Z_]\w*)\s*:/g, '$1"$2":')
2836
- .replace(/,\s*}/g, "}");
2837
- args = JSON.parse(lenient);
3120
+ catch (err) {
3121
+ ctx.log?.warn?.(`ozaiya: text fallback ${toolName} failed: ${String(err)}`);
2838
3122
  }
2839
- ctx.log?.info?.(`ozaiya: text fallback — executing ${toolName}(${JSON.stringify(args)})`);
2840
- await tool.execute(`text-fallback-${Date.now()}`, args);
2841
3123
  }
2842
- catch (err) {
2843
- ctx.log?.warn?.(`ozaiya: text fallback — ${toolName} failed: ${String(err)}`);
3124
+ if (textToolsExecuted) {
3125
+ replyText = replyText.replace(jsonFnRegex, "").trim();
3126
+ if (!replyText)
3127
+ return;
2844
3128
  }
2845
3129
  }
2846
- if (textToolsExecuted) {
2847
- replyText = replyText.replace(jsonFnRegex, "").trim();
3130
+ // --- Format 2: XML <function_calls> ---
3131
+ if (replyText.includes("<function_calls>") && replyText.includes("<invoke")) {
3132
+ const invokeRegex = /<invoke\s+name="([^"]+)">([\s\S]*?)<\/invoke>/g;
3133
+ let match;
3134
+ while ((match = invokeRegex.exec(replyText)) !== null) {
3135
+ const toolName = match[1];
3136
+ const tool = channelToolsByName.get(toolName);
3137
+ if (!tool)
3138
+ continue;
3139
+ const paramsXml = match[2];
3140
+ const paramRegex = /<parameter\s+name="([^"]+)">([^<]*)<\/parameter>/g;
3141
+ const args = {};
3142
+ let pm;
3143
+ while ((pm = paramRegex.exec(paramsXml)) !== null) {
3144
+ args[pm[1]] = pm[2];
3145
+ }
3146
+ ctx.log?.info?.(`ozaiya: text fallback (XML) — executing ${toolName}(${JSON.stringify(args)})`);
3147
+ try {
3148
+ await tool.execute(`text-fallback-xml-${Date.now()}`, args);
3149
+ textToolsExecuted = true;
3150
+ }
3151
+ catch (err) {
3152
+ ctx.log?.warn?.(`ozaiya: text fallback (XML) — ${toolName} failed: ${String(err)}`);
3153
+ }
3154
+ }
3155
+ replyText = replyText
3156
+ .replace(/<function_calls>[\s\S]*?<\/function_calls>/g, "")
3157
+ .replace(/<function_results>[\s\S]*?<\/function_results>/g, "")
3158
+ .replace(/\[\[reply_to_current\]\]/g, "")
3159
+ .replace(/NO_REPLY/g, "")
3160
+ .trim();
3161
+ if (!replyText && textToolsExecuted)
3162
+ return;
2848
3163
  if (!replyText)
2849
3164
  return;
2850
3165
  }
2851
- }
2852
- // --- Format 2: XML <function_calls> ---
2853
- if (replyText.includes("<function_calls>") && replyText.includes("<invoke")) {
2854
- const invokeRegex = /<invoke\s+name="([^"]+)">([\s\S]*?)<\/invoke>/g;
2855
- let match;
2856
- while ((match = invokeRegex.exec(replyText)) !== null) {
2857
- const toolName = match[1];
2858
- const tool = channelToolsByName.get(toolName);
2859
- if (!tool)
2860
- continue;
2861
- const paramsXml = match[2];
2862
- const paramRegex = /<parameter\s+name="([^"]+)">([^<]*)<\/parameter>/g;
2863
- const args = {};
2864
- let pm;
2865
- while ((pm = paramRegex.exec(paramsXml)) !== null) {
2866
- args[pm[1]] = pm[2];
2867
- }
2868
- ctx.log?.info?.(`ozaiya: text fallback (XML) — executing ${toolName}(${JSON.stringify(args)})`);
2869
- try {
2870
- await tool.execute(`text-fallback-xml-${Date.now()}`, args);
2871
- textToolsExecuted = true;
2872
- }
2873
- catch (err) {
2874
- ctx.log?.warn?.(`ozaiya: text fallback (XML) — ${toolName} failed: ${String(err)}`);
3166
+ // Voice reply: synthesize TTS audio and send as voice message
3167
+ if (voiceReply) {
3168
+ ctx.log?.info?.(`ozaiya: voice reply — synthesizing TTS for group ${groupId}`);
3169
+ const audioBuffer = await synthesizeVoiceReply(replyText, ctx, voiceReplyVoice ?? undefined);
3170
+ if (audioBuffer) {
3171
+ const ext = audioBuffer.ext;
3172
+ const mime = ext === ".mp3" ? "audio/mpeg" : ext === ".opus" ? "audio/ogg" : ext === ".wav" ? "audio/wav" : "audio/mpeg";
3173
+ const fileInfo = await uploadFile(account.apiBaseUrl, account.botToken, groupId, `voice${ext}`, mime, audioBuffer.data);
3174
+ await sendEncryptedChatContent({
3175
+ account,
3176
+ groupId,
3177
+ content: { text: replyText, files: [fileInfo] },
3178
+ log: ctx.log,
3179
+ });
3180
+ ctx.log?.info?.(`ozaiya: voice reply sent successfully (${ext}, ${audioBuffer.data.length} bytes)`);
3181
+ reportUsage(account, "tts", { provider: "voice-reply" });
3182
+ return;
2875
3183
  }
2876
- }
2877
- replyText = replyText
2878
- .replace(/<function_calls>[\s\S]*?<\/function_calls>/g, "")
2879
- .replace(/<function_results>[\s\S]*?<\/function_results>/g, "")
2880
- .replace(/\[\[reply_to_current\]\]/g, "")
2881
- .replace(/NO_REPLY/g, "")
2882
- .trim();
2883
- if (!replyText && textToolsExecuted)
2884
- return;
2885
- if (!replyText)
2886
- return;
2887
- }
2888
- // Voice reply: synthesize TTS audio and send as voice message
2889
- if (voiceReply) {
2890
- ctx.log?.info?.(`ozaiya: voice reply — synthesizing TTS for group ${groupId}`);
2891
- const audioBuffer = await synthesizeVoiceReply(replyText, ctx, voiceReplyVoice ?? undefined);
2892
- if (audioBuffer) {
2893
- const ext = audioBuffer.ext;
2894
- const mime = ext === ".mp3" ? "audio/mpeg" : ext === ".opus" ? "audio/ogg" : ext === ".wav" ? "audio/wav" : "audio/mpeg";
2895
- const fileInfo = await uploadFile(account.apiBaseUrl, account.botToken, groupId, `voice${ext}`, mime, audioBuffer.data);
3184
+ ctx.log?.warn?.(`ozaiya: TTS failed, falling back to text reply`);
2896
3185
  await sendEncryptedChatContent({
2897
3186
  account,
2898
3187
  groupId,
2899
- content: { text: replyText, files: [fileInfo] },
3188
+ content: { text: "⚠️ Voice synthesis failed, falling back to text." },
2900
3189
  log: ctx.log,
2901
3190
  });
2902
- ctx.log?.info?.(`ozaiya: voice reply sent successfully (${ext}, ${audioBuffer.data.length} bytes)`);
2903
- reportUsage(account, "tts", { provider: "voice-reply" });
2904
- return;
2905
3191
  }
2906
- ctx.log?.warn?.(`ozaiya: TTS failed, falling back to text reply`);
3192
+ ctx.log?.info?.(`ozaiya: sending reply to group ${groupId}`);
2907
3193
  await sendEncryptedChatContent({
2908
3194
  account,
2909
3195
  groupId,
2910
- content: { text: "⚠️ Voice synthesis failed, falling back to text." },
3196
+ content: { text: replyText },
2911
3197
  log: ctx.log,
2912
3198
  });
2913
- }
2914
- ctx.log?.info?.(`ozaiya: sending reply to group ${groupId}`);
2915
- await sendEncryptedChatContent({
2916
- account,
2917
- groupId,
2918
- content: { text: replyText },
2919
- log: ctx.log,
2920
- });
2921
- ctx.log?.info?.(`ozaiya: reply sent successfully`);
2922
- },
2923
- onError: (err) => {
2924
- ctx.log?.warn?.(`ozaiya: reply dispatch error: ${String(err)}`);
3199
+ ctx.log?.info?.(`ozaiya: reply sent successfully`);
3200
+ },
3201
+ onError: (err) => {
3202
+ ctx.log?.warn?.(`ozaiya: reply dispatch error: ${String(err)}`);
3203
+ },
2925
3204
  },
2926
- },
2927
- replyOptions: account.model ? { isHeartbeat: true, heartbeatModelOverride: account.model } : undefined,
2928
- });
3205
+ replyOptions: account.model ? { isHeartbeat: true, heartbeatModelOverride: account.model } : undefined,
3206
+ });
3207
+ }
3208
+ finally {
3209
+ await finalizeTaskProgress(dispatch);
3210
+ activeDispatches.delete(account.accountId);
3211
+ }
2929
3212
  }
2930
3213
  async function resetRouteSession(route, ctx) {
2931
3214
  const runtime = getOzaiyaRuntime();
@@ -2983,6 +3266,15 @@ async function handleCallbackQuery(payload,
2983
3266
  ctx) {
2984
3267
  const { groupId, messageId, callbackData, buttonText, buttonRowIndex, buttonIndex, from } = payload;
2985
3268
  const account = ctx.account;
3269
+ // Intercept action confirmation callbacks (oz_ac: prefix) — resolve the
3270
+ // pending confirmation promise and don't dispatch to the agent.
3271
+ const actionCallback = parseActionCallback(callbackData);
3272
+ if (actionCallback) {
3273
+ const deps = makeConfirmationDeps(account);
3274
+ const resolved = await resolveConfirmation(deps, actionCallback.actionId, actionCallback.approved);
3275
+ if (resolved)
3276
+ return; // intercepted — don't dispatch to agent
3277
+ }
2986
3278
  const runtime = getOzaiyaRuntime();
2987
3279
  const ch = runtime.channel;
2988
3280
  const { callbackBody, callbackCommand, callbackLabel, callbackOptionIndex } = normalizeCallbackQueryPayload({