@ozaiya/openclaw-channel 0.10.8 → 0.10.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/actionConfirmation.d.ts +37 -0
- package/dist/src/actionConfirmation.js +128 -0
- package/dist/src/actionConfirmation.js.map +1 -0
- package/dist/src/channel.js +402 -110
- package/dist/src/channel.js.map +1 -1
- package/dist/src/phoneCall.js +0 -2
- package/dist/src/phoneCall.js.map +1 -1
- package/dist/src/voiceCall.d.ts +0 -1
- package/dist/src/voiceCall.js +0 -2
- package/dist/src/voiceCall.js.map +1 -1
- package/package.json +1 -1
package/dist/src/channel.js
CHANGED
|
@@ -26,6 +26,7 @@ import { summarizeWithYuanbao } from "./yuanbao.js";
|
|
|
26
26
|
import { summarizeWithDoubao } from "./doubao.js";
|
|
27
27
|
import { fetchXueqiuPost, searchXueqiuPosts } from "./xueqiu.js";
|
|
28
28
|
import { fetchSocialMediaPost, searchSocialMedia, extractSocialMediaContent } from "./socialMedia.js";
|
|
29
|
+
import { requestConfirmation, parseActionCallback, resolveConfirmation } from "./actionConfirmation.js";
|
|
29
30
|
const DEFAULT_API_BASE_URL = "https://api.ozai.dev";
|
|
30
31
|
const DEFAULT_WEBHOOK_PATH = "/ozaiya/webhook";
|
|
31
32
|
const DEFAULT_ACCOUNT_ID = "default";
|
|
@@ -39,8 +40,65 @@ const RICH_MESSAGE_GUIDANCE = "Prefer plain text for normal prose, code, markdow
|
|
|
39
40
|
const unwrappedKeys = new Map();
|
|
40
41
|
// Track which bot account handles each group (groupId → botAccountId) — used for gateway mode
|
|
41
42
|
const groupToBotAccountId = new Map();
|
|
43
|
+
// Track the originating groupId for each bot account's current dispatch
|
|
44
|
+
// (accountId → groupId). Used by tools that don't receive groupId
|
|
45
|
+
// (message_user, create_group) to know which group to send confirmation cards to.
|
|
46
|
+
const accountToOriginGroupId = new Map();
|
|
42
47
|
// Active voice call sessions keyed by callId
|
|
43
48
|
const activeVoiceCalls = new Map();
|
|
49
|
+
// --- Task Progress Tracking ---
|
|
50
|
+
// Tools that are read-only and should not produce progress steps
|
|
51
|
+
const READ_ONLY_TOOLS = new Set([
|
|
52
|
+
"search_users",
|
|
53
|
+
"list_groups",
|
|
54
|
+
"fetch_xueqiu_post",
|
|
55
|
+
"search_xueqiu",
|
|
56
|
+
"scrape_social_media_post",
|
|
57
|
+
"summarize_url",
|
|
58
|
+
]);
|
|
59
|
+
// Active dispatches keyed by accountId (one dispatch at a time per account)
|
|
60
|
+
const activeDispatches = new Map();
|
|
61
|
+
/**
 * Build the message content for a task-progress card.
 *
 * @param {object} dispatch - Active dispatch record; `taskId` and `steps` are read.
 * @param {boolean} completed - Whether the whole task should be shown as finished.
 * @returns {{taskProgress: {taskId: *, steps: Array<{toolName: *, label: *, status: *}>, completed: boolean}}}
 *   A fresh object; each step is copied down to its three card fields.
 */
function buildTaskProgressContent(dispatch, completed) {
    const { taskId } = dispatch;
    // Project every step onto exactly the fields the card carries.
    const steps = dispatch.steps.map((entry) => {
        const { toolName, label, status } = entry;
        return { toolName, label, status };
    });
    const taskProgress = { taskId, steps, completed };
    return { taskProgress };
}
|
|
70
|
+
/**
 * Post the task-progress card for a dispatch, or edit it in place once it exists.
 *
 * Updates are serialised per dispatch through `dispatch.progressQueue`. Callers
 * invoke this fire-and-forget, so without serialisation two overlapping calls
 * could both observe `progressMessageId` as unset and post duplicate cards, or a
 * slow earlier edit could land after (and overwrite) a later one.
 *
 * The card content is built when the update actually runs, not when it was
 * requested, so each send/edit reflects the step list at that moment.
 *
 * @param {object} dispatch - Active dispatch; reads `account`, `groupId`, `steps`,
 *   `taskId`; reads and writes `progressMessageId` and `progressQueue`.
 * @param {boolean} completed - Whether the card should be marked as finished.
 * @returns {Promise<void>} Settles when this particular update has been sent;
 *   rejects if key lookup, encryption, or the send/edit call fails.
 */
async function sendOrEditProgressMessage(dispatch, completed) {
    const runUpdate = async () => {
        const content = buildTaskProgressContent(dispatch, completed);
        const groupKey = await getGroupKeyOrThrow(dispatch.account, dispatch.groupId);
        const encrypted = encryptMessage(content, groupKey);
        if (!dispatch.progressMessageId) {
            const result = await sendMessage(dispatch.account.apiBaseUrl, dispatch.account.botToken, dispatch.groupId, encrypted);
            dispatch.progressMessageId = result.message.id;
        }
        else {
            await editMessage(dispatch.account.apiBaseUrl, dispatch.account.botToken, dispatch.progressMessageId, encrypted);
        }
    };
    // The stored tail never rejects, so one failed update cannot block later ones.
    const previous = dispatch.progressQueue ?? Promise.resolve();
    const current = previous.then(runUpdate);
    dispatch.progressQueue = current.catch(() => { });
    return current;
}
|
|
82
|
+
/**
 * Record that a tool has started running and refresh the progress card.
 *
 * @param {object} dispatch - Active dispatch whose `steps` array is appended to.
 * @param {string} toolName - Name of the tool being executed.
 * @param {string} toolLabel - Label stored on the step as `label`.
 * @returns {void}
 */
function onToolCallStart(dispatch, toolName, toolLabel) {
    const startedStep = { toolName, label: toolLabel, status: "in_progress" };
    dispatch.steps.push(startedStep);
    // Best-effort UI refresh: a failed card update must not fail the tool call.
    const refresh = sendOrEditProgressMessage(dispatch, false);
    refresh.catch(() => { });
}
|
|
86
|
+
/**
 * Mark the first still-running step for `toolName` as finished and refresh the card.
 *
 * @param {object} dispatch - Active dispatch whose `steps` are searched.
 * @param {string} toolName - Tool whose in-progress step should be closed.
 * @param {boolean} success - Truthy → "completed", falsy → "failed".
 * @returns {void}
 */
function onToolCallComplete(dispatch, toolName, success) {
    for (const candidate of dispatch.steps) {
        const isOpenStepForTool = candidate.toolName === toolName && candidate.status === "in_progress";
        if (!isOpenStepForTool) {
            continue;
        }
        if (success) {
            candidate.status = "completed";
        }
        else {
            candidate.status = "failed";
        }
        // Only the first matching step is closed.
        break;
    }
    // Best-effort UI refresh; the card is re-sent even when no step matched.
    const refresh = sendOrEditProgressMessage(dispatch, false);
    refresh.catch(() => { });
}
|
|
93
|
+
/**
 * Close out a dispatch's progress card once the agent turn is over.
 *
 * Any step still marked "in_progress" is flipped to "completed". If at least
 * one step was recorded, the card is re-sent with `completed = true`; errors
 * from that final update are swallowed.
 *
 * @param {object} dispatch - Dispatch whose `steps` are finalised.
 * @returns {Promise<void>}
 */
async function finalizeTaskProgress(dispatch) {
    dispatch.steps.forEach((entry) => {
        if (entry.status === "in_progress") {
            entry.status = "completed";
        }
    });
    // No steps means no card was ever posted, so there is nothing to update.
    if (!(dispatch.steps.length > 0)) {
        return;
    }
    await sendOrEditProgressMessage(dispatch, true).catch(() => { });
}
|
|
44
102
|
/**
|
|
45
103
|
* Report TTS/STT usage to the server for billing (fire-and-forget).
|
|
46
104
|
* Bot owner is charged based on usage type and amount.
|
|
@@ -656,6 +714,17 @@ async function sendEncryptedChatContent(params) {
|
|
|
656
714
|
recordState(account.accountId, { lastOutboundAt: Date.now() });
|
|
657
715
|
return result;
|
|
658
716
|
}
|
|
717
|
+
/**
 * Build the dependency object passed to the action-confirmation helpers
 * for one bot account.
 *
 * @param {object} account - Bot account; `apiBaseUrl` and `botToken` are read when editing.
 * @returns {{sendEncrypted: Function, editEncrypted: Function}}
 *   `sendEncrypted(groupId, content)` forwards to `sendEncryptedChatContent` and
 *   returns its result; `editEncrypted(groupId, messageId, content)` encrypts
 *   `content` with the group's key and edits the existing message.
 */
function makeConfirmationDeps(account) {
    const sendEncrypted = (groupId, content) => sendEncryptedChatContent({ account, groupId, content });
    const editEncrypted = async (groupId, messageId, content) => {
        const key = await getGroupKeyOrThrow(account, groupId);
        const ciphertext = encryptMessage(content, key);
        await editMessage(account.apiBaseUrl, account.botToken, messageId, ciphertext);
    };
    return { sendEncrypted, editEncrypted };
}
|
|
659
728
|
async function prepareOutboundAttachment(account, groupId, attachment) {
|
|
660
729
|
if (attachment.url) {
|
|
661
730
|
const mediaRes = await fetch(attachment.url, { signal: AbortSignal.timeout(60_000) });
|
|
@@ -714,10 +783,124 @@ async function stageInboundAttachmentsForAgent(params) {
|
|
|
714
783
|
}
|
|
715
784
|
return staged;
|
|
716
785
|
}
|
|
786
|
+
/**
 * Auto-discover an OpenAI-compatible provider and an image-capable model from
 * the model config, to use for native PDF reading. No extra channel config is
 * required — it reuses whatever provider entries are already configured.
 *
 * A provider qualifies when it has a non-blank string `baseUrl` and `apiKey`,
 * its `api` string contains "openai" (case-insensitive), and its `models`
 * array contains an entry with a non-blank string `id` whose `input` array
 * includes "image". Providers are visited in `Object.values` order and the
 * first match wins.
 *
 * Malformed entries (non-string fields, non-array `models`) are skipped rather
 * than throwing, so a bad config entry cannot break the caller.
 *
 * @param {object} [cfg] - Config object; only `cfg.models.providers` is read.
 * @returns {{baseUrl: string, apiKey: string, model: string} | null}
 *   Trimmed connection details (trailing slashes stripped from `baseUrl`), or
 *   `null` when no provider qualifies.
 */
function resolveNativeDocReader(cfg) {
    const providers = cfg?.models?.providers;
    if (!providers || typeof providers !== "object")
        return null;
    // Config values are user-supplied: treat anything that is not a string as blank.
    const trimmed = (value) => (typeof value === "string" ? value.trim() : "");
    for (const provider of Object.values(providers)) {
        const baseUrl = trimmed(provider?.baseUrl);
        const apiKey = trimmed(provider?.apiKey);
        if (!baseUrl || !apiKey)
            continue;
        if (!trimmed(provider.api).toLowerCase().includes("openai"))
            continue;
        const models = Array.isArray(provider.models) ? provider.models : [];
        const vision = models.find((m) => trimmed(m?.id) && Array.isArray(m.input) && m.input.includes("image"));
        if (vision) {
            return {
                baseUrl: baseUrl.replace(/\/+$/, ""),
                apiKey,
                model: trimmed(vision.id),
            };
        }
    }
    return null;
}
|
|
811
|
+
const NATIVE_DOC_MAX_BYTES = 15 * 1024 * 1024;
|
|
812
|
+
const NATIVE_DOC_PROMPT = "请完整、逐字地提取这个文档的全部文字内容,保留标题、段落、列表和表格结构,用 Markdown 输出。" +
|
|
813
|
+
"这可能是扫描件,请用视觉识别(OCR)。直接输出文档内容本身,不要添加任何前言或说明。";
|
|
814
|
+
/**
 * Read a document by sending it to an OpenAI-compatible `/chat/completions`
 * endpoint as a `file` content part (base64 data URL) alongside the extraction
 * prompt, and return the model's text output.
 *
 * @param {object} params
 * @param {{baseUrl: string, apiKey: string, model: string}} params.reader -
 *   Endpoint base URL (no trailing slash), bearer token, and model id.
 * @param {Buffer} params.buffer - Raw file bytes.
 * @param {string} params.filename - File name reported to the model.
 * @param {string} params.mimeType - MIME type used in the data URL.
 * @param {number} [params.timeoutMs=120000] - Request timeout in milliseconds.
 * @param {number} [params.maxTokens=16000] - `max_tokens` sent with the request.
 * @returns {Promise<string>} Trimmed text extracted by the model.
 * @throws {Error} On a non-2xx response (message includes the status and up to
 *   200 characters of the body), on an empty or unparseable reply, or when the
 *   request fails or times out.
 */
async function readDocumentNatively(params) {
    const { reader, buffer, filename, mimeType } = params;
    const dataUrl = `data:${mimeType};base64,${buffer.toString("base64")}`;
    const res = await fetch(`${reader.baseUrl}/chat/completions`, {
        method: "POST",
        headers: { "Content-Type": "application/json", Authorization: `Bearer ${reader.apiKey}` },
        body: JSON.stringify({
            model: reader.model,
            max_tokens: params.maxTokens ?? 16000,
            messages: [
                {
                    role: "user",
                    content: [
                        { type: "text", text: NATIVE_DOC_PROMPT },
                        { type: "file", file: { filename, file_data: dataUrl } },
                    ],
                },
            ],
        }),
        signal: AbortSignal.timeout(params.timeoutMs ?? 120_000),
    });
    if (!res.ok) {
        // The body is only used to enrich the error; failing to read it is not fatal.
        const body = await res.text().catch(() => "");
        throw new Error(`document read failed (${res.status})${body ? `: ${body.slice(0, 200)}` : ""}`);
    }
    // An unparseable body is treated like an empty reply and rejected below.
    const json = (await res.json().catch(() => null));
    const content = json?.choices?.[0]?.message?.content;
    // The reply is either a plain string or an array of content parts;
    // parts without a `text` field contribute nothing.
    let text = "";
    if (typeof content === "string") {
        text = content;
    }
    else if (Array.isArray(content)) {
        text = content
            .map((part) => (part && typeof part === "object" && "text" in part ? String(part.text ?? "") : ""))
            .join("");
    }
    const result = text.trim();
    if (!result)
        throw new Error("document read returned empty content");
    return result;
}
|
|
855
|
+
/**
 * For each staged PDF, read it natively and return a text block to append to
 * the agent's turn so it can answer inline without a tool round-trip or file
 * output.
 *
 * An attachment counts as a PDF when `kind === "pdf"` or
 * `mime === "application/pdf"`. Files larger than `NATIVE_DOC_MAX_BYTES` are
 * skipped; the size is checked with `stat` before the file is read, so an
 * oversized attachment is never loaded into memory. Per-file failures are
 * logged as warnings and do not stop the remaining files. Files are processed
 * one at a time, in order.
 *
 * @param {object} params
 * @param {object} params.cfg - Config passed to `resolveNativeDocReader`.
 * @param {Array<{kind?: string, mime?: string, name?: string, stagedPath: string}>} params.staged -
 *   Staged inbound attachments.
 * @param {{info?: Function, warn?: Function}} [params.log] - Optional logger.
 * @returns {Promise<string>} The context text to append (starting with a blank
 *   line), or "" when there are no PDFs, no usable provider, or every read was
 *   skipped or failed.
 */
async function buildNativeDocumentContext(params) {
    const pdfs = params.staged.filter((a) => a.kind === "pdf" || a.mime === "application/pdf");
    if (pdfs.length === 0)
        return "";
    const reader = resolveNativeDocReader(params.cfg);
    if (!reader)
        return "";
    const blocks = [];
    for (const att of pdfs) {
        // One fallback name for the upload, the log lines and the injected text,
        // so a nameless attachment never shows up as "undefined".
        const displayName = att.name || "document.pdf";
        try {
            const { size } = await fs.stat(att.stagedPath);
            if (size > NATIVE_DOC_MAX_BYTES) {
                params.log?.warn?.(`ozaiya: skipping native read of ${displayName} (too large: ${size} bytes)`);
                continue;
            }
            const buffer = await fs.readFile(att.stagedPath);
            const content = await readDocumentNatively({
                reader,
                buffer,
                filename: displayName,
                mimeType: "application/pdf",
            });
            params.log?.info?.(`ozaiya: natively read PDF "${displayName}" via ${reader.model} (${content.length} chars)`);
            blocks.push(`【系统已自动读取附件「${displayName}」的完整内容(已用视觉模型识别,扫描件也已 OCR)】:\n\n${content}`);
        }
        catch (err) {
            params.log?.warn?.(`ozaiya: native read of ${displayName} failed: ${err instanceof Error ? err.message : String(err)}`);
        }
    }
    if (blocks.length === 0)
        return "";
    return ("\n\n" +
        blocks.join("\n\n---\n\n") +
        "\n\n[以上为附件的完整原文,已替你读取完毕。请据此直接在聊天里用文字回答用户的请求;" +
        "不要再调用 pdf 或任何文件读取工具,也不要用 exec/python 生成或发送任何文件" +
        "(.doc/.docx/.rtf/.txt 等)——把内容或结果直接发在消息里即可。]");
}
|
|
717
897
|
/**
|
|
718
898
|
* Build the full set of channel agent tools for a given bot account.
|
|
719
899
|
* Used by both the plugin's agentTools factory (for OpenClaw tool registration)
|
|
720
900
|
* and the text-based tool call fallback in deliver().
|
|
901
|
+
*
|
|
902
|
+
* Non-read-only tools are wrapped to report progress steps to the active
|
|
903
|
+
* dispatch's task progress card (when one exists for this account).
|
|
721
904
|
*/
|
|
722
905
|
function buildChannelTools(account, cfg) {
|
|
723
906
|
const resolveForGroup = (groupId) => resolveAccountForGroup(cfg, groupId) ?? account;
|
|
@@ -753,6 +936,36 @@ function buildChannelTools(account, cfg) {
|
|
|
753
936
|
const summarizeTool = createSummarizeUrlTool(cfg);
|
|
754
937
|
if (summarizeTool)
|
|
755
938
|
tools.push(summarizeTool);
|
|
939
|
+
// Wrap non-read-only tools with progress tracking.
|
|
940
|
+
// When the tool executes, it looks up the current active dispatch for this account.
|
|
941
|
+
const accountId = account.accountId;
|
|
942
|
+
for (let i = 0; i < tools.length; i++) {
|
|
943
|
+
const tool = tools[i];
|
|
944
|
+
if (READ_ONLY_TOOLS.has(tool.name))
|
|
945
|
+
continue;
|
|
946
|
+
const originalExecute = tool.execute;
|
|
947
|
+
tools[i] = {
|
|
948
|
+
...tool,
|
|
949
|
+
execute: async (toolCallId, rawArgs) => {
|
|
950
|
+
const dispatch = activeDispatches.get(accountId);
|
|
951
|
+
if (dispatch)
|
|
952
|
+
onToolCallStart(dispatch, tool.name, tool.label);
|
|
953
|
+
try {
|
|
954
|
+
const result = await originalExecute(toolCallId, rawArgs);
|
|
955
|
+
const dispatchAfter = activeDispatches.get(accountId);
|
|
956
|
+
if (dispatchAfter)
|
|
957
|
+
onToolCallComplete(dispatchAfter, tool.name, true);
|
|
958
|
+
return result;
|
|
959
|
+
}
|
|
960
|
+
catch (err) {
|
|
961
|
+
const dispatchAfter = activeDispatches.get(accountId);
|
|
962
|
+
if (dispatchAfter)
|
|
963
|
+
onToolCallComplete(dispatchAfter, tool.name, false);
|
|
964
|
+
throw err;
|
|
965
|
+
}
|
|
966
|
+
},
|
|
967
|
+
};
|
|
968
|
+
}
|
|
756
969
|
return tools;
|
|
757
970
|
}
|
|
758
971
|
export const ozaiyaPlugin = {
|
|
@@ -1351,6 +1564,18 @@ function createSendDirectMessageTool(account) {
|
|
|
1351
1564
|
try {
|
|
1352
1565
|
const args = rawArgs;
|
|
1353
1566
|
const { userId, message } = args;
|
|
1567
|
+
// Request user confirmation before sending a DM
|
|
1568
|
+
const originGroupId = accountToOriginGroupId.get(account.accountId);
|
|
1569
|
+
if (originGroupId) {
|
|
1570
|
+
const deps = makeConfirmationDeps(account);
|
|
1571
|
+
const approved = await requestConfirmation(deps, originGroupId, "message_user", {
|
|
1572
|
+
userId,
|
|
1573
|
+
message: message.length > 100 ? message.slice(0, 100) + "…" : message,
|
|
1574
|
+
});
|
|
1575
|
+
if (!approved) {
|
|
1576
|
+
return { content: [{ type: "text", text: "Direct message was not approved by the user." }] };
|
|
1577
|
+
}
|
|
1578
|
+
}
|
|
1354
1579
|
// Try to resolve as ozaiyaId first, fall back to treating as internal accountId
|
|
1355
1580
|
let accountId = userId;
|
|
1356
1581
|
const users = await searchUsers(account.apiBaseUrl, account.botToken, userId).catch(() => []);
|
|
@@ -1566,6 +1791,18 @@ function createCreateGroupTool(account) {
|
|
|
1566
1791
|
if (!groupName) {
|
|
1567
1792
|
return { content: [{ type: "text", text: "Error: group name is required." }] };
|
|
1568
1793
|
}
|
|
1794
|
+
// Request user confirmation before creating a group
|
|
1795
|
+
const originGroupId = accountToOriginGroupId.get(account.accountId);
|
|
1796
|
+
if (originGroupId) {
|
|
1797
|
+
const deps = makeConfirmationDeps(account);
|
|
1798
|
+
const approved = await requestConfirmation(deps, originGroupId, "create_group", {
|
|
1799
|
+
name: groupName,
|
|
1800
|
+
...(args.memberIds?.length ? { members: args.memberIds.join(", ") } : {}),
|
|
1801
|
+
});
|
|
1802
|
+
if (!approved) {
|
|
1803
|
+
return { content: [{ type: "text", text: "Group creation was not approved by the user." }] };
|
|
1804
|
+
}
|
|
1805
|
+
}
|
|
1569
1806
|
// Resolve ozaiyaIds to account IDs
|
|
1570
1807
|
let accountIds;
|
|
1571
1808
|
if (args.memberIds && args.memberIds.length > 0) {
|
|
@@ -1934,6 +2171,15 @@ function createStartInAppCallTool(account, cfg) {
|
|
|
1934
2171
|
execute: async (_toolCallId, rawArgs) => {
|
|
1935
2172
|
const args = rawArgs;
|
|
1936
2173
|
try {
|
|
2174
|
+
// Request user confirmation before starting the call
|
|
2175
|
+
const deps = makeConfirmationDeps(account);
|
|
2176
|
+
const approved = await requestConfirmation(deps, args.groupId, "start_in_app_call", {
|
|
2177
|
+
groupId: args.groupId,
|
|
2178
|
+
type: args.type ?? "voice",
|
|
2179
|
+
});
|
|
2180
|
+
if (!approved) {
|
|
2181
|
+
return { content: [{ type: "text", text: "Call was not approved by the user." }] };
|
|
2182
|
+
}
|
|
1937
2183
|
const result = await startCall(account.apiBaseUrl, account.botToken, args.groupId, args.type ?? "voice");
|
|
1938
2184
|
if (!result) {
|
|
1939
2185
|
return { content: [{ type: "text", text: "Failed to start call. The bot may not have permission or is not a member of the group." }] };
|
|
@@ -2039,6 +2285,16 @@ function createMakePhoneCallTool(account, cfg) {
|
|
|
2039
2285
|
execute: async (_toolCallId, rawArgs) => {
|
|
2040
2286
|
const args = rawArgs;
|
|
2041
2287
|
try {
|
|
2288
|
+
// Request user confirmation before making the phone call
|
|
2289
|
+
const deps = makeConfirmationDeps(account);
|
|
2290
|
+
const approved = await requestConfirmation(deps, args.groupId, "make_phone_call", {
|
|
2291
|
+
phoneNumber: args.phoneNumber,
|
|
2292
|
+
mode: args.mode ?? "auto",
|
|
2293
|
+
...(args.purpose ? { purpose: args.purpose } : {}),
|
|
2294
|
+
});
|
|
2295
|
+
if (!approved) {
|
|
2296
|
+
return { content: [{ type: "text", text: "Phone call was not approved by the user." }] };
|
|
2297
|
+
}
|
|
2042
2298
|
const mode = args.mode ?? "auto";
|
|
2043
2299
|
const result = await startPhoneCall(account.apiBaseUrl, account.botToken, args.groupId, args.phoneNumber, mode, args.purpose);
|
|
2044
2300
|
// Report connected status
|
|
@@ -2637,6 +2893,14 @@ ctx) {
|
|
|
2637
2893
|
if (inboundAttachments.length > 0) {
|
|
2638
2894
|
ctx.log?.info?.(`ozaiya: staged ${stagedInboundAttachments.length}/${inboundAttachments.length} inbound attachments for OpenClaw media context`);
|
|
2639
2895
|
}
|
|
2896
|
+
// Read inbound PDFs natively (feed straight to a vision model) and inject their
|
|
2897
|
+
// content into the agent's turn, so it answers inline without a pdf-tool round
|
|
2898
|
+
// trip or file generation. Empty string when there are no PDFs / no provider.
|
|
2899
|
+
const nativeDocContext = await buildNativeDocumentContext({
|
|
2900
|
+
cfg: ctx.cfg,
|
|
2901
|
+
staged: stagedInboundAttachments,
|
|
2902
|
+
log: ctx.log,
|
|
2903
|
+
});
|
|
2640
2904
|
const transcriptionResult = await maybeTranscribeInboundAudio({
|
|
2641
2905
|
stt: account.stt,
|
|
2642
2906
|
messageText,
|
|
@@ -2685,6 +2949,9 @@ ctx) {
|
|
|
2685
2949
|
id: groupId,
|
|
2686
2950
|
},
|
|
2687
2951
|
});
|
|
2952
|
+
// Track originating group for this account so tools without groupId
|
|
2953
|
+
// (message_user, create_group) can send confirmation cards to the right group.
|
|
2954
|
+
accountToOriginGroupId.set(account.accountId, groupId);
|
|
2688
2955
|
const fromAddress = `ozaiya:group:${groupId}`;
|
|
2689
2956
|
const conversationLabel = `group:${groupId}`;
|
|
2690
2957
|
// Build inbound session envelope context
|
|
@@ -2702,9 +2969,10 @@ ctx) {
|
|
|
2702
2969
|
"Rules: respond concisely (1-3 sentences), use natural spoken language, " +
|
|
2703
2970
|
"never use markdown/code blocks/bullet lists/URLs/emojis. " +
|
|
2704
2971
|
'Do not say "sure" or "of course" — just answer directly.';
|
|
2972
|
+
const agentInputWithDocs = nativeDocContext ? `${agentInput}${nativeDocContext}` : agentInput;
|
|
2705
2973
|
const effectiveAgentInput = voiceReply
|
|
2706
|
-
? `${voiceReplyPrompt || DEFAULT_VOICE_REPLY_PROMPT}\n\n${
|
|
2707
|
-
:
|
|
2974
|
+
? `${voiceReplyPrompt || DEFAULT_VOICE_REPLY_PROMPT}\n\n${agentInputWithDocs}`
|
|
2975
|
+
: agentInputWithDocs;
|
|
2708
2976
|
const body = ch.reply.formatAgentEnvelope({
|
|
2709
2977
|
channel: "Ozaiya",
|
|
2710
2978
|
from: `${message.senderName} (${conversationLabel})`,
|
|
@@ -2788,6 +3056,15 @@ ctx) {
|
|
|
2788
3056
|
}).catch((err) => {
|
|
2789
3057
|
ctx.log?.warn?.(`ozaiya: failed recording session: ${String(err)}`);
|
|
2790
3058
|
});
|
|
3059
|
+
// Create dispatch tracking for task progress card
|
|
3060
|
+
const dispatch = {
|
|
3061
|
+
taskId: `${account.accountId}:${groupId}:${Date.now()}`,
|
|
3062
|
+
groupId,
|
|
3063
|
+
account,
|
|
3064
|
+
steps: [],
|
|
3065
|
+
progressMessageId: null,
|
|
3066
|
+
};
|
|
3067
|
+
activeDispatches.set(account.accountId, dispatch);
|
|
2791
3068
|
// Build channel tools map for text-based tool call fallback.
|
|
2792
3069
|
// When a model outputs tool calls as plain text instead of structured API tool_calls,
|
|
2793
3070
|
// we match against registered tool names and execute via their .execute() method.
|
|
@@ -2796,136 +3073,142 @@ ctx) {
|
|
|
2796
3073
|
const channelToolsByName = new Map(channelTools.map((t) => [t.name, t]));
|
|
2797
3074
|
ctx.log?.info?.(`ozaiya: text fallback tools loaded: ${channelToolsByName.size} tools [${[...channelToolsByName.keys()].join(", ")}]`);
|
|
2798
3075
|
// Dispatch to agent with buffered block dispatcher
|
|
2799
|
-
|
|
2800
|
-
|
|
2801
|
-
|
|
2802
|
-
|
|
2803
|
-
|
|
2804
|
-
|
|
2805
|
-
|
|
2806
|
-
|
|
2807
|
-
|
|
2808
|
-
|
|
2809
|
-
|
|
2810
|
-
|
|
2811
|
-
|
|
2812
|
-
|
|
2813
|
-
|
|
2814
|
-
|
|
2815
|
-
|
|
2816
|
-
|
|
2817
|
-
|
|
2818
|
-
|
|
2819
|
-
|
|
2820
|
-
|
|
2821
|
-
|
|
2822
|
-
|
|
2823
|
-
|
|
2824
|
-
|
|
2825
|
-
|
|
2826
|
-
|
|
2827
|
-
// Try strict JSON first, then lenient (unquoted keys, trailing commas)
|
|
2828
|
-
let args;
|
|
3076
|
+
try {
|
|
3077
|
+
await ch.reply.dispatchReplyWithBufferedBlockDispatcher({
|
|
3078
|
+
ctx: msgCtx,
|
|
3079
|
+
cfg: ctx.cfg,
|
|
3080
|
+
dispatcherOptions: {
|
|
3081
|
+
deliver: async (replyPayload, _info) => {
|
|
3082
|
+
let replyText = replyPayload.text;
|
|
3083
|
+
ctx.log?.info?.(`ozaiya: deliver called, text length=${replyText?.length ?? 0}, empty=${!replyText?.trim()}, voiceReply=${voiceReply}, voiceReplyVoice=${voiceReplyVoice ?? 'none'}`);
|
|
3084
|
+
if (!replyText?.trim())
|
|
3085
|
+
return;
|
|
3086
|
+
// Generic fallback: intercept tool calls that models output as text
|
|
3087
|
+
// instead of structured API tool_calls. Supports two formats:
|
|
3088
|
+
// 1. JSON function syntax: tool_name({"arg":"value"}) or tool_name({arg: "value"})
|
|
3089
|
+
// 2. XML: <function_calls><invoke name="tool_name"><parameter name="arg">value</parameter></invoke></function_calls>
|
|
3090
|
+
let textToolsExecuted = false;
|
|
3091
|
+
// --- Format 1: JSON function syntax tool_name({"key":"val"}) ---
|
|
3092
|
+
if (channelToolsByName.size > 0) {
|
|
3093
|
+
const toolNames = [...channelToolsByName.keys()].map((n) => n.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
|
|
3094
|
+
const jsonFnRegex = new RegExp(`\\b(${toolNames.join("|")})\\s*\\(\\s*(\\{[\\s\\S]*?\\})\\s*\\)`, "g");
|
|
3095
|
+
let jsonMatch;
|
|
3096
|
+
while ((jsonMatch = jsonFnRegex.exec(replyText)) !== null) {
|
|
3097
|
+
const toolName = jsonMatch[1];
|
|
3098
|
+
const tool = channelToolsByName.get(toolName);
|
|
3099
|
+
if (!tool)
|
|
3100
|
+
continue;
|
|
3101
|
+
// Always strip the tool call text from the message — users should never
|
|
3102
|
+
// see raw tool_name({...}) syntax even if execution fails.
|
|
3103
|
+
textToolsExecuted = true;
|
|
2829
3104
|
try {
|
|
2830
|
-
|
|
3105
|
+
// Try strict JSON first, then lenient (unquoted keys, trailing commas)
|
|
3106
|
+
let args;
|
|
3107
|
+
try {
|
|
3108
|
+
args = JSON.parse(jsonMatch[2]);
|
|
3109
|
+
}
|
|
3110
|
+
catch {
|
|
3111
|
+
// Handle JS-style object literals: unquoted keys, trailing commas
|
|
3112
|
+
const lenient = jsonMatch[2]
|
|
3113
|
+
.replace(/([{,]\s*)([a-zA-Z_]\w*)\s*:/g, '$1"$2":')
|
|
3114
|
+
.replace(/,\s*}/g, "}");
|
|
3115
|
+
args = JSON.parse(lenient);
|
|
3116
|
+
}
|
|
3117
|
+
ctx.log?.info?.(`ozaiya: text fallback — executing ${toolName}(${JSON.stringify(args)})`);
|
|
3118
|
+
await tool.execute(`text-fallback-${Date.now()}`, args);
|
|
2831
3119
|
}
|
|
2832
|
-
catch {
|
|
2833
|
-
|
|
2834
|
-
const lenient = jsonMatch[2]
|
|
2835
|
-
.replace(/([{,]\s*)([a-zA-Z_]\w*)\s*:/g, '$1"$2":')
|
|
2836
|
-
.replace(/,\s*}/g, "}");
|
|
2837
|
-
args = JSON.parse(lenient);
|
|
3120
|
+
catch (err) {
|
|
3121
|
+
ctx.log?.warn?.(`ozaiya: text fallback — ${toolName} failed: ${String(err)}`);
|
|
2838
3122
|
}
|
|
2839
|
-
ctx.log?.info?.(`ozaiya: text fallback — executing ${toolName}(${JSON.stringify(args)})`);
|
|
2840
|
-
await tool.execute(`text-fallback-${Date.now()}`, args);
|
|
2841
3123
|
}
|
|
2842
|
-
|
|
2843
|
-
|
|
3124
|
+
if (textToolsExecuted) {
|
|
3125
|
+
replyText = replyText.replace(jsonFnRegex, "").trim();
|
|
3126
|
+
if (!replyText)
|
|
3127
|
+
return;
|
|
2844
3128
|
}
|
|
2845
3129
|
}
|
|
2846
|
-
|
|
2847
|
-
|
|
3130
|
+
// --- Format 2: XML <function_calls> ---
|
|
3131
|
+
if (replyText.includes("<function_calls>") && replyText.includes("<invoke")) {
|
|
3132
|
+
const invokeRegex = /<invoke\s+name="([^"]+)">([\s\S]*?)<\/invoke>/g;
|
|
3133
|
+
let match;
|
|
3134
|
+
while ((match = invokeRegex.exec(replyText)) !== null) {
|
|
3135
|
+
const toolName = match[1];
|
|
3136
|
+
const tool = channelToolsByName.get(toolName);
|
|
3137
|
+
if (!tool)
|
|
3138
|
+
continue;
|
|
3139
|
+
const paramsXml = match[2];
|
|
3140
|
+
const paramRegex = /<parameter\s+name="([^"]+)">([^<]*)<\/parameter>/g;
|
|
3141
|
+
const args = {};
|
|
3142
|
+
let pm;
|
|
3143
|
+
while ((pm = paramRegex.exec(paramsXml)) !== null) {
|
|
3144
|
+
args[pm[1]] = pm[2];
|
|
3145
|
+
}
|
|
3146
|
+
ctx.log?.info?.(`ozaiya: text fallback (XML) — executing ${toolName}(${JSON.stringify(args)})`);
|
|
3147
|
+
try {
|
|
3148
|
+
await tool.execute(`text-fallback-xml-${Date.now()}`, args);
|
|
3149
|
+
textToolsExecuted = true;
|
|
3150
|
+
}
|
|
3151
|
+
catch (err) {
|
|
3152
|
+
ctx.log?.warn?.(`ozaiya: text fallback (XML) — ${toolName} failed: ${String(err)}`);
|
|
3153
|
+
}
|
|
3154
|
+
}
|
|
3155
|
+
replyText = replyText
|
|
3156
|
+
.replace(/<function_calls>[\s\S]*?<\/function_calls>/g, "")
|
|
3157
|
+
.replace(/<function_results>[\s\S]*?<\/function_results>/g, "")
|
|
3158
|
+
.replace(/\[\[reply_to_current\]\]/g, "")
|
|
3159
|
+
.replace(/NO_REPLY/g, "")
|
|
3160
|
+
.trim();
|
|
3161
|
+
if (!replyText && textToolsExecuted)
|
|
3162
|
+
return;
|
|
2848
3163
|
if (!replyText)
|
|
2849
3164
|
return;
|
|
2850
3165
|
}
|
|
2851
|
-
|
|
2852
|
-
|
|
2853
|
-
|
|
2854
|
-
|
|
2855
|
-
|
|
2856
|
-
|
|
2857
|
-
|
|
2858
|
-
|
|
2859
|
-
|
|
2860
|
-
|
|
2861
|
-
|
|
2862
|
-
|
|
2863
|
-
|
|
2864
|
-
|
|
2865
|
-
|
|
2866
|
-
|
|
2867
|
-
|
|
2868
|
-
ctx.log?.info?.(`ozaiya: text fallback (XML) — executing ${toolName}(${JSON.stringify(args)})`);
|
|
2869
|
-
try {
|
|
2870
|
-
await tool.execute(`text-fallback-xml-${Date.now()}`, args);
|
|
2871
|
-
textToolsExecuted = true;
|
|
2872
|
-
}
|
|
2873
|
-
catch (err) {
|
|
2874
|
-
ctx.log?.warn?.(`ozaiya: text fallback (XML) — ${toolName} failed: ${String(err)}`);
|
|
3166
|
+
// Voice reply: synthesize TTS audio and send as voice message
|
|
3167
|
+
if (voiceReply) {
|
|
3168
|
+
ctx.log?.info?.(`ozaiya: voice reply — synthesizing TTS for group ${groupId}`);
|
|
3169
|
+
const audioBuffer = await synthesizeVoiceReply(replyText, ctx, voiceReplyVoice ?? undefined);
|
|
3170
|
+
if (audioBuffer) {
|
|
3171
|
+
const ext = audioBuffer.ext;
|
|
3172
|
+
const mime = ext === ".mp3" ? "audio/mpeg" : ext === ".opus" ? "audio/ogg" : ext === ".wav" ? "audio/wav" : "audio/mpeg";
|
|
3173
|
+
const fileInfo = await uploadFile(account.apiBaseUrl, account.botToken, groupId, `voice${ext}`, mime, audioBuffer.data);
|
|
3174
|
+
await sendEncryptedChatContent({
|
|
3175
|
+
account,
|
|
3176
|
+
groupId,
|
|
3177
|
+
content: { text: replyText, files: [fileInfo] },
|
|
3178
|
+
log: ctx.log,
|
|
3179
|
+
});
|
|
3180
|
+
ctx.log?.info?.(`ozaiya: voice reply sent successfully (${ext}, ${audioBuffer.data.length} bytes)`);
|
|
3181
|
+
reportUsage(account, "tts", { provider: "voice-reply" });
|
|
3182
|
+
return;
|
|
2875
3183
|
}
|
|
2876
|
-
|
|
2877
|
-
replyText = replyText
|
|
2878
|
-
.replace(/<function_calls>[\s\S]*?<\/function_calls>/g, "")
|
|
2879
|
-
.replace(/<function_results>[\s\S]*?<\/function_results>/g, "")
|
|
2880
|
-
.replace(/\[\[reply_to_current\]\]/g, "")
|
|
2881
|
-
.replace(/NO_REPLY/g, "")
|
|
2882
|
-
.trim();
|
|
2883
|
-
if (!replyText && textToolsExecuted)
|
|
2884
|
-
return;
|
|
2885
|
-
if (!replyText)
|
|
2886
|
-
return;
|
|
2887
|
-
}
|
|
2888
|
-
// Voice reply: synthesize TTS audio and send as voice message
|
|
2889
|
-
if (voiceReply) {
|
|
2890
|
-
ctx.log?.info?.(`ozaiya: voice reply — synthesizing TTS for group ${groupId}`);
|
|
2891
|
-
const audioBuffer = await synthesizeVoiceReply(replyText, ctx, voiceReplyVoice ?? undefined);
|
|
2892
|
-
if (audioBuffer) {
|
|
2893
|
-
const ext = audioBuffer.ext;
|
|
2894
|
-
const mime = ext === ".mp3" ? "audio/mpeg" : ext === ".opus" ? "audio/ogg" : ext === ".wav" ? "audio/wav" : "audio/mpeg";
|
|
2895
|
-
const fileInfo = await uploadFile(account.apiBaseUrl, account.botToken, groupId, `voice${ext}`, mime, audioBuffer.data);
|
|
3184
|
+
ctx.log?.warn?.(`ozaiya: TTS failed, falling back to text reply`);
|
|
2896
3185
|
await sendEncryptedChatContent({
|
|
2897
3186
|
account,
|
|
2898
3187
|
groupId,
|
|
2899
|
-
content: { text:
|
|
3188
|
+
content: { text: "⚠️ Voice synthesis failed, falling back to text." },
|
|
2900
3189
|
log: ctx.log,
|
|
2901
3190
|
});
|
|
2902
|
-
ctx.log?.info?.(`ozaiya: voice reply sent successfully (${ext}, ${audioBuffer.data.length} bytes)`);
|
|
2903
|
-
reportUsage(account, "tts", { provider: "voice-reply" });
|
|
2904
|
-
return;
|
|
2905
3191
|
}
|
|
2906
|
-
ctx.log?.
|
|
3192
|
+
ctx.log?.info?.(`ozaiya: sending reply to group ${groupId}`);
|
|
2907
3193
|
await sendEncryptedChatContent({
|
|
2908
3194
|
account,
|
|
2909
3195
|
groupId,
|
|
2910
|
-
content: { text:
|
|
3196
|
+
content: { text: replyText },
|
|
2911
3197
|
log: ctx.log,
|
|
2912
3198
|
});
|
|
2913
|
-
|
|
2914
|
-
|
|
2915
|
-
|
|
2916
|
-
|
|
2917
|
-
|
|
2918
|
-
content: { text: replyText },
|
|
2919
|
-
log: ctx.log,
|
|
2920
|
-
});
|
|
2921
|
-
ctx.log?.info?.(`ozaiya: reply sent successfully`);
|
|
2922
|
-
},
|
|
2923
|
-
onError: (err) => {
|
|
2924
|
-
ctx.log?.warn?.(`ozaiya: reply dispatch error: ${String(err)}`);
|
|
3199
|
+
ctx.log?.info?.(`ozaiya: reply sent successfully`);
|
|
3200
|
+
},
|
|
3201
|
+
onError: (err) => {
|
|
3202
|
+
ctx.log?.warn?.(`ozaiya: reply dispatch error: ${String(err)}`);
|
|
3203
|
+
},
|
|
2925
3204
|
},
|
|
2926
|
-
|
|
2927
|
-
|
|
2928
|
-
}
|
|
3205
|
+
replyOptions: account.model ? { isHeartbeat: true, heartbeatModelOverride: account.model } : undefined,
|
|
3206
|
+
});
|
|
3207
|
+
}
|
|
3208
|
+
finally {
|
|
3209
|
+
await finalizeTaskProgress(dispatch);
|
|
3210
|
+
activeDispatches.delete(account.accountId);
|
|
3211
|
+
}
|
|
2929
3212
|
}
|
|
2930
3213
|
async function resetRouteSession(route, ctx) {
|
|
2931
3214
|
const runtime = getOzaiyaRuntime();
|
|
@@ -2983,6 +3266,15 @@ async function handleCallbackQuery(payload,
|
|
|
2983
3266
|
ctx) {
|
|
2984
3267
|
const { groupId, messageId, callbackData, buttonText, buttonRowIndex, buttonIndex, from } = payload;
|
|
2985
3268
|
const account = ctx.account;
|
|
3269
|
+
// Intercept action confirmation callbacks (oz_ac: prefix) — resolve the
|
|
3270
|
+
// pending confirmation promise and don't dispatch to the agent.
|
|
3271
|
+
const actionCallback = parseActionCallback(callbackData);
|
|
3272
|
+
if (actionCallback) {
|
|
3273
|
+
const deps = makeConfirmationDeps(account);
|
|
3274
|
+
const resolved = await resolveConfirmation(deps, actionCallback.actionId, actionCallback.approved);
|
|
3275
|
+
if (resolved)
|
|
3276
|
+
return; // intercepted — don't dispatch to agent
|
|
3277
|
+
}
|
|
2986
3278
|
const runtime = getOzaiyaRuntime();
|
|
2987
3279
|
const ch = runtime.channel;
|
|
2988
3280
|
const { callbackBody, callbackCommand, callbackLabel, callbackOptionIndex } = normalizeCallbackQueryPayload({
|