@openclaw/voice-call 2026.5.2 → 2026.5.3-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api.js +2 -0
- package/dist/call-status-CXldV5o8.js +32 -0
- package/dist/cli-metadata.js +12 -0
- package/dist/config-7w04YpHh.js +548 -0
- package/dist/config-compat-B0me39_4.js +129 -0
- package/dist/guarded-json-api-Btx5EE4w.js +591 -0
- package/dist/http-headers-BrnxBasF.js +10 -0
- package/dist/index.js +1284 -0
- package/dist/mock-CeKvfVEd.js +135 -0
- package/dist/plivo-B-a7KFoT.js +393 -0
- package/dist/realtime-handler-B63CIDP2.js +325 -0
- package/dist/realtime-transcription.runtime-B2h70y2W.js +2 -0
- package/dist/realtime-voice.runtime-Bkh4nvLn.js +2 -0
- package/dist/response-generator-BrcmwDZU.js +182 -0
- package/dist/response-model-CyF5K80p.js +12 -0
- package/dist/runtime-api.js +6 -0
- package/dist/runtime-entry-88ytYAQa.js +3119 -0
- package/dist/runtime-entry.js +2 -0
- package/dist/setup-api.js +37 -0
- package/dist/telnyx-jjBE8boz.js +260 -0
- package/dist/twilio-1OqbcXLL.js +676 -0
- package/dist/voice-mapping-BYDGdWGx.js +40 -0
- package/package.json +14 -6
- package/api.ts +0 -16
- package/cli-metadata.ts +0 -10
- package/config-api.ts +0 -12
- package/index.test.ts +0 -943
- package/index.ts +0 -794
- package/runtime-api.ts +0 -20
- package/runtime-entry.ts +0 -1
- package/setup-api.ts +0 -47
- package/src/allowlist.test.ts +0 -18
- package/src/allowlist.ts +0 -19
- package/src/cli.ts +0 -845
- package/src/config-compat.test.ts +0 -120
- package/src/config-compat.ts +0 -227
- package/src/config.test.ts +0 -479
- package/src/config.ts +0 -808
- package/src/core-bridge.ts +0 -14
- package/src/deep-merge.test.ts +0 -40
- package/src/deep-merge.ts +0 -23
- package/src/gateway-continue-operation.ts +0 -200
- package/src/http-headers.test.ts +0 -16
- package/src/http-headers.ts +0 -15
- package/src/manager/context.ts +0 -42
- package/src/manager/events.test.ts +0 -581
- package/src/manager/events.ts +0 -288
- package/src/manager/lifecycle.ts +0 -53
- package/src/manager/lookup.test.ts +0 -52
- package/src/manager/lookup.ts +0 -35
- package/src/manager/outbound.test.ts +0 -528
- package/src/manager/outbound.ts +0 -486
- package/src/manager/state.ts +0 -48
- package/src/manager/store.ts +0 -106
- package/src/manager/timers.test.ts +0 -129
- package/src/manager/timers.ts +0 -113
- package/src/manager/twiml.test.ts +0 -13
- package/src/manager/twiml.ts +0 -17
- package/src/manager.closed-loop.test.ts +0 -236
- package/src/manager.inbound-allowlist.test.ts +0 -188
- package/src/manager.notify.test.ts +0 -377
- package/src/manager.restore.test.ts +0 -183
- package/src/manager.test-harness.ts +0 -127
- package/src/manager.ts +0 -392
- package/src/media-stream.test.ts +0 -768
- package/src/media-stream.ts +0 -708
- package/src/providers/base.ts +0 -97
- package/src/providers/mock.test.ts +0 -78
- package/src/providers/mock.ts +0 -185
- package/src/providers/plivo.test.ts +0 -93
- package/src/providers/plivo.ts +0 -601
- package/src/providers/shared/call-status.test.ts +0 -24
- package/src/providers/shared/call-status.ts +0 -24
- package/src/providers/shared/guarded-json-api.test.ts +0 -106
- package/src/providers/shared/guarded-json-api.ts +0 -42
- package/src/providers/telnyx.test.ts +0 -340
- package/src/providers/telnyx.ts +0 -394
- package/src/providers/twilio/api.test.ts +0 -145
- package/src/providers/twilio/api.ts +0 -93
- package/src/providers/twilio/twiml-policy.test.ts +0 -84
- package/src/providers/twilio/twiml-policy.ts +0 -87
- package/src/providers/twilio/webhook.ts +0 -34
- package/src/providers/twilio.test.ts +0 -591
- package/src/providers/twilio.ts +0 -861
- package/src/providers/twilio.types.ts +0 -17
- package/src/realtime-defaults.ts +0 -3
- package/src/realtime-fast-context.test.ts +0 -88
- package/src/realtime-fast-context.ts +0 -165
- package/src/realtime-transcription.runtime.ts +0 -4
- package/src/realtime-voice.runtime.ts +0 -5
- package/src/response-generator.test.ts +0 -321
- package/src/response-generator.ts +0 -318
- package/src/response-model.test.ts +0 -71
- package/src/response-model.ts +0 -23
- package/src/runtime.test.ts +0 -536
- package/src/runtime.ts +0 -510
- package/src/telephony-audio.test.ts +0 -61
- package/src/telephony-audio.ts +0 -12
- package/src/telephony-tts.test.ts +0 -196
- package/src/telephony-tts.ts +0 -235
- package/src/test-fixtures.ts +0 -73
- package/src/tts-provider-voice.test.ts +0 -34
- package/src/tts-provider-voice.ts +0 -21
- package/src/tunnel.test.ts +0 -166
- package/src/tunnel.ts +0 -314
- package/src/types.ts +0 -291
- package/src/utils.test.ts +0 -17
- package/src/utils.ts +0 -14
- package/src/voice-mapping.test.ts +0 -34
- package/src/voice-mapping.ts +0 -68
- package/src/webhook/realtime-handler.test.ts +0 -598
- package/src/webhook/realtime-handler.ts +0 -485
- package/src/webhook/stale-call-reaper.test.ts +0 -88
- package/src/webhook/stale-call-reaper.ts +0 -38
- package/src/webhook/tailscale.test.ts +0 -214
- package/src/webhook/tailscale.ts +0 -129
- package/src/webhook-exposure.test.ts +0 -33
- package/src/webhook-exposure.ts +0 -84
- package/src/webhook-security.test.ts +0 -770
- package/src/webhook-security.ts +0 -994
- package/src/webhook.hangup-once.lifecycle.test.ts +0 -135
- package/src/webhook.test.ts +0 -1470
- package/src/webhook.ts +0 -908
- package/src/webhook.types.ts +0 -5
- package/src/websocket-test-support.ts +0 -72
- package/tsconfig.json +0 -16
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
import type { WebhookSecurityConfig } from "../config.js";
|
|
2
|
-
|
|
3
|
-
/**
 * Options accepted by the Twilio Voice API provider.
 *
 * Every field is optional.
 */
export interface TwilioProviderOptions {
  /** Enables the ngrok free tier compatibility mode (loopback only, less secure). */
  allowNgrokFreeTierLoopbackBypass?: boolean;

  /** Public URL to use for signature verification instead of the default one. */
  publicUrl?: string;

  /** Path of the media stream WebSocket, e.g. `/voice/stream`. */
  streamPath?: string;

  /** Skips webhook signature verification. Development only. */
  skipVerification?: boolean;

  /** Webhook security options (forwarded headers / allowlist). */
  webhookSecurity?: WebhookSecurityConfig;
}
|
package/src/realtime-defaults.ts
DELETED
|
@@ -1,3 +0,0 @@
|
|
|
1
|
-
import { REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME } from "openclaw/plugin-sdk/realtime-voice";
|
|
2
|
-
|
|
3
|
-
/**
 * Default instruction prompt for the voice-call realtime interface.
 *
 * The text asks the model to keep spoken replies brief and to call the consult
 * tool (its name is interpolated from `REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME`)
 * whenever a question needs deeper reasoning, current information, or tools.
 */
export const DEFAULT_VOICE_CALL_REALTIME_INSTRUCTIONS = `You are OpenClaw's phone-call realtime voice interface. Keep spoken replies brief and natural. When a question needs deeper reasoning, current information, or tools, call ${REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME} before answering.`;
|
|
@@ -1,88 +0,0 @@
|
|
|
1
|
-
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-types";
|
|
2
|
-
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
|
3
|
-
import type { VoiceCallRealtimeFastContextConfig } from "./config.js";
|
|
4
|
-
|
|
5
|
-
// The mock function is created through vi.hoisted so that it already exists
// when the vi.mock factory below is evaluated.
const mocks = vi.hoisted(() => {
  return { getActiveMemorySearchManager: vi.fn() };
});

// Replace the memory-host-search module so the code under test talks to the
// controllable mock instead of a real memory manager.
vi.mock("openclaw/plugin-sdk/memory-host-search", () => {
  return { getActiveMemorySearchManager: mocks.getActiveMemorySearchManager };
});
|
|
12
|
-
|
|
13
|
-
import { resolveRealtimeFastContextConsult } from "./realtime-fast-context.js";
|
|
14
|
-
|
|
15
|
-
// Empty config stub shared by all tests; it is only passed through to
// resolveRealtimeFastContextConsult.
const cfg = {} as OpenClawConfig;
|
|
16
|
-
|
|
17
|
-
/**
 * Builds a fast-context config for tests.
 *
 * Starts from an enabled config (800 ms timeout, 3 results, memory + sessions
 * sources, no consult fallback) and applies `overrides` on top of it.
 */
function createFastContextConfig(
  overrides: Partial<VoiceCallRealtimeFastContextConfig> = {},
): VoiceCallRealtimeFastContextConfig {
  const defaults: VoiceCallRealtimeFastContextConfig = {
    enabled: true,
    timeoutMs: 800,
    maxResults: 3,
    sources: ["memory", "sessions"],
    fallbackToConsult: false,
  };
  return { ...defaults, ...overrides };
}
|
|
29
|
-
|
|
30
|
-
/** Creates a logger whose `debug` and `warn` methods are fresh vitest mocks. */
function createLogger() {
  const debug = vi.fn();
  const warn = vi.fn();
  return { debug, warn };
}
|
|
36
|
-
|
|
37
|
-
describe("resolveRealtimeFastContextConsult", () => {
  // Call parameters shared by every case; each test supplies its own config and logger.
  const sharedParams = {
    cfg,
    agentId: "main",
    sessionKey: "voice:15550001234",
    args: { question: "What do you remember?" },
  };

  beforeEach(() => {
    mocks.getActiveMemorySearchManager.mockReset();
  });

  afterEach(() => {
    // Undo vi.useFakeTimers() from tests that installed fake timers.
    vi.useRealTimers();
  });

  it("falls back to the full consult when memory manager setup fails", async () => {
    mocks.getActiveMemorySearchManager.mockRejectedValue(new Error("memory misconfigured"));
    const logger = createLogger();

    const outcome = await resolveRealtimeFastContextConsult({
      ...sharedParams,
      config: createFastContextConfig({ fallbackToConsult: true }),
      logger,
    });

    expect(outcome).toEqual({ handled: false });
    expect(logger.debug).toHaveBeenCalledWith(expect.stringContaining("memory misconfigured"));
  });

  it("returns a bounded miss when memory manager setup exceeds the fast context timeout", async () => {
    vi.useFakeTimers();
    const logger = createLogger();
    // A promise that never settles simulates a memory manager that hangs during setup.
    mocks.getActiveMemorySearchManager.mockReturnValue(new Promise(() => {}));

    const pending = resolveRealtimeFastContextConsult({
      ...sharedParams,
      config: createFastContextConfig({ fallbackToConsult: false, timeoutMs: 25 }),
      logger,
    });

    // Advance the fake clock up to the configured timeout so the lookup is abandoned.
    await vi.advanceTimersByTimeAsync(25);

    const outcome = await pending;
    expect(outcome).toEqual({
      handled: true,
      result: {
        text: expect.stringContaining("No relevant OpenClaw memory or session context"),
      },
    });
    expect(logger.debug).toHaveBeenCalledWith(expect.stringContaining("timed out after 25ms"));
  });
});
|
|
@@ -1,165 +0,0 @@
|
|
|
1
|
-
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-types";
|
|
2
|
-
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
|
|
3
|
-
import { getActiveMemorySearchManager } from "openclaw/plugin-sdk/memory-host-search";
|
|
4
|
-
import {
|
|
5
|
-
parseRealtimeVoiceAgentConsultArgs,
|
|
6
|
-
type RealtimeVoiceAgentConsultResult,
|
|
7
|
-
} from "openclaw/plugin-sdk/realtime-voice";
|
|
8
|
-
import type { VoiceCallRealtimeFastContextConfig } from "./config.js";
|
|
9
|
-
|
|
10
|
-
/** Minimal logger contract used by this module. */
type Logger = {
  /** Required warning sink. */
  warn: (message: string) => void;
  /** Optional debug sink; callers must tolerate its absence. */
  debug?: (message: string) => void;
};
|
|
14
|
-
|
|
15
|
-
/** One search result as consumed by this module. */
type MemorySearchHit = {
  /** Which store produced the hit. */
  source: "memory" | "sessions";
  /** Path of the matched document. */
  path: string;
  /** First line of the matched range. */
  startLine: number;
  /** Last line of the matched range. */
  endLine: number;
  /** Matched text excerpt. */
  snippet: string;
  /** Score reported for the hit. */
  score: number;
};
|
|
23
|
-
|
|
24
|
-
/**
 * Outcome of a fast-context lookup, discriminated by `status`:
 * - `"unavailable"`: no memory manager could be used; `error` may explain why.
 * - `"hits"`: the search ran; `hits` holds its results (possibly empty).
 */
type FastContextLookupResult =
  | { status: "unavailable"; error?: string }
  | { status: "hits"; hits: MemorySearchHit[] };
|
|
27
|
-
|
|
28
|
-
/**
 * Result of trying to answer a consult request from fast context:
 * - `handled: false`: fast context did not produce an answer for this request.
 * - `handled: true`: `result` carries the consult result to use.
 */
type RealtimeFastContextConsultResult =
  | { handled: false }
  | { handled: true; result: RealtimeVoiceAgentConsultResult };
|
|
31
|
-
|
|
32
|
-
/** Length threshold (in characters) above which normalizeSnippet truncates a snippet. */
const MAX_SNIPPET_CHARS = 700;
|
|
33
|
-
|
|
34
|
-
/** Error raised when a fast-context lookup does not finish within its time budget. */
class RealtimeFastContextTimeoutError extends Error {
  /**
   * @param timeoutMs The budget, in milliseconds, that was exceeded; it is embedded in the message.
   */
  constructor(timeoutMs: number) {
    const message = `fast context lookup timed out after ${timeoutMs}ms`;
    super(message);
    this.name = "RealtimeFastContextTimeoutError";
  }
}
|
|
40
|
-
|
|
41
|
-
/**
 * Collapses whitespace in a snippet and caps its length.
 *
 * Every run of whitespace becomes a single space and the ends are trimmed.
 * If the result is longer than `MAX_SNIPPET_CHARS`, it is cut and suffixed
 * with `...`; the suffix is counted against the limit, so the returned string
 * never exceeds `MAX_SNIPPET_CHARS` characters.
 *
 * @param text Raw snippet text.
 * @returns The normalized, possibly truncated snippet.
 */
function normalizeSnippet(text: string): string {
  const ellipsis = "...";
  const normalized = text.replace(/\s+/g, " ").trim();
  if (normalized.length <= MAX_SNIPPET_CHARS) {
    return normalized;
  }

  // Reserve room for the ellipsis so the final string stays within the limit.
  let end = Math.max(0, MAX_SNIPPET_CHARS - ellipsis.length);

  // Do not cut between the two halves of a surrogate pair: if the last kept
  // code unit is a high surrogate, drop it too.
  const lastUnit = normalized.charCodeAt(end - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    end -= 1;
  }

  return `${normalized.slice(0, end).trimEnd()}${ellipsis}`;
}
|
|
48
|
-
|
|
49
|
-
/**
 * Turns raw consult-tool arguments into a search query.
 *
 * The arguments are parsed with `parseRealtimeVoiceAgentConsultArgs`; the
 * question and context (whichever are truthy) are joined by a blank line.
 */
function buildSearchQuery(args: unknown): string {
  const consultArgs = parseRealtimeVoiceAgentConsultArgs(args);
  const parts = [consultArgs.question, consultArgs.context].filter(Boolean);
  return parts.join("\n\n");
}
|
|
53
|
-
|
|
54
|
-
/**
 * Renders search hits as the text handed back for a fast-context answer.
 *
 * The output has four blank-line-separated sections: a header, usage guidance,
 * the question, and a numbered list of hits. Each hit is shown as
 * `N. [source] path:start-end` followed by its normalized snippet.
 */
function buildContextText(params: { query: string; hits: MemorySearchHit[] }): string {
  const entries: string[] = [];
  params.hits.forEach((hit, index) => {
    const heading = `${index + 1}. [${hit.source}] ${hit.path}:${hit.startLine}-${hit.endLine}`;
    entries.push(`${heading}\n${normalizeSnippet(hit.snippet)}`);
  });

  const sections = [
    "Fast OpenClaw memory context found for the live caller.",
    "Use this context only if it answers the caller's question. If it is not relevant, say briefly that you do not have that context handy.",
    `Question:\n${params.query}`,
    `Context:\n${entries.join("\n\n")}`,
  ];
  return sections.join("\n\n");
}
|
|
68
|
-
|
|
69
|
-
/**
 * Builds the text returned when no fast context is available for a question.
 *
 * The text states that nothing was found, tells the model to answer briefly
 * without further checking, and repeats the question.
 */
function buildMissText(query: string): string {
  const notFound =
    "No relevant OpenClaw memory or session context was found quickly for the live caller.";
  const guidance =
    "Answer briefly that you do not have that context handy. Do not keep checking unless the caller asks you to.";
  return `${notFound}\n\n${guidance}\n\nQuestion:\n${query}`;
}
|
|
76
|
-
|
|
77
|
-
/**
 * Awaits `promise`, but gives up after `timeoutMs` milliseconds.
 *
 * @param promise The work to wait for.
 * @param timeoutMs Time budget in milliseconds.
 * @returns The value `promise` resolves with, if it settles first.
 * @throws RealtimeFastContextTimeoutError when the timer fires first; otherwise
 *   whatever `promise` rejects with.
 */
async function withTimeout<T>(promise: Promise<T>, timeoutMs: number): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;

  // A promise that never resolves and rejects once the budget is exhausted.
  const deadline = new Promise<T>((_resolve, reject) => {
    timer = setTimeout(() => {
      reject(new RealtimeFastContextTimeoutError(timeoutMs));
    }, timeoutMs);
  });

  try {
    return await Promise.race([promise, deadline]);
  } finally {
    // Always cancel the timer so it does not outlive the call.
    if (timer) {
      clearTimeout(timer);
    }
  }
}
|
|
92
|
-
|
|
93
|
-
/**
 * Runs one memory search for the given query.
 *
 * Obtains the active memory search manager for the agent; if there is none,
 * reports `"unavailable"` with the manager lookup's error (or a default
 * message). Otherwise searches with the configured result limit and sources,
 * scoped to the session key, and reports the hits.
 */
async function lookupFastContext(params: {
  cfg: OpenClawConfig;
  agentId: string;
  sessionKey: string;
  config: VoiceCallRealtimeFastContextConfig;
  query: string;
}): Promise<FastContextLookupResult> {
  const { cfg, agentId, sessionKey, config, query } = params;

  const memory = await getActiveMemorySearchManager({ cfg, agentId });
  if (memory.manager) {
    const hits = await memory.manager.search(query, {
      maxResults: config.maxResults,
      sessionKey,
      sources: config.sources,
    });
    return { status: "hits", hits };
  }

  return {
    status: "unavailable",
    error: memory.error ?? "no active memory manager",
  };
}
|
|
117
|
-
|
|
118
|
-
/**
 * Tries to answer a realtime consult request from fast memory/session context.
 *
 * - Returns `{ handled: false }` immediately when fast context is disabled.
 * - Otherwise builds a query from `args` and runs a time-boxed lookup
 *   (`config.timeoutMs`).
 * - When hits are found, returns them rendered as context text.
 * - When the lookup is unavailable, finds nothing, times out, or throws, the
 *   outcome depends on `config.fallbackToConsult`: `{ handled: false }` when it
 *   is set, otherwise a handled "miss" text. Unavailability and failures are
 *   reported through `logger.debug` when present.
 *
 * Note that the query is built before the try block, so an error thrown while
 * parsing `args` propagates to the caller.
 */
export async function resolveRealtimeFastContextConsult(params: {
  cfg: OpenClawConfig;
  agentId: string;
  sessionKey: string;
  config: VoiceCallRealtimeFastContextConfig;
  args: unknown;
  logger: Logger;
}): Promise<RealtimeFastContextConsultResult> {
  const { config, logger } = params;
  if (!config.enabled) {
    return { handled: false };
  }

  const query = buildSearchQuery(params.args);

  // Shared outcome for every "no usable context" path.
  const missOrFallback = (): RealtimeFastContextConsultResult => {
    if (config.fallbackToConsult) {
      return { handled: false };
    }
    return { handled: true, result: { text: buildMissText(query) } };
  };

  try {
    const lookup = await withTimeout(
      lookupFastContext({
        cfg: params.cfg,
        agentId: params.agentId,
        sessionKey: params.sessionKey,
        config,
        query,
      }),
      config.timeoutMs,
    );

    if (lookup.status === "unavailable") {
      logger.debug?.(`[voice-call] realtime fast context unavailable: ${lookup.error}`);
      return missOrFallback();
    }

    if (lookup.hits.length === 0) {
      return missOrFallback();
    }

    const text = buildContextText({ query, hits: lookup.hits });
    return { handled: true, result: { text } };
  } catch (error) {
    // Format first, as a separate step, so it happens even when no debug sink is configured.
    const message = formatErrorMessage(error);
    logger.debug?.(`[voice-call] realtime fast context lookup failed: ${message}`);
    return missOrFallback();
  }
}
|
|
@@ -1,321 +0,0 @@
|
|
|
1
|
-
import { describe, expect, it, vi } from "vitest";
|
|
2
|
-
import { VoiceCallConfigSchema } from "./config.js";
|
|
3
|
-
import type { CoreAgentDeps, CoreConfig } from "./core-bridge.js";
|
|
4
|
-
import { generateVoiceResponse } from "./response-generator.js";
|
|
5
|
-
|
|
6
|
-
/**
 * Builds a fake agent runtime for the response-generator tests.
 *
 * The embedded agent mock resolves with the given `payloads`; path resolvers
 * return deterministic `/tmp/openclaw/...` paths; the session store is a plain
 * in-memory object that `updateSessionStore` hands to its mutator.
 *
 * @returns The runtime (cast to `CoreAgentDeps`) together with the individual
 *   mocks and the in-memory session store so tests can assert on them.
 */
function createAgentRuntime(payloads: Array<Record<string, unknown>>) {
  type SessionStore = Record<string, { sessionId: string; updatedAt: number }>;

  const sessionStore: SessionStore = {};
  const saveSessionStore = vi.fn(async () => {});
  const updateSessionStore = vi.fn(
    async (_storePath: string, mutator: (store: SessionStore) => unknown) =>
      await mutator(sessionStore),
  );

  const runEmbeddedPiAgent = vi.fn(async () => ({
    payloads,
    meta: { durationMs: 12, aborted: false },
  }));

  const resolveAgentDir = vi.fn(
    (_cfg: CoreConfig, agentId: string) => `/tmp/openclaw/agents/${agentId}`,
  );
  const resolveAgentWorkspaceDir = vi.fn(
    (_cfg: CoreConfig, agentId: string) => `/tmp/openclaw/workspace/${agentId}`,
  );
  const resolveAgentIdentity = vi.fn((_cfg: CoreConfig, agentId: string) => {
    return { name: `${agentId} tester` };
  });
  const resolveStorePath = vi.fn(
    (_store: string | undefined, params: { agentId?: string }) =>
      `/tmp/openclaw/${params.agentId ?? "main"}/sessions.json`,
  );
  const resolveSessionFilePath = vi.fn(
    (_sessionId: string, _entry: unknown, params: { agentId?: string }) =>
      `/tmp/openclaw/${params.agentId ?? "main"}/sessions/session.jsonl`,
  );

  const session = {
    resolveStorePath,
    loadSessionStore: () => sessionStore,
    saveSessionStore,
    updateSessionStore,
    resolveSessionFilePath,
  };

  // Only the members listed here are provided, hence the double cast.
  const runtime = {
    defaults: {
      provider: "together",
      model: "Qwen/Qwen2.5-7B-Instruct-Turbo",
    },
    resolveAgentDir,
    resolveAgentWorkspaceDir,
    resolveAgentIdentity,
    resolveThinkingDefault: () => "off",
    resolveAgentTimeoutMs: () => 30_000,
    ensureAgentWorkspace: async () => {},
    runEmbeddedPiAgent,
    session,
  } as unknown as CoreAgentDeps;

  return {
    runtime,
    runEmbeddedPiAgent,
    saveSessionStore,
    updateSessionStore,
    sessionStore,
    resolveAgentDir,
    resolveAgentWorkspaceDir,
    resolveAgentIdentity,
    resolveStorePath,
    resolveSessionFilePath,
  };
}
|
|
73
|
-
|
|
74
|
-
/**
 * Returns the first argument of the first recorded call to the embedded agent mock.
 *
 * @throws Error when the mock was never called, or when that argument has no
 *   (non-empty) `extraSystemPrompt`.
 */
function requireEmbeddedAgentArgs(runEmbeddedPiAgent: ReturnType<typeof vi.fn>) {
  const recordedCalls = runEmbeddedPiAgent.mock.calls as unknown[][];
  const initialCall = recordedCalls[0];
  if (!initialCall) {
    throw new Error("voice response generator did not invoke the embedded agent");
  }

  const args = initialCall[0] as
    | {
        extraSystemPrompt?: string;
        provider?: string;
        model?: string;
      }
    | undefined;
  if (!args?.extraSystemPrompt) {
    throw new Error("voice response generator did not pass the spoken-output contract prompt");
  }

  return args;
}
|
|
92
|
-
|
|
93
|
-
/**
 * Runs `generateVoiceResponse` for a fixed test call ("call-123" from +15550001111,
 * user message "hello there") with a 5 s response timeout.
 *
 * @param payloads Payloads the fake embedded agent returns; ignored when
 *   `overrides.runtime` is supplied.
 * @param overrides Optional replacement runtime and/or transcript.
 * @returns The generator's result wrapped as `{ result }`.
 */
async function runGenerateVoiceResponse(
  payloads: Array<Record<string, unknown>>,
  overrides?: {
    runtime?: CoreAgentDeps;
    transcript?: Array<{ speaker: "user" | "bot"; text: string }>;
  },
) {
  const voiceConfig = VoiceCallConfigSchema.parse({ responseTimeoutMs: 5000 });
  const coreConfig = {} as CoreConfig;
  const agentRuntime = overrides?.runtime ?? createAgentRuntime(payloads).runtime;
  const transcript: Array<{ speaker: "user" | "bot"; text: string }> =
    overrides?.transcript ?? [{ speaker: "user", text: "hello there" }];

  const result = await generateVoiceResponse({
    voiceConfig,
    coreConfig,
    agentRuntime,
    callId: "call-123",
    from: "+15550001111",
    transcript,
    userMessage: "hello there",
  });

  return { result };
}
|
|
118
|
-
|
|
119
|
-
describe("generateVoiceResponse", () => {
  // Call fields shared by the tests that invoke generateVoiceResponse directly.
  const callBasics = {
    callId: "call-123",
    from: "+15550001111",
    userMessage: "hello there",
  };

  /** Asserts that every resolver and the embedded agent were driven with `agentId`. */
  function expectAgentWorkspace(
    agent: ReturnType<typeof createAgentRuntime>,
    coreConfig: CoreConfig,
    agentId: string,
  ) {
    expect(agent.resolveStorePath).toHaveBeenCalledWith(undefined, { agentId });
    expect(agent.resolveAgentDir).toHaveBeenCalledWith(coreConfig, agentId);
    expect(agent.resolveAgentWorkspaceDir).toHaveBeenCalledWith(coreConfig, agentId);
    expect(agent.resolveAgentIdentity).toHaveBeenCalledWith(coreConfig, agentId);
    expect(agent.resolveSessionFilePath).toHaveBeenCalledWith(
      expect.any(String),
      expect.any(Object),
      { agentId },
    );
    expect(agent.runEmbeddedPiAgent).toHaveBeenCalledWith(
      expect.objectContaining({
        agentDir: `/tmp/openclaw/agents/${agentId}`,
        agentId,
        sandboxSessionKey: `agent:${agentId}:voice:15550001111`,
        workspaceDir: `/tmp/openclaw/workspace/${agentId}`,
        sessionFile: `/tmp/openclaw/${agentId}/sessions/session.jsonl`,
      }),
    );
  }

  it("suppresses reasoning payloads and reads structured spoken output", async () => {
    const agent = createAgentRuntime([
      { text: "Reasoning: hidden", isReasoning: true },
      { text: '{"spoken":"Hello from JSON."}' },
    ]);

    const { result } = await runGenerateVoiceResponse([], { runtime: agent.runtime });

    expect(result.text).toBe("Hello from JSON.");
    expect(agent.runEmbeddedPiAgent).toHaveBeenCalledTimes(1);
    const agentArgs = requireEmbeddedAgentArgs(agent.runEmbeddedPiAgent);
    expect(agentArgs.extraSystemPrompt).toContain('{"spoken":"..."}');
    expect(agentArgs.provider).toBe("together");
    expect(agentArgs.model).toBe("Qwen/Qwen2.5-7B-Instruct-Turbo");
  });

  it("extracts spoken text from fenced JSON", async () => {
    const fenced = { text: '```json\n{"spoken":"Fenced JSON works."}\n```' };

    const { result } = await runGenerateVoiceResponse([fenced]);

    expect(result.text).toBe("Fenced JSON works.");
  });

  it("returns silence for an explicit empty spoken contract response", async () => {
    const { result } = await runGenerateVoiceResponse([{ text: '{"spoken":""}' }]);

    expect(result.text).toBeNull();
  });

  it("strips leading planning text when model returns plain text", async () => {
    const planning = "The user responded with short text. I should keep the response concise.";
    const reply = "Sounds good. I can help with the next step whenever you are ready.";

    const { result } = await runGenerateVoiceResponse([{ text: `${planning}\n\n${reply}` }]);

    expect(result.text).toBe("Sounds good. I can help with the next step whenever you are ready.");
  });

  it("keeps plain conversational output when no JSON contract is followed", async () => {
    const plain = { text: "Absolutely. Tell me what you want to do next." };

    const { result } = await runGenerateVoiceResponse([plain]);

    expect(result.text).toBe("Absolutely. Tell me what you want to do next.");
  });

  it("pins the voice session to responseModel before running the embedded agent", async () => {
    const agent = createAgentRuntime([{ text: '{"spoken":"Pinned model works."}' }]);
    const voiceConfig = VoiceCallConfigSchema.parse({
      responseModel: "openai/gpt-4.1-nano",
      responseTimeoutMs: 5000,
    });

    const result = await generateVoiceResponse({
      ...callBasics,
      voiceConfig,
      coreConfig: {} as CoreConfig,
      agentRuntime: agent.runtime,
      transcript: [{ speaker: "user", text: "hello there" }],
    });

    expect(result.text).toBe("Pinned model works.");
    expect(agent.sessionStore["voice:15550001111"]).toMatchObject({
      providerOverride: "openai",
      modelOverride: "gpt-4.1-nano",
      modelOverrideSource: "auto",
    });
    expect(agent.updateSessionStore).toHaveBeenCalledWith(
      "/tmp/openclaw/main/sessions.json",
      expect.any(Function),
    );
    expect(agent.runEmbeddedPiAgent).toHaveBeenCalledWith(
      expect.objectContaining({
        provider: "openai",
        model: "gpt-4.1-nano",
        sessionKey: "voice:15550001111",
      }),
    );
  });

  it("uses the persisted per-call session key for classic responses", async () => {
    const agent = createAgentRuntime([{ text: '{"spoken":"Fresh call context."}' }]);
    const voiceConfig = VoiceCallConfigSchema.parse({
      sessionScope: "per-call",
      responseTimeoutMs: 5000,
    });

    const result = await generateVoiceResponse({
      ...callBasics,
      voiceConfig,
      coreConfig: {} as CoreConfig,
      agentRuntime: agent.runtime,
      sessionKey: "voice:call:call-123",
      transcript: [{ speaker: "user", text: "hello there" }],
    });

    expect(result.text).toBe("Fresh call context.");
    // The explicit per-call key is used; no caller-number-based entry is created.
    expect(agent.sessionStore["voice:call:call-123"]).toBeDefined();
    expect(agent.sessionStore["voice:15550001111"]).toBeUndefined();
    expect(agent.runEmbeddedPiAgent).toHaveBeenCalledWith(
      expect.objectContaining({
        sessionKey: "voice:call:call-123",
        sandboxSessionKey: "agent:main:voice:call:call-123",
      }),
    );
  });

  it("uses the main agent workspace when voice config omits agentId", async () => {
    const agent = createAgentRuntime([{ text: '{"spoken":"Default agent."}' }]);
    const coreConfig = {} as CoreConfig;

    await generateVoiceResponse({
      ...callBasics,
      voiceConfig: VoiceCallConfigSchema.parse({ responseTimeoutMs: 5000 }),
      coreConfig,
      agentRuntime: agent.runtime,
      transcript: [],
    });

    expectAgentWorkspace(agent, coreConfig, "main");
  });

  it("uses the configured voice response agent workspace", async () => {
    const agent = createAgentRuntime([{ text: '{"spoken":"Voice agent."}' }]);
    const coreConfig = {} as CoreConfig;

    const result = await generateVoiceResponse({
      ...callBasics,
      voiceConfig: VoiceCallConfigSchema.parse({
        agentId: "voice",
        responseTimeoutMs: 5000,
      }),
      coreConfig,
      agentRuntime: agent.runtime,
      transcript: [],
    });

    expect(result.text).toBe("Voice agent.");
    expectAgentWorkspace(agent, coreConfig, "voice");
  });
});
|