daemora 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +106 -76
- package/SOUL.md +100 -28
- package/config/mcp.json +9 -9
- package/package.json +15 -8
- package/skills/apple-notes.md +0 -52
- package/skills/apple-reminders.md +1 -87
- package/skills/camsnap.md +20 -144
- package/skills/coding.md +7 -7
- package/skills/documents.md +6 -6
- package/skills/email.md +6 -6
- package/skills/gif-search.md +28 -171
- package/skills/healthcheck.md +21 -203
- package/skills/image-gen.md +24 -123
- package/skills/model-usage.md +18 -165
- package/skills/obsidian.md +28 -174
- package/skills/pdf.md +30 -181
- package/skills/research.md +6 -6
- package/skills/skill-creator.md +35 -111
- package/skills/spotify.md +2 -17
- package/skills/summarize.md +36 -193
- package/skills/things.md +23 -175
- package/skills/tmux.md +1 -91
- package/skills/trello.md +32 -157
- package/skills/video-frames.md +26 -166
- package/skills/weather.md +6 -6
- package/src/a2a/A2AClient.js +2 -2
- package/src/a2a/A2AServer.js +6 -6
- package/src/a2a/AgentCard.js +2 -2
- package/src/agents/SubAgentManager.js +61 -19
- package/src/agents/Supervisor.js +4 -4
- package/src/channels/BaseChannel.js +6 -6
- package/src/channels/BlueBubblesChannel.js +112 -0
- package/src/channels/DiscordChannel.js +8 -8
- package/src/channels/EmailChannel.js +54 -26
- package/src/channels/FeishuChannel.js +140 -0
- package/src/channels/GoogleChatChannel.js +8 -8
- package/src/channels/HttpChannel.js +2 -2
- package/src/channels/IRCChannel.js +144 -0
- package/src/channels/LineChannel.js +13 -13
- package/src/channels/MatrixChannel.js +97 -0
- package/src/channels/MattermostChannel.js +119 -0
- package/src/channels/NextcloudChannel.js +133 -0
- package/src/channels/NostrChannel.js +175 -0
- package/src/channels/SignalChannel.js +9 -9
- package/src/channels/SlackChannel.js +10 -10
- package/src/channels/TeamsChannel.js +10 -10
- package/src/channels/TelegramChannel.js +8 -8
- package/src/channels/TwitchChannel.js +128 -0
- package/src/channels/WhatsAppChannel.js +10 -10
- package/src/channels/ZaloChannel.js +119 -0
- package/src/channels/iMessageChannel.js +150 -0
- package/src/channels/index.js +241 -11
- package/src/cli.js +835 -38
- package/src/config/agentProfiles.js +19 -19
- package/src/config/channels.js +1 -1
- package/src/config/default.js +12 -7
- package/src/config/models.js +3 -3
- package/src/config/permissions.js +2 -2
- package/src/core/AgentLoop.js +13 -13
- package/src/core/Compaction.js +3 -3
- package/src/core/CostTracker.js +2 -2
- package/src/core/EventBus.js +15 -15
- package/src/core/TaskQueue.js +24 -7
- package/src/core/TaskRunner.js +19 -6
- package/src/daemon/DaemonManager.js +4 -4
- package/src/hooks/HookRunner.js +4 -4
- package/src/index.js +6 -2
- package/src/mcp/MCPAgentRunner.js +3 -3
- package/src/mcp/MCPClient.js +9 -9
- package/src/mcp/MCPManager.js +14 -14
- package/src/models/ModelRouter.js +2 -2
- package/src/safety/AuditLog.js +3 -3
- package/src/safety/CircuitBreaker.js +2 -2
- package/src/safety/CommandGuard.js +132 -0
- package/src/safety/FilesystemGuard.js +23 -3
- package/src/safety/GitRollback.js +5 -5
- package/src/safety/HumanApproval.js +9 -9
- package/src/safety/InputSanitizer.js +81 -8
- package/src/safety/PermissionGuard.js +2 -2
- package/src/safety/Sandbox.js +1 -1
- package/src/safety/SecretScanner.js +90 -28
- package/src/safety/SecretVault.js +2 -2
- package/src/scheduler/Heartbeat.js +3 -3
- package/src/scheduler/Scheduler.js +6 -6
- package/src/setup/theme.js +171 -66
- package/src/setup/wizard.js +432 -57
- package/src/skills/SkillLoader.js +145 -8
- package/src/storage/TaskStore.js +39 -15
- package/src/systemPrompt.js +45 -43
- package/src/tenants/TenantManager.js +79 -22
- package/src/tools/ToolRegistry.js +3 -3
- package/src/tools/applyPatch.js +2 -2
- package/src/tools/browserAutomation.js +4 -4
- package/src/tools/calendar.js +155 -0
- package/src/tools/clipboard.js +71 -0
- package/src/tools/contacts.js +138 -0
- package/src/tools/createDocument.js +2 -2
- package/src/tools/cronTool.js +14 -14
- package/src/tools/database.js +165 -0
- package/src/tools/editFile.js +10 -10
- package/src/tools/executeCommand.js +11 -3
- package/src/tools/generateImage.js +79 -0
- package/src/tools/gitTool.js +141 -0
- package/src/tools/glob.js +1 -1
- package/src/tools/googlePlaces.js +136 -0
- package/src/tools/grep.js +2 -2
- package/src/tools/iMessageTool.js +86 -0
- package/src/tools/imageAnalysis.js +3 -3
- package/src/tools/index.js +56 -2
- package/src/tools/makeVoiceCall.js +283 -0
- package/src/tools/manageAgents.js +2 -2
- package/src/tools/manageMCP.js +38 -20
- package/src/tools/memory.js +25 -32
- package/src/tools/messageChannel.js +1 -1
- package/src/tools/notification.js +90 -0
- package/src/tools/philipsHue.js +147 -0
- package/src/tools/projectTracker.js +8 -8
- package/src/tools/readFile.js +1 -1
- package/src/tools/readPDF.js +73 -0
- package/src/tools/screenCapture.js +6 -6
- package/src/tools/searchContent.js +2 -2
- package/src/tools/searchFiles.js +1 -1
- package/src/tools/sendEmail.js +79 -24
- package/src/tools/sendFile.js +4 -4
- package/src/tools/sonos.js +137 -0
- package/src/tools/sshTool.js +130 -0
- package/src/tools/textToSpeech.js +5 -5
- package/src/tools/transcribeAudio.js +4 -4
- package/src/tools/useMCP.js +4 -4
- package/src/tools/webFetch.js +2 -2
- package/src/tools/webSearch.js +1 -1
- package/src/utils/Embeddings.js +79 -0
- package/src/voice/VoiceSessionManager.js +170 -0
- package/src/voice/VoiceWebhook.js +188 -0
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Provider-agnostic embedding generation.
|
|
3
|
+
*
|
|
4
|
+
* Auto-detects the best available embedding provider (priority order):
|
|
5
|
+
* 1. OPENAI_API_KEY → text-embedding-3-small (512 dims)
|
|
6
|
+
* 2. GOOGLE_AI_API_KEY → text-embedding-004 (768 dims)
|
|
7
|
+
* 3. OLLAMA_HOST → nomic-embed-text (768 dims, local/free)
|
|
8
|
+
* 4. None → returns null (callers fall back to keyword search)
|
|
9
|
+
*
|
|
10
|
+
* Override with: EMBEDDING_PROVIDER=openai|google|ollama
|
|
11
|
+
* Override Ollama model with: OLLAMA_EMBED_MODEL=nomic-embed-text
|
|
12
|
+
*
|
|
13
|
+
* Note: vectors from different providers are NOT interchangeable.
|
|
14
|
+
* Callers (SkillLoader, memory.js) tag stored vectors with the provider name
|
|
15
|
+
* and skip vectors that don't match the current provider.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { embed } from "ai";
|
|
19
|
+
|
|
20
|
+
/**
 * Resolve which embedding provider is currently usable.
 *
 * An explicit EMBEDDING_PROVIDER override (case-insensitive, surrounding
 * whitespace ignored) wins over auto-detection. "openai" and "google" are only
 * honoured when their API key is present; "ollama" needs no key. An override
 * that names an unknown provider, or whose key is missing, yields null rather
 * than silently falling back to a different provider.
 *
 * Without an override the first configured provider wins, in the order
 * OPENAI_API_KEY, GOOGLE_AI_API_KEY, OLLAMA_HOST.
 *
 * @returns {"openai"|"google"|"ollama"|null} provider name, or null if none is available
 */
export function getEmbeddingProvider() {
  const env = process.env;

  // Trim so a blank or padded value from a .env file does not count as an
  // (unrecognised) override and disable embeddings altogether.
  const override = env.EMBEDDING_PROVIDER?.trim().toLowerCase();

  if (override) {
    switch (override) {
      case "openai":
        return env.OPENAI_API_KEY ? "openai" : null;
      case "google":
        return env.GOOGLE_AI_API_KEY ? "google" : null;
      case "ollama":
        return "ollama";
      default:
        return null; // Override set but not a known provider
    }
  }

  // Auto-detect in priority order
  if (env.OPENAI_API_KEY) return "openai";
  if (env.GOOGLE_AI_API_KEY) return "google";
  if (env.OLLAMA_HOST) return "ollama";
  return null;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Generate a vector embedding for the given text using the best available provider.
 *
 * Returns null (never throws) when the input is not a non-empty string, when no
 * provider is configured, or when the provider call fails, so callers must
 * fall back to keyword search. Provider failures are logged rather than
 * swallowed silently, so a misconfigured key or unreachable host is visible.
 *
 * Only the first 8000 characters of the text are embedded.
 *
 * @param {string} text
 * @returns {Promise<number[]|null>}
 */
export async function generateEmbedding(text) {
  // Reject unusable input up front instead of relying on a thrown TypeError
  // (non-string) or a failed API round-trip (empty string) to produce null.
  if (typeof text !== "string" || text.length === 0) return null;

  const provider = getEmbeddingProvider();
  if (!provider) return null;

  try {
    let model;

    // Provider SDKs are imported lazily so only the selected one is loaded.
    if (provider === "openai") {
      const { createOpenAI } = await import("@ai-sdk/openai");
      const openai = createOpenAI({ apiKey: process.env.OPENAI_API_KEY });
      // 512 dims = 3x smaller than default 1536, minimal quality loss for recall tasks
      model = openai.embedding("text-embedding-3-small", { dimensions: 512 });
    } else if (provider === "google") {
      const { createGoogleGenerativeAI } = await import("@ai-sdk/google");
      const google = createGoogleGenerativeAI({ apiKey: process.env.GOOGLE_AI_API_KEY });
      model = google.textEmbeddingModel("text-embedding-004"); // 768 dims
    } else if (provider === "ollama") {
      // NOTE(review): the default `ollama` instance is used as-is; confirm it
      // honours OLLAMA_HOST, otherwise a non-default host is ignored here.
      const { ollama } = await import("ollama-ai-provider");
      const modelName = process.env.OLLAMA_EMBED_MODEL || "nomic-embed-text";
      model = ollama.embedding(modelName); // typically 768 dims
    }

    if (!model) return null;

    const { embedding } = await embed({ model, value: text.slice(0, 8000) });
    return embedding;
  } catch (err) {
    // Best-effort: embeddings are optional, so report the failure and degrade.
    console.warn(`[Embeddings] ${provider} embedding failed: ${err?.message ?? err}`);
    return null;
  }
}
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* VoiceSessionManager — in-memory state for active interactive voice calls.
|
|
3
|
+
*
|
|
4
|
+
* Each session bridges two async tracks:
|
|
5
|
+
* - The agent loop (calls listen/speak/end as tool calls)
|
|
6
|
+
* - Twilio webhooks (hits /voice/input when caller speaks, /voice/wait to fetch agent reply)
|
|
7
|
+
*
|
|
8
|
+
* Promise-based signalling lets the agent's `listen` call block cleanly until
|
|
9
|
+
* the caller speaks, without polling.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { randomBytes } from "node:crypto";
|
|
13
|
+
|
|
14
|
+
const SESSION_TTL_MS = 2 * 60 * 60 * 1000; // prune sessions older than 2 hours
|
|
15
|
+
|
|
16
|
+
/**
 * State for one interactive voice call.
 *
 * Two parties drive a session: the agent (waitForCallerInput,
 * setAgentResponse, end) and the webhook routes (markConnected,
 * receiveCallerInput, waitForAgentResponse, consumeResponse). Promises are
 * used to hand values between them without polling.
 */
class VoiceSession {
  /**
   * @param {{ sessionId: string, callSid?: string, greeting?: string }} options
   */
  constructor({ sessionId, callSid, greeting }) {
    this.id = sessionId;
    this.callSid = callSid;
    this.status = "ringing"; // ringing | in-progress | ended
    this.greeting = greeting || null;
    this.transcript = []; // [{ role: "caller"|"agent", text, timestamp }]
    this.createdAt = Date.now();
    this.endedAt = null;

    // Pending response the agent queued (text | "__HANGUP__" | null)
    this._pendingResponse = null;

    // Caller utterances that arrived while no `listen` was pending (FIFO)
    this._callerInputQueue = [];

    // Settlers for the agent's pending `listen` wait (null when none pending)
    this._callerInputResolve = null;
    this._callerInputReject = null;

    // Wakes the webhook's pending `wait` poll (null when none pending)
    this._responseReadyResolve = null;
  }

  /** Called by /voice/answer once Twilio dials and connects */
  markConnected() {
    this.status = "in-progress";
  }

  /**
   * Called by /voice/input when Twilio STT delivers caller speech.
   * Records the utterance in the transcript, then hands it to the pending
   * `listen` call if there is one; otherwise buffers it so the next `listen`
   * receives it instead of the speech being lost.
   *
   * @param {string} text
   */
  receiveCallerInput(text) {
    this.transcript.push({ role: "caller", text, timestamp: new Date().toISOString() });
    console.log(`[VoiceSession:${this.id}] Caller said: "${text}"`);

    const deliver = this._callerInputResolve;
    if (deliver) {
      this._callerInputResolve = null;
      this._callerInputReject = null;
      deliver(text);
      return;
    }

    this._callerInputQueue.push(text);
  }

  /**
   * Called by the agent's speak/end tool action. Queues the reply (or the
   * "__HANGUP__" signal) for the next /voice/wait poll and wakes a poll that
   * is already waiting. Spoken text is added to the transcript; the hang-up
   * signal is not.
   *
   * @param {string} textOrSignal
   */
  setAgentResponse(textOrSignal) {
    this._pendingResponse = textOrSignal;

    if (textOrSignal !== "__HANGUP__") {
      this.transcript.push({ role: "agent", text: textOrSignal, timestamp: new Date().toISOString() });
    }

    // Wake up the /voice/wait poll immediately
    const wake = this._responseReadyResolve;
    if (wake) {
      this._responseReadyResolve = null;
      wake();
    }

    console.log(`[VoiceSession:${this.id}] Agent queued: "${textOrSignal}"`);
  }

  /**
   * Agent tool `listen` calls this — blocks until caller speaks or timeout.
   *
   * Buffered speech is returned first. If the session has already ended the
   * promise rejects immediately with "Call ended". Only one listener can be
   * pending at a time: a newer call rejects the older one.
   *
   * @param {number} [timeoutMs=120000]
   * @returns {Promise<string>} caller's spoken text
   */
  waitForCallerInput(timeoutMs = 120_000) {
    if (this._callerInputQueue.length > 0) {
      return Promise.resolve(this._callerInputQueue.shift());
    }
    if (this.status === "ended") {
      return Promise.reject(new Error("Call ended"));
    }

    // Settle any older listener now; otherwise it would linger until its
    // timeout and its timer would wipe out the resolvers registered below.
    const supersede = this._callerInputReject;
    if (supersede) {
      this._callerInputResolve = null;
      this._callerInputReject = null;
      supersede(new Error("Superseded by a newer listen call"));
    }

    return new Promise((resolve, reject) => {
      const onInput = (text) => { clearTimeout(timer); resolve(text); };
      const onError = (err) => { clearTimeout(timer); reject(err); };

      const timer = setTimeout(() => {
        // Only clear the slots if they still belong to this listener.
        if (this._callerInputResolve === onInput) {
          this._callerInputResolve = null;
          this._callerInputReject = null;
        }
        reject(new Error("Timed out waiting for caller input (caller may have hung up)"));
      }, timeoutMs);

      this._callerInputResolve = onInput;
      this._callerInputReject = onError;
    });
  }

  /**
   * /voice/wait calls this to block until the agent queues a reply.
   * Twilio expects a response quickly so we use a short timeout (8s) and fall
   * back to a <Pause>+<Redirect> loop when the agent hasn't responded yet.
   * The promise always resolves (never rejects); the caller checks
   * consumeResponse() to see whether a reply actually arrived.
   *
   * @param {number} [timeoutMs=8000]
   * @returns {Promise<void>}
   */
  waitForAgentResponse(timeoutMs = 8_000) {
    if (this._pendingResponse !== null || this.status === "ended") {
      return Promise.resolve(); // already ready, or nothing will ever arrive
    }
    return new Promise((resolve) => {
      const wake = () => { clearTimeout(timer); resolve(); };
      const timer = setTimeout(() => {
        if (this._responseReadyResolve === wake) this._responseReadyResolve = null;
        resolve(); // resolve (don't reject) on timeout
      }, timeoutMs);
      this._responseReadyResolve = wake;
    });
  }

  /**
   * Consume and return the pending response, clearing it.
   * @returns {string|null}
   */
  consumeResponse() {
    const pending = this._pendingResponse;
    this._pendingResponse = null;
    return pending;
  }

  /**
   * Mark session as ended. Safe to call more than once: only the first call
   * records `endedAt` and releases waiters.
   */
  end() {
    if (this.status === "ended") return;

    this.status = "ended";
    this.endedAt = Date.now();

    // Reject any waiting agent listen call so it doesn't hang forever
    const rejectListen = this._callerInputReject;
    this._callerInputResolve = null;
    this._callerInputReject = null;
    if (rejectListen) rejectListen(new Error("Call ended"));

    // Release a parked /voice/wait poll instead of holding it until timeout
    const wake = this._responseReadyResolve;
    this._responseReadyResolve = null;
    if (wake) wake();
  }
}
|
|
123
|
+
|
|
124
|
+
// ─── Manager (singleton) ───────────────────────────────────────────────────────
|
|
125
|
+
|
|
126
|
+
/**
 * In-memory registry of voice sessions keyed by session id.
 * A single shared instance is exported as the module default.
 */
class VoiceSessionManager {
  constructor() {
    this.sessions = new Map(); // sessionId → VoiceSession

    // Prune dead sessions every 30 minutes. The timer is unref'd so that this
    // housekeeping alone never keeps the Node process from exiting.
    this._pruneTimer = setInterval(() => this._prune(), 30 * 60 * 1000);
    this._pruneTimer.unref?.();
  }

  /**
   * Create and register a new session with a random "vc_"-prefixed id.
   *
   * @param {{ callSid?: string, greeting?: string }} [options]
   * @returns {VoiceSession}
   */
  create({ callSid, greeting } = {}) {
    const sessionId = "vc_" + randomBytes(6).toString("hex");
    const session = new VoiceSession({ sessionId, callSid, greeting });
    this.sessions.set(sessionId, session);
    console.log(`[VoiceSessionManager] Created session ${sessionId} for call ${callSid}`);
    return session;
  }

  /**
   * @param {string} sessionId
   * @returns {VoiceSession|null} the session, or null if unknown
   */
  get(sessionId) {
    return this.sessions.get(sessionId) || null;
  }

  /**
   * Look up session by Twilio call SID (used by status callbacks).
   * Linear scan over all sessions.
   *
   * @param {string} callSid
   * @returns {VoiceSession|null}
   */
  getBySid(callSid) {
    // Without this guard an undefined SID would "match" any session that was
    // created without a callSid.
    if (!callSid) return null;

    for (const session of this.sessions.values()) {
      if (session.callSid === callSid) return session;
    }
    return null;
  }

  /**
   * Remove a session from the registry (does not end it).
   * @param {string} sessionId
   */
  delete(sessionId) {
    this.sessions.delete(sessionId);
  }

  /** End and remove every session created more than SESSION_TTL_MS ago. */
  _prune() {
    const now = Date.now();
    for (const [id, session] of this.sessions) {
      if (now - session.createdAt > SESSION_TTL_MS) {
        console.log(`[VoiceSessionManager] Pruning expired session ${id}`);
        session.end();
        this.sessions.delete(id);
      }
    }
  }
}

const voiceSessionManager = new VoiceSessionManager();
export default voiceSessionManager;
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* VoiceWebhook — Express router for Twilio voice call webhooks.
|
|
3
|
+
*
|
|
4
|
+
* Twilio calls these endpoints during the lifecycle of an active call:
|
|
5
|
+
*
|
|
6
|
+
* POST /voice/answer/:sessionId — call connected, return opening TwiML
|
|
7
|
+
* POST /voice/input/:sessionId — caller finished speaking (SpeechResult)
|
|
8
|
+
* POST /voice/wait/:sessionId — poll for agent's next reply
|
|
9
|
+
* POST /voice/status/:sessionId — call status change (completed/failed/busy)
|
|
10
|
+
*
|
|
11
|
+
* The agent talks to the call via VoiceSessionManager (not via these routes).
|
|
12
|
+
* These routes are only for Twilio ↔ Daemora signalling.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { Router } from "express";
|
|
16
|
+
import voiceSessionManager from "./VoiceSessionManager.js";
|
|
17
|
+
|
|
18
|
+
const router = Router();
|
|
19
|
+
|
|
20
|
+
// Twilio sends form-encoded bodies — parse them for voice routes
|
|
21
|
+
import { urlencoded } from "express";
|
|
22
|
+
router.use(urlencoded({ extended: false }));
|
|
23
|
+
|
|
24
|
+
// ─── Helpers ──────────────────────────────────────────────────────────────────
|
|
25
|
+
|
|
26
|
+
// ── Voice quality + latency config ────────────────────────────────────────────
// Polly.Joanna = Amazon Polly neural voice via Twilio — high quality, low latency.
// Use "alice" as fallback (built-in Twilio TTS, slightly lower quality but zero extra cost).
const VOICE = process.env.VOICE_TTS_VOICE || "Polly.Joanna";
const LANGUAGE = process.env.VOICE_TTS_LANGUAGE || "en-US";

// How long Twilio waits for the caller to start speaking (seconds).
// 4s is fast enough to feel responsive; increase if callers complain of being cut off.
const SPEECH_TIMEOUT_START = process.env.VOICE_SPEECH_TIMEOUT || "4";

// How long Twilio waits after caller stops speaking before finalising (seconds).
// "auto" = Twilio's ML-based end-of-speech detector — fastest + most accurate option.
const SPEECH_TIMEOUT_END = "auto";

// Requested pause between polls for the agent's reply while the caller is on hold (ms).
// A missing, non-numeric, or non-positive VOICE_POLL_INTERVAL_MS falls back to 500
// so the value can never become NaN.
const POLL_PAUSE_MS = (() => {
  const requested = Number.parseInt(process.env.VOICE_POLL_INTERVAL_MS || "500", 10);
  return Number.isFinite(requested) && requested > 0 ? requested : 500;
})();
// TwiML needs whole seconds, so the effective pause is rounded and never below 1s.
const POLL_PAUSE_S = Math.max(1, Math.round(POLL_PAUSE_MS / 1000));
// ──────────────────────────────────────────────────────────────────────────────
|
|
45
|
+
|
|
46
|
+
/**
 * Wrap TwiML verbs in the XML prolog and a <Response> root element.
 *
 * @param {string} content - already-serialised TwiML verbs
 * @returns {string} complete TwiML document
 */
function twiml(content) {
  const prolog = '<?xml version="1.0" encoding="UTF-8"?>';
  return prolog.concat("<Response>", content, "</Response>");
}
|
|
49
|
+
|
|
50
|
+
/**
 * Escape text for safe inclusion in TwiML element content or attribute values.
 *
 * Replaces the five XML special characters (& < > " ') with their predefined
 * entities in a single pass, so an ampersand introduced by one replacement is
 * never escaped a second time.
 *
 * @param {*} str - text to escape; falsy values yield "", other non-strings are stringified
 * @returns {string}
 */
function escapeXml(str) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&apos;",
  };
  const text = str ? String(str) : "";
  return text.replace(/[&<>"']/g, (ch) => entities[ch]);
}
|
|
57
|
+
|
|
58
|
+
/**
 * Build TwiML that speaks `message` and then gathers the caller's speech.
 *
 * The <Say> sits inside the <Gather> with bargeIn enabled, so the gather is
 * active while the message is being spoken. Gathered speech is posted to
 * /voice/input/<sessionId>; the trailing <Redirect> sends the call back to
 * /voice/wait/<sessionId> as the fallback when no speech is detected.
 *
 * @param {string} sessionId
 * @param {string} message - text to speak (XML-escaped here)
 * @returns {string} TwiML document
 */
function sayAndListen(sessionId, message) {
  const gatherOpen =
    `<Gather input="speech" timeout="${SPEECH_TIMEOUT_START}" speechTimeout="${SPEECH_TIMEOUT_END}" ` +
    `bargeIn="true" action="/voice/input/${sessionId}" method="POST">`;
  const say = `<Say voice="${VOICE}" language="${LANGUAGE}">${escapeXml(message)}</Say>`;
  // Fallback if no speech detected — re-poll the agent (in case it has a follow-up)
  const fallback = `<Redirect method="POST">/voice/wait/${sessionId}</Redirect>`;

  return twiml(`${gatherOpen}${say}</Gather>${fallback}`);
}
|
|
71
|
+
|
|
72
|
+
/**
 * Build TwiML that parks the call briefly and then re-requests
 * /voice/wait/<sessionId>.
 *
 * No hold music is played: the pause is POLL_PAUSE_S seconds of silence
 * followed by a redirect. How quickly the caller hears a reply mostly depends
 * on how fast the agent queues it, since /voice/wait itself blocks on the
 * session until a reply is ready or its own timeout passes.
 *
 * @param {string} sessionId
 * @returns {string} TwiML document
 */
function waitAndPoll(sessionId) {
  const pause = `<Pause length="${POLL_PAUSE_S}"/>`;
  const redirect = `<Redirect method="POST">/voice/wait/${sessionId}</Redirect>`;
  return twiml(pause + redirect);
}
|
|
82
|
+
|
|
83
|
+
// ─── Routes ───────────────────────────────────────────────────────────────────
|
|
84
|
+
|
|
85
|
+
/**
 * POST /voice/answer/:sessionId
 * Requested by Twilio when the outbound call is answered.
 * A known session is marked connected and greeted (with a default greeting
 * when the session has none); an unknown session id gets a plain hang-up.
 */
router.post("/answer/:sessionId", (req, res) => {
  const session = voiceSessionManager.get(req.params.sessionId);

  let body;
  if (session) {
    session.markConnected();
    console.log(`[VoiceWebhook] Call answered for session ${session.id}`);

    const greeting = session.greeting || "Hello, this is your AI assistant. How can I help you?";
    body = sayAndListen(session.id, greeting);
  } else {
    // Unknown session — hang up gracefully
    body = twiml("<Hangup/>");
  }

  res.type("text/xml").send(body);
});
|
|
104
|
+
|
|
105
|
+
/**
 * POST /voice/input/:sessionId
 * Receives the caller's transcribed speech (SpeechResult form field).
 * Non-blank speech is handed to the session; either way the call is then
 * parked on the /voice/wait loop while the agent works on a reply.
 * Unknown or ended sessions get a hang-up.
 */
router.post("/input/:sessionId", (req, res) => {
  const session = voiceSessionManager.get(req.params.sessionId);
  const isLive = Boolean(session) && session.status !== "ended";
  if (!isLive) {
    res.type("text/xml").send(twiml("<Hangup/>"));
    return;
  }

  const form = req.body ?? {};
  const speechResult = form.SpeechResult || "";
  const confidence = form.Confidence || "?";
  const spoken = speechResult.trim();

  if (spoken) {
    console.log(`[VoiceWebhook] Caller spoke (confidence ${confidence}): "${speechResult}"`);
    session.receiveCallerInput(spoken);
  } else {
    console.log(`[VoiceWebhook] No speech detected — re-polling`);
  }

  // Park the call while the agent processes the input
  res.type("text/xml").send(waitAndPoll(session.id));
});
|
|
130
|
+
|
|
131
|
+
/**
 * POST /voice/wait/:sessionId
 * Polling endpoint for the agent's next reply (queued via
 * session.setAgentResponse()). Waits up to 8s, then answers with one of:
 *   - pause + redirect back here, when nothing has been queued yet;
 *   - "Goodbye." + hang-up, when the agent queued "__HANGUP__" (the session
 *     is ended and removed);
 *   - the reply spoken to the caller, followed by a new speech gather.
 * Unknown or ended sessions get a hang-up.
 */
router.post("/wait/:sessionId", async (req, res) => {
  const session = voiceSessionManager.get(req.params.sessionId);
  if (!session || session.status === "ended") {
    res.type("text/xml").send(twiml("<Hangup/>"));
    return;
  }

  // Wait up to 8s for the agent to queue a reply
  await session.waitForAgentResponse(8_000);
  const reply = session.consumeResponse();

  let body;
  if (reply === null) {
    // Agent still thinking — keep the caller on hold
    body = waitAndPoll(session.id);
  } else if (reply === "__HANGUP__") {
    session.end();
    voiceSessionManager.delete(session.id);
    console.log(`[VoiceWebhook] Session ${session.id} ended by agent`);
    body = twiml("<Say>Goodbye.</Say><Hangup/>");
  } else {
    // Agent reply — speak it and listen for caller's next utterance
    body = sayAndListen(session.id, reply);
  }

  res.type("text/xml").send(body);
});
|
|
165
|
+
|
|
166
|
+
// Twilio CallStatus values after which no further call activity can happen.
// "canceled" is included so a call cancelled before it is answered releases
// its session as well.
const TERMINAL_CALL_STATUSES = new Set(["completed", "failed", "busy", "no-answer", "canceled"]);

/**
 * POST /voice/status/:sessionId
 * Twilio status callback — tracks call lifecycle (ringing → in-progress → completed).
 * When the call reaches a terminal status (for example the caller hangs up),
 * the session is ended, which rejects any pending agent `listen`, and removed.
 *
 * The session is looked up by the id in the URL first, then by the CallSid
 * form field as a fallback. Always answers 204 with no body.
 */
router.post("/status/:sessionId", (req, res) => {
  const { CallStatus, CallSid } = req.body || {};
  const session =
    voiceSessionManager.get(req.params.sessionId) ||
    (CallSid ? voiceSessionManager.getBySid(CallSid) : null);

  console.log(`[VoiceWebhook] Status callback: ${CallSid} → ${CallStatus}`);

  if (session && session.status !== "ended" && TERMINAL_CALL_STATUSES.has(CallStatus)) {
    session.end();
    voiceSessionManager.delete(session.id);
    console.log(`[VoiceWebhook] Session ${session.id} closed via status callback (${CallStatus})`);
  }

  res.status(204).end();
});
|
|
187
|
+
|
|
188
|
+
export default router;
|