heyhank 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +83 -10
  3. package/bin/cli.ts +7 -7
  4. package/bin/ctl.ts +42 -42
  5. package/dist/assets/{AgentsPage-BPhirnCe.js → AgentsPage-B-AAmsMK.js} +3 -3
  6. package/dist/assets/AssistantPage-BV1Mfwdt.js +2 -0
  7. package/dist/assets/BusinessPage-tLpNEz19.js +1 -0
  8. package/dist/assets/{CronManager-DDbz-yiT.js → CronManager-B-K_n3Jg.js} +1 -1
  9. package/dist/assets/HelpPage-Bhf_j6Xr.js +1 -0
  10. package/dist/assets/{IntegrationsPage-CrOitCmJ.js → IntegrationsPage-DAMjs9tM.js} +1 -1
  11. package/dist/assets/JarvisHUD-C_TGXCCn.js +120 -0
  12. package/dist/assets/MediaPage-C48HTTrt.js +1 -0
  13. package/dist/assets/MemoryPage-JkC-qtgp.js +1 -0
  14. package/dist/assets/{PlatformDashboard-Do6F0O2p.js → PlatformDashboard-AUo7tNnE.js} +1 -1
  15. package/dist/assets/{Playground-Fc5cdc5p.js → Playground-AzNMsRBL.js} +1 -1
  16. package/dist/assets/{ProcessPanel-CslEiZkI.js → ProcessPanel-DpE_2sX3.js} +1 -1
  17. package/dist/assets/{PromptsPage-D2EhsdNO.js → PromptsPage-C2RQOs6p.js} +2 -2
  18. package/dist/assets/RunsPage-B9UOyO79.js +1 -0
  19. package/dist/assets/{SandboxManager-a1AVI5q2.js → SandboxManager-jHvYjwfh.js} +1 -1
  20. package/dist/assets/SettingsPage-BBJax6gt.js +51 -0
  21. package/dist/assets/SkillsMarketplace-IjmjfdjD.js +1 -0
  22. package/dist/assets/SocialMediaPage-DoPZHhr2.js +10 -0
  23. package/dist/assets/{TailscalePage-CHiFhZXF.js → TailscalePage-DDEY7ckO.js} +1 -1
  24. package/dist/assets/TelephonyPage-OPNBZYKt.js +9 -0
  25. package/dist/assets/{TerminalPage-Drwyrnfd.js → TerminalPage-BjMbHHW3.js} +1 -1
  26. package/dist/assets/{gemini-live-client-C7rqAW7G.js → gemini-live-client-C70FEtX2.js} +11 -8
  27. package/dist/assets/{index-CEqZnThB.js → index-BgYM4wXw.js} +94 -93
  28. package/dist/assets/index-BkjSoVgn.css +32 -0
  29. package/dist/assets/sw-register-C7NOHtIu.js +1 -0
  30. package/dist/assets/text-chat-client-BSbLJerZ.js +2 -0
  31. package/dist/index.html +2 -2
  32. package/dist/sw.js +1 -1
  33. package/package.json +6 -1
  34. package/server/agent-executor.ts +37 -2
  35. package/server/agent-store.ts +3 -3
  36. package/server/agent-types.ts +11 -0
  37. package/server/assistant-store.ts +232 -6
  38. package/server/auth-manager.ts +9 -0
  39. package/server/cache-headers.ts +1 -1
  40. package/server/calendar-service.ts +10 -0
  41. package/server/ceo/document-store.ts +129 -0
  42. package/server/ceo/finance-store.ts +343 -0
  43. package/server/ceo/kpi-store.ts +208 -0
  44. package/server/ceo/memory-import.ts +277 -0
  45. package/server/ceo/news-store.ts +208 -0
  46. package/server/ceo/template-store.ts +134 -0
  47. package/server/ceo/time-tracking-store.ts +227 -0
  48. package/server/claude-auth-monitor.ts +128 -0
  49. package/server/claude-code-worker.ts +86 -0
  50. package/server/claude-session-discovery.ts +74 -1
  51. package/server/cli-launcher.ts +32 -10
  52. package/server/codex-adapter.ts +2 -2
  53. package/server/codex-ws-proxy.cjs +1 -1
  54. package/server/container-manager.ts +4 -4
  55. package/server/content-intelligence/content-engine.ts +1112 -0
  56. package/server/content-intelligence/platform-knowledge.ts +870 -0
  57. package/server/cron-store.ts +3 -3
  58. package/server/embedding-service.ts +49 -0
  59. package/server/event-bus-types.ts +13 -0
  60. package/server/federation/node-store.ts +5 -4
  61. package/server/fs-utils.ts +28 -1
  62. package/server/hank-notifications-store.ts +91 -0
  63. package/server/hank-tool-executor.ts +1835 -0
  64. package/server/hank-tools.ts +2107 -0
  65. package/server/image-pull-manager.ts +2 -2
  66. package/server/index.ts +25 -2
  67. package/server/llm-providers-streaming.ts +541 -0
  68. package/server/llm-providers.ts +12 -0
  69. package/server/marketplace.ts +249 -0
  70. package/server/mcp-registry.ts +158 -0
  71. package/server/memory-service.ts +296 -0
  72. package/server/obsidian-sync.ts +184 -0
  73. package/server/provider-manager.ts +5 -2
  74. package/server/provider-registry.ts +12 -0
  75. package/server/reminder-scheduler.ts +37 -1
  76. package/server/routes/agent-routes.ts +2 -1
  77. package/server/routes/assistant-routes.ts +198 -5
  78. package/server/routes/ceo-finance-kpi-routes.ts +167 -0
  79. package/server/routes/ceo-news-time-routes.ts +137 -0
  80. package/server/routes/ceo-routes.ts +99 -0
  81. package/server/routes/content-routes.ts +116 -0
  82. package/server/routes/email-routes.ts +147 -0
  83. package/server/routes/env-routes.ts +3 -3
  84. package/server/routes/fs-routes.ts +12 -9
  85. package/server/routes/hank-chat-routes.ts +592 -0
  86. package/server/routes/llm-routes.ts +12 -0
  87. package/server/routes/marketplace-routes.ts +63 -0
  88. package/server/routes/media-routes.ts +1 -1
  89. package/server/routes/memory-routes.ts +127 -0
  90. package/server/routes/platform-routes.ts +14 -675
  91. package/server/routes/sandbox-routes.ts +1 -1
  92. package/server/routes/settings-routes.ts +51 -1
  93. package/server/routes/socialmedia-routes.ts +152 -2
  94. package/server/routes/system-routes.ts +2 -2
  95. package/server/routes/team-routes.ts +71 -0
  96. package/server/routes/telephony-routes.ts +98 -18
  97. package/server/routes.ts +36 -9
  98. package/server/session-creation-service.ts +2 -2
  99. package/server/session-orchestrator.ts +54 -2
  100. package/server/session-types.ts +2 -0
  101. package/server/settings-manager.ts +50 -2
  102. package/server/skill-discovery.ts +68 -0
  103. package/server/socialmedia/adapters/browser-adapter.ts +179 -0
  104. package/server/socialmedia/adapters/postiz-adapter.ts +291 -14
  105. package/server/socialmedia/manager.ts +234 -15
  106. package/server/socialmedia/store.ts +51 -1
  107. package/server/socialmedia/types.ts +35 -2
  108. package/server/socialview/browser-manager.ts +150 -0
  109. package/server/socialview/extractors.ts +1298 -0
  110. package/server/socialview/image-describe.ts +188 -0
  111. package/server/socialview/library.ts +119 -0
  112. package/server/socialview/poster.ts +276 -0
  113. package/server/socialview/routes.ts +371 -0
  114. package/server/socialview/style-analyzer.ts +187 -0
  115. package/server/socialview/style-profiles.ts +67 -0
  116. package/server/socialview/types.ts +166 -0
  117. package/server/socialview/vision.ts +127 -0
  118. package/server/socialview/vnc-manager.ts +110 -0
  119. package/server/style-injector.ts +135 -0
  120. package/server/team-service.ts +239 -0
  121. package/server/team-store.ts +75 -0
  122. package/server/team-types.ts +52 -0
  123. package/server/telephony/audio-bridge.ts +281 -35
  124. package/server/telephony/audio-recorder.ts +132 -0
  125. package/server/telephony/call-manager.ts +803 -104
  126. package/server/telephony/call-types.ts +67 -1
  127. package/server/telephony/esl-client.ts +319 -0
  128. package/server/telephony/freeswitch-sync.ts +155 -0
  129. package/server/telephony/phone-utils.ts +63 -0
  130. package/server/telephony/telephony-store.ts +9 -8
  131. package/server/url-validator.ts +82 -0
  132. package/server/vault-markdown.ts +317 -0
  133. package/server/vault-migration.ts +121 -0
  134. package/server/vault-store.ts +466 -0
  135. package/server/vault-watcher.ts +59 -0
  136. package/server/vector-store.ts +210 -0
  137. package/server/voice-pipeline/gemini-live-adapter.ts +97 -0
  138. package/server/voice-pipeline/greeting-cache.ts +200 -0
  139. package/server/voice-pipeline/manager.ts +249 -0
  140. package/server/voice-pipeline/pipeline.ts +335 -0
  141. package/server/voice-pipeline/providers/index.ts +47 -0
  142. package/server/voice-pipeline/providers/llm-internal.ts +527 -0
  143. package/server/voice-pipeline/providers/stt-google.ts +157 -0
  144. package/server/voice-pipeline/providers/tts-google.ts +126 -0
  145. package/server/voice-pipeline/types.ts +247 -0
  146. package/server/ws-bridge-types.ts +6 -1
  147. package/dist/assets/AssistantPage-DJ-cMQfb.js +0 -1
  148. package/dist/assets/HelpPage-DMfkzERp.js +0 -1
  149. package/dist/assets/MediaPage-CE5rdvkC.js +0 -1
  150. package/dist/assets/RunsPage-C5BZF5Rx.js +0 -1
  151. package/dist/assets/SettingsPage-DirhjQrJ.js +0 -51
  152. package/dist/assets/SocialMediaPage-DBuM28vD.js +0 -1
  153. package/dist/assets/TelephonyPage-x0VV0fOo.js +0 -1
  154. package/dist/assets/index-C8M_PUmX.css +0 -32
  155. package/dist/assets/sw-register-LSSpj6RU.js +0 -1
  156. package/server/socialmedia/adapters/ayrshare-adapter.ts +0 -169
@@ -3,18 +3,25 @@
3
3
  // Each call gets its own AudioBridge instance connected to Gemini.
4
4
 
5
5
  import type { ServerWebSocket } from "bun";
6
- import type { CallConfig, CallState, CallEvent, TranscriptEntry } from "./call-types.js";
7
- import { AudioBridge, downsampleTo8k, base64ToBuffer } from "./audio-bridge.js";
6
+ import type { CallConfig, CallState, CallEvent, TranscriptEntry, CallFlow, TelephonyContact } from "./call-types.js";
7
+ import { AudioRecorder } from "./audio-recorder.js";
8
8
  import { getSettings, saveCall } from "./telephony-store.js";
9
9
  import { getSettings as getMainSettings } from "../settings-manager.js";
10
+ import { eslCommand, eslSubscribe } from "./esl-client.js";
11
+ import { normalizePhoneE164, extractCountryCode } from "./phone-utils.js";
12
+ import { DEFAULT_PORT_DEV, DEFAULT_PORT_PROD } from "../constants.js";
10
13
  import { randomUUID } from "node:crypto";
14
+ import { createGeminiLiveEngine } from "../voice-pipeline/gemini-live-adapter.js";
15
+ import { createPipelineEngine, getPipelineSettingsFrom, prepareGreeting, lookupContactForCall, resolveEngine, type VoiceEngineSession } from "../voice-pipeline/manager.js";
16
+ import { addNotification } from "../hank-notifications-store.js";
17
+ import { heyHankBus } from "../event-bus.js";
11
18
 
12
19
  // Gemini tool declarations for telephony calls (subset — focused on conversation)
13
20
  const TELEPHONY_TOOL_DECLARATIONS = [{
14
21
  functionDeclarations: [
15
22
  {
16
23
  name: "end_call",
17
- description: "End/hang up the current phone call. Use when the conversation is complete, the task is done, or the other person wants to end the call.",
24
+ description: "Hang up the phone. CRITICAL RULES: (1) NEVER call this tool proactively. (2) Only call this AFTER you said goodbye AND heard the other person's final response. (3) If you have a script, you MUST complete EVERY step first. (4) There must be a natural pause in the conversation before you use this. (5) When in doubt, do NOT hang up — let the other person hang up instead.",
18
25
  parameters: { type: "OBJECT", properties: {} },
19
26
  },
20
27
  {
@@ -30,6 +37,8 @@ const TELEPHONY_TOOL_DECLARATIONS = [{
30
37
  ],
31
38
  }];
32
39
 
40
+ const MAX_CONCURRENT_CALLS = 5;
41
+
33
42
  type EventListener = (event: CallEvent) => void;
34
43
 
35
44
  /**
@@ -45,13 +54,41 @@ type EventListener = (event: CallEvent) => void;
45
54
  export class CallManager {
46
55
  private activeCalls = new Map<string, {
47
56
  state: CallState;
48
- bridge: AudioBridge;
57
+ engine: VoiceEngineSession;
58
+ recorder: AudioRecorder;
49
59
  fsSockets: Set<ServerWebSocket<unknown>>; // FreeSWITCH audio fork WebSockets
50
60
  transcriptSockets: Set<ServerWebSocket<unknown>>; // Browser WebSockets for live transcript
61
+ listenSockets: Set<ServerWebSocket<unknown>>; // Browser WebSockets for live audio listen
62
+ listenMode: boolean;
51
63
  maxDurationTimer?: ReturnType<typeof setTimeout>;
64
+ /** Ad-hoc timers (end_call grace period, force-hangup safeties) — all cleared on endCall */
65
+ pendingTimers: Set<ReturnType<typeof setTimeout>>;
66
+ pendingHangup?: boolean; // end_call was requested, waiting for audio to finish
67
+ /** Whether the greeting has been triggered yet (for pipeline engine) */
68
+ greetingTriggered?: boolean;
52
69
  }>();
53
70
 
71
+ /** Register a setTimeout that belongs to a call so it gets cleared on endCall */
72
+ private trackTimer(callId: string, timer: ReturnType<typeof setTimeout>): void {
73
+ const call = this.activeCalls.get(callId);
74
+ if (call) {
75
+ call.pendingTimers.add(timer);
76
+ } else {
77
+ // Call already ended before we could register — clear immediately to avoid a leak
78
+ clearTimeout(timer);
79
+ }
80
+ }
81
+
54
82
  private listeners = new Set<EventListener>();
83
+ private inboundSubscription: { stop: () => void } | null = null;
84
+ /**
85
+ * For inbound calls, the FreeSWITCH audio-fork WebSocket may connect
86
+ * before the ESL CHANNEL_CREATE event has been processed. We hold such
87
+ * sockets here for a short grace period (keyed by callId), and attach
88
+ * them once `handleInboundCall` has registered the call state.
89
+ */
90
+ private pendingFsSockets = new Map<string, { sockets: Set<ServerWebSocket<unknown>>; timer: ReturnType<typeof setTimeout> }>();
91
+ private readonly FS_SOCKET_GRACE_MS = 3000;
55
92
 
56
93
  /** Subscribe to call events */
57
94
  onEvent(listener: EventListener): () => void {
@@ -67,6 +104,16 @@ export class CallManager {
67
104
 
68
105
  /** Start a new outbound call */
69
106
  async startCall(config: CallConfig): Promise<CallState> {
107
+ // Validate phone number to prevent ESL command injection
108
+ if (!config.phone || !/^\+?[0-9\-\s]+$/.test(config.phone)) {
109
+ throw new Error("Invalid phone number. Only digits, spaces, hyphens, and an optional leading + are allowed.");
110
+ }
111
+
112
+ // Enforce concurrent calls limit
113
+ if (this.activeCalls.size >= MAX_CONCURRENT_CALLS) {
114
+ throw new Error(`Maximum concurrent calls limit reached (${MAX_CONCURRENT_CALLS}). Please wait for an active call to end.`);
115
+ }
116
+
70
117
  const telSettings = getSettings();
71
118
 
72
119
  if (!telSettings.enabled) {
@@ -91,8 +138,11 @@ export class CallManager {
91
138
  const voice = config.voice || telSettings.defaultVoice || "Kore";
92
139
  const maxDuration = config.maxDurationSeconds || telSettings.maxCallDurationSeconds || 600;
93
140
 
141
+ // Resolve contact for this phone number (for script/callFlow/language injection)
142
+ const contact = telSettings.contacts.find(c => c.phone === config.phone) || null;
143
+
94
144
  // Build telephony-specific system prompt
95
- const systemPrompt = buildTelephonyPrompt(config.prompt, config.phone);
145
+ const systemPrompt = buildTelephonyPrompt(config.prompt, config.phone, contact, contact?.language || telSettings.defaultLanguage || "de", "outbound", undefined, config.useSavedScript === true);
96
146
 
97
147
  // Create call state
98
148
  const callState: CallState = {
@@ -101,6 +151,7 @@ export class CallManager {
101
151
  prompt: config.prompt,
102
152
  voice,
103
153
  status: "initiating",
154
+ direction: "outbound",
104
155
  trunkId: trunk.id,
105
156
  callerId: config.callerId || trunk.callerId,
106
157
  transcript: [],
@@ -110,59 +161,131 @@ export class CallManager {
110
161
  connectedAt: null,
111
162
  endedAt: null,
112
163
  error: null,
164
+ listenMode: config.listen ?? false,
113
165
  };
114
166
 
115
- // Create audio bridge to Gemini
116
- const bridge = new AudioBridge(callId, {
117
- geminiApiKey: geminiKey,
118
- voice,
119
- systemPrompt,
120
- tools: [...TELEPHONY_TOOL_DECLARATIONS, { googleSearch: {} }],
121
- onTranscript: (entry) => {
167
+ // Create audio recorder for this call
168
+ const recorder = new AudioRecorder(callId);
169
+
170
+ // Common transcript/status callbacks
171
+ const onTranscript = (entry: TranscriptEntry) => {
172
+ // Only persist final entries — interim STT results flood the transcript
173
+ // with partial ngrams ("ich", "ich möchte", "ich möchte eine", ...).
174
+ // Live browsers still receive all entries via broadcastTranscript for real-time UX.
175
+ if (entry.isFinal) {
122
176
  callState.transcript.push(entry);
123
- this.emit({ type: "transcript", callId, entry });
124
- this.broadcastTranscript(callId, entry);
125
- },
126
- onStatusChange: (status) => {
127
- callState.status = status;
128
- if (status === "active" && !callState.connectedAt) {
129
- callState.connectedAt = Date.now();
130
- }
131
- this.emit({ type: "status", callId, status });
132
- },
133
- onToolCall: async (calls) => {
134
- return this.handleToolCalls(callId, calls);
135
- },
136
- });
177
+ }
178
+ this.emit({ type: "transcript", callId, entry });
179
+ this.broadcastTranscript(callId, entry);
180
+ };
181
+ const onStatusChange = (status: CallState["status"]) => {
182
+ callState.status = status;
183
+ if (status === "active" && !callState.connectedAt) {
184
+ callState.connectedAt = Date.now();
185
+ }
186
+ this.emit({ type: "status", callId, status });
187
+ };
188
+
189
+ // Decide engine: pipeline (with greeting) or Gemini Live
190
+ const pipelineSettings = getPipelineSettingsFrom(telSettings);
191
+ const chosenEngine = resolveEngine(pipelineSettings);
192
+
193
+ let engine: VoiceEngineSession;
194
+ if (chosenEngine === "pipeline") {
195
+ try {
196
+ const greeting = await prepareGreeting({ direction: "outbound", contact, pipelineSettings });
197
+ engine = await createPipelineEngine({
198
+ callId,
199
+ direction: "outbound",
200
+ remoteNumber: config.phone,
201
+ contact,
202
+ systemPrompt,
203
+ telephonySettings: telSettings,
204
+ pipelineSettings,
205
+ greetingText: greeting.text,
206
+ greetingPcm: greeting.pcm,
207
+ onToolCall: (calls) => this.handleToolCalls(callId, calls),
208
+ onTranscript,
209
+ onStatusChange,
210
+ });
211
+ } catch (err) {
212
+ console.error(`[telephony] Pipeline engine failed, fallback to Gemini Live:`, err);
213
+ if (!pipelineSettings.fallbackToGeminiLive) throw err;
214
+ engine = createGeminiLiveEngine({
215
+ callId,
216
+ voice,
217
+ systemPrompt,
218
+ geminiKey,
219
+ telSettings,
220
+ tools: [...TELEPHONY_TOOL_DECLARATIONS, { googleSearch: {} }],
221
+ isInbound: false,
222
+ onTranscript,
223
+ onStatusChange,
224
+ onToolCall: (calls) => this.handleToolCalls(callId, calls),
225
+ });
226
+ }
227
+ } else {
228
+ engine = createGeminiLiveEngine({
229
+ callId,
230
+ voice,
231
+ systemPrompt,
232
+ geminiKey,
233
+ telSettings,
234
+ tools: [...TELEPHONY_TOOL_DECLARATIONS, { googleSearch: {} }],
235
+ isInbound: false,
236
+ onTranscript,
237
+ onStatusChange,
238
+ onToolCall: (calls) => this.handleToolCalls(callId, calls),
239
+ });
240
+ }
137
241
 
138
- // When Gemini produces response audio, send it to FreeSWITCH
139
- bridge.onGeminiAudio = (base64Pcm: string) => {
140
- const pcm = base64ToBuffer(base64Pcm);
141
- // Gemini outputs 24kHz PCM, FreeSWITCH expects 8kHz
142
- const downsampled = downsampleTo8k(pcm, 24000);
242
+ // Wire AI audio FreeSWITCH + listen sockets + recorder
243
+ engine.onAudio = (pcm8k: Uint8Array) => {
244
+ recorder.addAiAudio(pcm8k);
143
245
  const call = this.activeCalls.get(callId);
144
- if (call) {
145
- for (const ws of call.fsSockets) {
146
- try {
147
- ws.send(downsampled);
148
- } catch { /* socket closed */ }
246
+ if (!call) return;
247
+ for (const ws of call.fsSockets) {
248
+ try { ws.send(pcm8k); } catch { /* socket closed */ }
249
+ }
250
+ if (call.listenMode) {
251
+ const base64 = Buffer.from(pcm8k).toString('base64');
252
+ const msg = JSON.stringify({ type: "audio", speaker: "ai", data: base64 });
253
+ for (const ws of call.listenSockets) {
254
+ try { ws.send(msg); } catch { /* ignore */ }
149
255
  }
150
256
  }
151
257
  };
152
258
 
259
+ // Hangup-after-turn-complete logic
260
+ let hangupScheduled = false;
261
+ if ("onTurnComplete" in engine) {
262
+ engine.onTurnComplete = () => {
263
+ const call = this.activeCalls.get(callId);
264
+ if (call?.pendingHangup && !hangupScheduled) {
265
+ hangupScheduled = true;
266
+ console.log(`[telephony] Call ${callId}: turn complete after end_call — hanging up in 5s`);
267
+ this.trackTimer(callId, setTimeout(() => this.endCall(callId).catch(() => {}), 5000));
268
+ }
269
+ };
270
+ }
271
+
153
272
  this.activeCalls.set(callId, {
154
273
  state: callState,
155
- bridge,
274
+ engine,
275
+ recorder,
156
276
  fsSockets: new Set(),
157
277
  transcriptSockets: new Set(),
278
+ listenSockets: new Set(),
279
+ listenMode: config.listen ?? false,
280
+ pendingTimers: new Set(),
158
281
  });
159
282
 
160
- // Connect to Gemini first
283
+ // Connect engine
161
284
  try {
162
- await bridge.connect();
285
+ await engine.connect();
163
286
  } catch (err) {
164
287
  callState.status = "failed";
165
- callState.error = err instanceof Error ? err.message : "Gemini connection failed";
288
+ callState.error = err instanceof Error ? err.message : "Engine connection failed";
166
289
  callState.endedAt = Date.now();
167
290
  saveCall(callState);
168
291
  this.activeCalls.delete(callId);
@@ -179,7 +302,7 @@ export class CallManager {
179
302
  callState.status = "failed";
180
303
  callState.error = err instanceof Error ? err.message : "FreeSWITCH originate failed";
181
304
  callState.endedAt = Date.now();
182
- bridge.disconnect();
305
+ engine.disconnect();
183
306
  saveCall(callState);
184
307
  this.activeCalls.delete(callId);
185
308
  throw err;
@@ -203,13 +326,28 @@ export class CallManager {
203
326
  const call = this.activeCalls.get(callId);
204
327
  if (!call) return null;
205
328
 
206
- const { state, bridge } = call;
329
+ // Delete from activeCalls FIRST to prevent concurrent entry (race condition guard)
330
+ this.activeCalls.delete(callId);
331
+
332
+ // Clear any pending audio-fork buffer entry so late arrivals don't leak
333
+ const pending = this.pendingFsSockets.get(callId);
334
+ if (pending) {
335
+ clearTimeout(pending.timer);
336
+ for (const s of pending.sockets) { try { s.close(); } catch { /* ignore */ } }
337
+ this.pendingFsSockets.delete(callId);
338
+ }
339
+
340
+ // Store references before cleanup
341
+ const { state, engine, recorder, maxDurationTimer, pendingTimers } = call;
207
342
 
208
343
  // Clear safety timer
209
- if (call.maxDurationTimer) clearTimeout(call.maxDurationTimer);
344
+ if (maxDurationTimer) clearTimeout(maxDurationTimer);
345
+ // Clear all ad-hoc pending timers (end_call grace, force-hangup safety) to prevent leaks
346
+ for (const t of pendingTimers) clearTimeout(t);
347
+ pendingTimers.clear();
210
348
 
211
- // Disconnect Gemini
212
- bridge.disconnect();
349
+ // Disconnect engine (Pipeline or Gemini Live)
350
+ engine.disconnect();
213
351
 
214
352
  // Hangup FreeSWITCH
215
353
  try {
@@ -223,12 +361,48 @@ export class CallManager {
223
361
  state.durationSeconds = Math.round((state.endedAt - state.connectedAt) / 1000);
224
362
  }
225
363
 
364
+ // Save audio recording
365
+ try {
366
+ const audioFile = recorder.save();
367
+ if (audioFile) {
368
+ state.audioFile = audioFile;
369
+ }
370
+ } catch (err) {
371
+ console.error(`[telephony] Failed to save recording for ${callId}:`, err);
372
+ }
373
+
226
374
  // Generate summary from transcript
227
375
  state.summary = this.generateSummary(state);
228
376
 
229
377
  this.emit({ type: "ended", callId, summary: state.summary });
230
378
  saveCall(state);
231
- this.activeCalls.delete(callId);
379
+
380
+ // Persist a HankChat notification so Hank can summarise the call for the user.
381
+ try {
382
+ const telSettings = getSettings();
383
+ const contact = telSettings.contacts.find((c) => c.phone === state.phone) || null;
384
+ const notification = addNotification({
385
+ type: "call-ended",
386
+ callId,
387
+ phone: state.phone,
388
+ contactName: contact?.name ?? null,
389
+ direction: state.direction,
390
+ durationSeconds: state.durationSeconds,
391
+ summary: state.summary,
392
+ transcript: state.transcript,
393
+ });
394
+ heyHankBus.emit("telephony:call-ended", {
395
+ notificationId: notification.id,
396
+ callId,
397
+ phone: state.phone,
398
+ contactName: contact?.name ?? null,
399
+ direction: state.direction,
400
+ durationSeconds: state.durationSeconds,
401
+ summary: state.summary,
402
+ });
403
+ } catch (err) {
404
+ console.error(`[telephony] Failed to create HankChat notification for ${callId}:`, err);
405
+ }
232
406
 
233
407
  return state;
234
408
  }
@@ -237,18 +411,90 @@ export class CallManager {
237
411
  handleFreeSwitchAudio(callId: string, data: Buffer | Uint8Array): void {
238
412
  const call = this.activeCalls.get(callId);
239
413
  if (!call) return;
240
- call.bridge.sendCallerAudio(data);
414
+ // Record caller audio (8kHz PCM)
415
+ call.recorder.addCallerAudio(data);
416
+ call.engine.sendCallerAudio(data);
417
+ // Stream callee audio to listen sockets (browser)
418
+ if (call.listenMode && call.listenSockets.size > 0) {
419
+ const base64 = Buffer.from(data).toString('base64');
420
+ const msg = JSON.stringify({ type: "audio", speaker: "callee", data: base64 });
421
+ for (const ws of call.listenSockets) {
422
+ try { ws.send(msg); } catch { /* ignore */ }
423
+ }
424
+ }
241
425
  }
242
426
 
243
427
  /** Register a FreeSWITCH audio WebSocket for a call */
244
428
  addFreeSwitchSocket(callId: string, ws: ServerWebSocket<unknown>): void {
245
429
  const call = this.activeCalls.get(callId);
246
- if (call) call.fsSockets.add(ws);
430
+ if (call) {
431
+ this.attachFsSocket(call, callId, ws);
432
+ return;
433
+ }
434
+
435
+ // Inbound race: audio fork connected before ESL CHANNEL_CREATE was processed.
436
+ // Buffer the socket and try to attach once handleInboundCall registers the call.
437
+ console.log(`[telephony] Audio socket for ${callId} arrived before call state — buffering (${this.FS_SOCKET_GRACE_MS}ms grace)`);
438
+ let pending = this.pendingFsSockets.get(callId);
439
+ if (!pending) {
440
+ const timer = setTimeout(() => {
441
+ const p = this.pendingFsSockets.get(callId);
442
+ if (p) {
443
+ console.warn(`[telephony] No call state for ${callId} within grace period — closing ${p.sockets.size} buffered audio socket(s)`);
444
+ for (const s of p.sockets) {
445
+ try { s.close(); } catch { /* ignore */ }
446
+ }
447
+ this.pendingFsSockets.delete(callId);
448
+ }
449
+ }, this.FS_SOCKET_GRACE_MS);
450
+ pending = { sockets: new Set(), timer };
451
+ this.pendingFsSockets.set(callId, pending);
452
+ }
453
+ pending.sockets.add(ws);
454
+ }
455
+
456
+ /** Attach (possibly buffered) FreeSWITCH sockets and trigger greeting if needed */
457
+ private attachFsSocket(
458
+ call: NonNullable<ReturnType<CallManager["activeCalls"]["get"]>>,
459
+ callId: string,
460
+ ws: ServerWebSocket<unknown>,
461
+ ): void {
462
+ call.fsSockets.add(ws);
463
+ // Audio fork connected = callee picked up. Play greeting immediately.
464
+ // For Pipeline: pre-rendered greeting MP3 plays at 0ms latency.
465
+ // For Gemini Live: text trigger makes it start speaking (1-2s latency).
466
+ if (call.fsSockets.size === 1 && !call.greetingTriggered) {
467
+ const isInbound = call.state.direction === "inbound";
468
+ console.log(`[telephony] Call ${callId}: audio connected (${isInbound ? "inbound" : "outbound"}) — triggering greeting via ${call.engine.engineLabel}`);
469
+ call.greetingTriggered = true;
470
+ call.engine.playGreetingIfReady?.();
471
+ }
472
+ }
473
+
474
+ /** Flush any buffered FreeSWITCH audio sockets for a call that just became known */
475
+ private flushPendingFsSockets(callId: string): void {
476
+ const pending = this.pendingFsSockets.get(callId);
477
+ if (!pending) return;
478
+ const call = this.activeCalls.get(callId);
479
+ clearTimeout(pending.timer);
480
+ this.pendingFsSockets.delete(callId);
481
+ if (!call) return;
482
+ console.log(`[telephony] Flushing ${pending.sockets.size} buffered audio socket(s) for call ${callId}`);
483
+ for (const s of pending.sockets) {
484
+ this.attachFsSocket(call, callId, s);
485
+ }
247
486
  }
248
487
 
249
488
  removeFreeSwitchSocket(callId: string, ws: ServerWebSocket<unknown>): void {
250
489
  const call = this.activeCalls.get(callId);
251
- if (call) call.fsSockets.delete(ws);
490
+ if (call) {
491
+ call.fsSockets.delete(ws);
492
+ // If no more FreeSWITCH sockets remain, the caller has hung up — end the call
493
+ if (call.fsSockets.size === 0 && call.state.status === "active") {
494
+ console.log(`[telephony] Call ${callId}: all audio sockets closed — caller hung up, ending call`);
495
+ this.endCall(callId).catch(() => {});
496
+ }
497
+ }
252
498
  }
253
499
 
254
500
  /** Register a browser WebSocket for live transcript */
@@ -270,6 +516,22 @@ export class CallManager {
270
516
  if (call) call.transcriptSockets.delete(ws);
271
517
  }
272
518
 
519
+ /** Register a browser WebSocket for live audio listen */
520
+ addListenSocket(callId: string, ws: ServerWebSocket<unknown>): void {
521
+ const call = this.activeCalls.get(callId);
522
+ if (call) call.listenSockets.add(ws);
523
+ }
524
+
525
+ removeListenSocket(callId: string, ws: ServerWebSocket<unknown>): void {
526
+ const call = this.activeCalls.get(callId);
527
+ if (call) call.listenSockets.delete(ws);
528
+ }
529
+
530
+ /** Check if call has listen mode */
531
+ isListenMode(callId: string): boolean {
532
+ return this.activeCalls.get(callId)?.listenMode ?? false;
533
+ }
534
+
273
535
  private broadcastTranscript(callId: string, entry: TranscriptEntry): void {
274
536
  const call = this.activeCalls.get(callId);
275
537
  if (!call) return;
@@ -298,18 +560,34 @@ export class CallManager {
298
560
 
299
561
  for (const call of calls) {
300
562
  switch (call.name) {
301
- case "end_call":
302
- // Gemini decided to hang up
303
- setTimeout(() => this.endCall(callId).catch(() => {}), 500); // short delay for final audio
304
- results.push({ id: call.id, name: call.name, response: { success: true, message: "Hanging up" } });
563
+ case "end_call": {
564
+ // Gemini decided to hang up — don't disconnect immediately!
565
+ // Set flag and wait for turnComplete (= all audio has been sent),
566
+ // then add a grace period for audio to play through the phone line.
567
+ const activeCall = this.activeCalls.get(callId);
568
+ if (activeCall) {
569
+ activeCall.pendingHangup = true;
570
+ console.log(`[telephony] Call ${callId}: end_call requested — waiting for turn to complete`);
571
+ // Safety: if turnComplete never comes, force hangup after 15s
572
+ this.trackTimer(callId, setTimeout(() => {
573
+ const c = this.activeCalls.get(callId);
574
+ if (c?.pendingHangup) {
575
+ console.log(`[telephony] Call ${callId}: force hangup (turnComplete timeout)`);
576
+ this.endCall(callId).catch(() => {});
577
+ }
578
+ }, 15000));
579
+ }
580
+ results.push({ id: call.id, name: call.name, response: { success: true, message: "Will hang up after finishing current speech" } });
305
581
  break;
582
+ }
306
583
  case "transfer_call":
307
584
  results.push({ id: call.id, name: call.name, response: { error: "Transfer not yet implemented" } });
308
585
  break;
309
586
  default:
310
587
  // Forward to main tool handler (for todos, notes, etc.)
311
588
  try {
312
- const res = await fetch(`http://127.0.0.1:${process.env.PORT || 3100}/api/gemini/tool-call`, {
589
+ const toolPort = Number(process.env.PORT) || (process.env.NODE_ENV === "production" ? DEFAULT_PORT_PROD : DEFAULT_PORT_DEV);
590
+ const res = await fetch(`http://127.0.0.1:${toolPort}/api/gemini/tool-call`, {
313
591
  method: "POST",
314
592
  headers: { "Content-Type": "application/json" },
315
593
  body: JSON.stringify({ name: call.name, args: call.args }),
@@ -325,13 +603,9 @@ export class CallManager {
325
603
  return results;
326
604
  }
327
605
 
328
- // ─── FreeSWITCH ESL Commands ───────────────────────────────────────────────
606
+ // ─── FreeSWITCH ESL Commands (TCP) ──────────────────────────────────────────
329
607
 
330
- /**
331
- * Originate a call via FreeSWITCH ESL.
332
- * Uses HTTP API (mod_xml_rpc or mod_httapi) for simplicity —
333
- * no need for a persistent ESL TCP connection.
334
- */
608
+ /** Originate a call via FreeSWITCH ESL TCP. */
335
609
  private async eslOriginate(
336
610
  callId: string,
337
611
  phone: string,
@@ -339,64 +613,68 @@ export class CallManager {
339
613
  callerId: string,
340
614
  ): Promise<void> {
341
615
  const settings = getSettings();
342
- const { eslHost, eslPort, eslPassword } = settings.freeswitch;
343
616
 
344
- // FreeSWITCH ESL over HTTP (mod_xml_rpc)
345
- // Format: api originate {vars}sofia/gateway/trunk/number &park()
617
+ // Sanitize gateway name to match what freeswitch-sync generates
618
+ const gwName = `heyhank_${trunk.name.replace(/[^a-zA-Z0-9_-]/g, "_").toLowerCase()}`;
619
+
346
620
  const vars = [
347
621
  `origination_caller_id_number=${callerId}`,
348
622
  `origination_caller_id_name=HeyHank`,
623
+ `effective_caller_id_number=${callerId}`,
624
+ `effective_caller_id_name=HeyHank`,
349
625
  `origination_uuid=${callId}`,
350
626
  `ignore_early_media=true`,
351
627
  ].join(",");
352
628
 
353
- const cmd = `originate {${vars}}sofia/gateway/${trunk.name}/${phone} &park()`;
354
-
629
+ // playback silence_stream keeps the media path active for mod_audio_fork.
630
+ // -1 = infinite duration. 1400 = comfort noise level.
631
+ const cmd = `originate {${vars}}sofia/gateway/${gwName}/${phone} &playback(silence_stream://-1;1400)`;
355
632
  console.log(`[telephony] ESL originate: ${cmd}`);
356
633
 
357
- // Try ESL HTTP API first (port 8080 default for mod_xml_rpc)
358
634
  try {
359
- const eslUrl = `http://${eslHost}:${eslPort}/api`;
360
- const res = await fetch(eslUrl, {
361
- method: "POST",
362
- headers: {
363
- "Content-Type": "text/plain",
364
- "Authorization": `Basic ${btoa(`freeswitch:${eslPassword}`)}`,
365
- },
366
- body: cmd,
367
- });
368
-
369
- if (!res.ok) {
370
- const text = await res.text();
371
- throw new Error(`ESL error: ${res.status} ${text}`);
372
- }
373
-
374
- const result = await res.text();
635
+ // Use api with 60s timeout — originate blocks until answered/rejected/timeout
636
+ const result = await eslCommand(cmd, settings.freeswitch, 60000);
375
637
  console.log(`[telephony] ESL originate result: ${result.trim()}`);
376
-
377
638
  if (result.includes("-ERR")) {
378
639
  throw new Error(`FreeSWITCH error: ${result.trim()}`);
379
640
  }
641
+
642
+ // Call connected — start audio fork to stream audio via WebSocket
643
+ await this.startAudioFork(callId, settings.freeswitch);
380
644
  } catch (err) {
381
645
  console.error(`[telephony] ESL originate failed:`, err);
382
646
  throw err;
383
647
  }
384
648
  }
385
649
 
650
+ /** Start mod_audio_fork to stream call audio to our WebSocket endpoint */
651
+ private async startAudioFork(
652
+ callId: string,
653
+ fsConfig: { eslHost: string; eslPort: number; eslPassword: string },
654
+ ): Promise<void> {
655
+ const defaultPort = process.env.NODE_ENV === "production" ? DEFAULT_PORT_PROD : DEFAULT_PORT_DEV;
656
+ const port = Number(process.env.PORT) || defaultPort;
657
+ const wsUrl = `ws://127.0.0.1:${port}/ws/telephony/audio/${callId}`;
658
+ // Bidirectional mode: callee audio → WebSocket → Gemini, Gemini audio → WebSocket → callee
659
+ const forkCmd = `uuid_audio_fork ${callId} start ${wsUrl} mono 8000 heyhank {} true true 8000`;
660
+ console.log(`[telephony] Starting audio fork: ${forkCmd}`);
661
+
662
+ try {
663
+ const result = await eslCommand(forkCmd, fsConfig, 10000);
664
+ console.log(`[telephony] Audio fork result: ${result.trim()}`);
665
+ if (result.includes("-ERR")) {
666
+ console.error(`[telephony] Audio fork failed: ${result.trim()}`);
667
+ }
668
+ } catch (err) {
669
+ console.error(`[telephony] Audio fork error:`, err);
670
+ // Don't throw — call is still connected, just no audio bridge
671
+ }
672
+ }
673
+
386
674
  private async eslHangup(callId: string): Promise<void> {
387
675
  const settings = getSettings();
388
- const { eslHost, eslPort, eslPassword } = settings.freeswitch;
389
-
390
676
  try {
391
- const eslUrl = `http://${eslHost}:${eslPort}/api`;
392
- await fetch(eslUrl, {
393
- method: "POST",
394
- headers: {
395
- "Content-Type": "text/plain",
396
- "Authorization": `Basic ${btoa(`freeswitch:${eslPassword}`)}`,
397
- },
398
- body: `uuid_kill ${callId}`,
399
- });
677
+ await eslCommand(`uuid_kill ${callId}`, settings.freeswitch);
400
678
  } catch {
401
679
  // Might already be disconnected
402
680
  }
@@ -417,8 +695,279 @@ export class CallManager {
417
695
  lines.slice(-6).join("\n"); // Last 6 lines as summary
418
696
  }
419
697
 
698
+ /** Handle an inbound call detected by ESL event subscription */
699
+ async handleInboundCall(uuid: string, callerNumber: string): Promise<void> {
700
+ const telSettings = getSettings();
701
+ const mainSettings = getMainSettings();
702
+
703
+ if (!telSettings.inboundEnabled) {
704
+ console.log(`[telephony] Inbound call from ${callerNumber} ignored — inbound disabled`);
705
+ return;
706
+ }
707
+
708
+ // Check if we already have this call (avoid duplicates)
709
+ if (this.activeCalls.has(uuid)) {
710
+ console.log(`[telephony] Inbound call ${uuid} already tracked`);
711
+ return;
712
+ }
713
+
714
+ // Enforce concurrent calls limit
715
+ if (this.activeCalls.size >= MAX_CONCURRENT_CALLS) {
716
+ console.log(`[telephony] Inbound call from ${callerNumber} rejected — max concurrent calls (${MAX_CONCURRENT_CALLS}) reached`);
717
+ return;
718
+ }
719
+
720
+ const geminiKey = telSettings.geminiApiKey || mainSettings.geminiApiKey;
721
+ if (!geminiKey) {
722
+ console.error("[telephony] Cannot handle inbound call — no Gemini API key");
723
+ return;
724
+ }
725
+
726
+ console.log(`[telephony] Handling inbound call from ${callerNumber} (UUID: ${uuid})`);
727
+
728
+ // Look up contact by phone number — normalize to E.164 first so peoplefone-style
729
+ // national caller-IDs ("06508920611") match contacts stored as "+436508920611".
730
+ // Country code comes from explicit settings, else parsed from the first enabled trunk's callerId.
731
+ const firstTrunkCallerId = telSettings.trunks.find(t => t.enabled)?.callerId;
732
+ const defaultCountryCode = telSettings.defaultCountryCode || extractCountryCode(firstTrunkCallerId);
733
+ const normalizedCaller = normalizePhoneE164(callerNumber, defaultCountryCode);
734
+ const contact = telSettings.contacts.find(c =>
735
+ normalizePhoneE164(c.phone, defaultCountryCode) === normalizedCaller
736
+ ) || null;
737
+
738
+ const voice = telSettings.defaultInboundVoice || telSettings.defaultVoice || "Kore";
739
+ const prompt = contact?.script || telSettings.defaultInboundPrompt || "You are Hank, a helpful AI assistant answering the phone.";
740
+ const maxDuration = telSettings.maxCallDurationSeconds || 600;
741
+
742
+ // Build telephony-specific system prompt (include knowledge base for inbound)
743
+ const systemPrompt = buildTelephonyPrompt(prompt, callerNumber, contact, contact?.language || telSettings.defaultLanguage || "de", "inbound", telSettings.inboundKnowledgeBase, true);
744
+
745
+ // Create call state — already answered by dialplan
746
+ const callState: CallState = {
747
+ id: uuid,
748
+ phone: callerNumber,
749
+ prompt,
750
+ voice,
751
+ status: "active",
752
+ direction: "inbound",
753
+ trunkId: "",
754
+ callerId: callerNumber,
755
+ transcript: [],
756
+ summary: null,
757
+ durationSeconds: 0,
758
+ startedAt: Date.now(),
759
+ connectedAt: Date.now(), // Already connected (dialplan answered)
760
+ endedAt: null,
761
+ error: null,
762
+ listenMode: false,
763
+ };
764
+
765
+ // Create audio recorder for inbound call
766
+ const recorder = new AudioRecorder(uuid);
767
+
768
+ // Common transcript/status callbacks
769
+ const onTranscript = (entry: TranscriptEntry) => {
770
+ // Only persist final entries (see outbound onTranscript for rationale).
771
+ if (entry.isFinal) {
772
+ callState.transcript.push(entry);
773
+ }
774
+ this.emit({ type: "transcript", callId: uuid, entry });
775
+ this.broadcastTranscript(uuid, entry);
776
+ };
777
+ const onStatusChange = (status: CallState["status"]) => {
778
+ callState.status = status;
779
+ this.emit({ type: "status", callId: uuid, status });
780
+ };
781
+
782
+ // Decide engine
783
+ const pipelineSettings = getPipelineSettingsFrom(telSettings);
784
+ const chosenEngine = resolveEngine(pipelineSettings);
785
+ const lookupContact = lookupContactForCall("inbound", callerNumber, contact, telSettings.contacts, defaultCountryCode);
786
+
787
+ let engine: VoiceEngineSession;
788
+ if (chosenEngine === "pipeline") {
789
+ try {
790
+ const greeting = await prepareGreeting({ direction: "inbound", contact: lookupContact, pipelineSettings });
791
+ engine = await createPipelineEngine({
792
+ callId: uuid,
793
+ direction: "inbound",
794
+ remoteNumber: callerNumber,
795
+ contact: lookupContact,
796
+ systemPrompt,
797
+ telephonySettings: telSettings,
798
+ pipelineSettings,
799
+ greetingText: greeting.text,
800
+ greetingPcm: greeting.pcm,
801
+ onToolCall: (calls) => this.handleToolCalls(uuid, calls),
802
+ onTranscript,
803
+ onStatusChange,
804
+ });
805
+ } catch (err) {
806
+ console.error(`[telephony] Inbound pipeline failed, fallback to Gemini Live:`, err);
807
+ if (!pipelineSettings.fallbackToGeminiLive) {
808
+ callState.status = "failed";
809
+ callState.error = err instanceof Error ? err.message : "Pipeline init failed";
810
+ callState.endedAt = Date.now();
811
+ saveCall(callState);
812
+ return;
813
+ }
814
+ engine = createGeminiLiveEngine({
815
+ callId: uuid,
816
+ voice,
817
+ systemPrompt,
818
+ geminiKey,
819
+ telSettings,
820
+ tools: [...TELEPHONY_TOOL_DECLARATIONS, { googleSearch: {} }],
821
+ isInbound: true,
822
+ onTranscript,
823
+ onStatusChange,
824
+ onToolCall: (calls) => this.handleToolCalls(uuid, calls),
825
+ });
826
+ }
827
+ } else {
828
+ engine = createGeminiLiveEngine({
829
+ callId: uuid,
830
+ voice,
831
+ systemPrompt,
832
+ geminiKey,
833
+ telSettings,
834
+ tools: [...TELEPHONY_TOOL_DECLARATIONS, { googleSearch: {} }],
835
+ isInbound: true,
836
+ onTranscript,
837
+ onStatusChange,
838
+ onToolCall: (calls) => this.handleToolCalls(uuid, calls),
839
+ });
840
+ }
841
+
842
+ // Wire AI audio → FreeSWITCH + recorder
843
+ engine.onAudio = (pcm8k: Uint8Array) => {
844
+ recorder.addAiAudio(pcm8k);
845
+ const call = this.activeCalls.get(uuid);
846
+ if (!call) return;
847
+ for (const ws of call.fsSockets) {
848
+ try { ws.send(pcm8k); } catch { /* socket closed */ }
849
+ }
850
+ };
851
+
852
+ // Hangup-after-turn-complete
853
+ let hangupScheduled = false;
854
+ if ("onTurnComplete" in engine) {
855
+ engine.onTurnComplete = () => {
856
+ const call = this.activeCalls.get(uuid);
857
+ if (call?.pendingHangup && !hangupScheduled) {
858
+ hangupScheduled = true;
859
+ console.log(`[telephony] Inbound call ${uuid}: turn complete after end_call — hanging up in 5s`);
860
+ this.trackTimer(uuid, setTimeout(() => this.endCall(uuid).catch(() => {}), 5000));
861
+ }
862
+ };
863
+ }
864
+
865
+ this.activeCalls.set(uuid, {
866
+ state: callState,
867
+ engine,
868
+ recorder,
869
+ fsSockets: new Set(),
870
+ transcriptSockets: new Set(),
871
+ listenSockets: new Set(),
872
+ listenMode: false,
873
+ pendingTimers: new Set(),
874
+ });
875
+
876
+ // If the FreeSWITCH audio fork already connected before we got here, attach it now.
877
+ this.flushPendingFsSockets(uuid);
878
+
879
+ // Connect engine
880
+ try {
881
+ await engine.connect();
882
+ console.log(`[telephony] Inbound call ${uuid}: ${engine.engineLabel} connected, waiting for audio fork`);
883
+ } catch (err) {
884
+ console.error(`[telephony] Inbound call ${uuid}: engine connection failed:`, err);
885
+ callState.status = "failed";
886
+ callState.error = err instanceof Error ? err.message : "Engine connection failed";
887
+ callState.endedAt = Date.now();
888
+ saveCall(callState);
889
+ this.activeCalls.delete(uuid);
890
+ return;
891
+ }
892
+
893
+ // Safety: auto-hangup after max duration
894
+ const timer = setTimeout(() => {
895
+ console.log(`[telephony] Inbound call ${uuid} hit max duration (${maxDuration}s), hanging up`);
896
+ this.endCall(uuid).catch(() => {});
897
+ }, maxDuration * 1000);
898
+
899
+ const call = this.activeCalls.get(uuid);
900
+ if (call) call.maxDurationTimer = timer;
901
+
902
+ // Safety: if no audio fork attaches within 30s AND no CHANNEL_HANGUP_COMPLETE
903
+ // arrives, treat as a ghost/orphaned channel and clean up. Prevents phantom
904
+ // "active" calls in the dashboard when FS drops the channel silently.
905
+ this.trackTimer(uuid, setTimeout(() => {
906
+ const c = this.activeCalls.get(uuid);
907
+ if (c && c.fsSockets.size === 0) {
908
+ console.warn(`[telephony] Inbound call ${uuid}: no audio fork within 30s — orphaned, cleaning up`);
909
+ this.endCall(uuid).catch(() => {});
910
+ }
911
+ }, 30000));
912
+
913
+ saveCall(callState);
914
+ this.emit({ type: "status", callId: uuid, status: "active" });
915
+ }
916
+
917
+ /** Start listening for inbound calls via ESL event subscription */
918
+ startInboundListener(): void {
919
+ const telSettings = getSettings();
920
+ if (!telSettings.inboundEnabled) {
921
+ console.log("[telephony] Inbound listener not started — inbound disabled");
922
+ return;
923
+ }
924
+
925
+ if (this.inboundSubscription) {
926
+ this.inboundSubscription.stop();
927
+ }
928
+
929
+ console.log("[telephony] Starting inbound call listener");
930
+ this.inboundSubscription = eslSubscribe(
931
+ telSettings.freeswitch,
932
+ ({ uuid, callerNumber }) => {
933
+ this.handleInboundCall(uuid, callerNumber).catch(err => {
934
+ console.error("[telephony] Failed to handle inbound call:", err);
935
+ });
936
+ },
937
+ ({ uuid }) => {
938
+ // FS channel ended — clean up our call state so phantom "active" calls don't accumulate.
939
+ if (this.activeCalls.has(uuid)) {
940
+ console.log(`[telephony] CHANNEL_HANGUP_COMPLETE for ${uuid} — ending call`);
941
+ this.endCall(uuid).catch(err => {
942
+ console.error("[telephony] endCall on hangup failed:", err);
943
+ });
944
+ } else {
945
+ // Also clear any buffered audio-fork sockets for calls that never registered.
946
+ this.pendingFsSockets.get(uuid)?.sockets.forEach(ws => {
947
+ try { ws.close(1000, "channel hung up"); } catch { /* ignore */ }
948
+ });
949
+ const pending = this.pendingFsSockets.get(uuid);
950
+ if (pending) {
951
+ clearTimeout(pending.timer);
952
+ this.pendingFsSockets.delete(uuid);
953
+ }
954
+ }
955
+ },
956
+ );
957
+ }
958
+
959
+ /** Stop inbound listener */
960
+ stopInboundListener(): void {
961
+ if (this.inboundSubscription) {
962
+ this.inboundSubscription.stop();
963
+ this.inboundSubscription = null;
964
+ console.log("[telephony] Inbound listener stopped");
965
+ }
966
+ }
967
+
420
968
  /** Shutdown: end all calls */
421
969
  async shutdown(): Promise<void> {
970
+ this.stopInboundListener();
422
971
  const callIds = Array.from(this.activeCalls.keys());
423
972
  await Promise.all(callIds.map((id) => this.endCall(id)));
424
973
  }
@@ -426,31 +975,181 @@ export class CallManager {
426
975
 
427
976
  // ─── Prompt Builder ──────────────────────────────────────────────────────────
428
977
 
429
- function buildTelephonyPrompt(taskPrompt: string, phoneNumber: string): string {
430
- return `You are conducting a PHONE CALL. You are speaking to a real person on the telephone.
978
+ const LANGUAGE_NAMES: Record<string, string> = {
979
+ de: "German (Deutsch)",
980
+ en: "English",
981
+ fr: "French (Français)",
982
+ it: "Italian (Italiano)",
983
+ es: "Spanish (Español)",
984
+ pt: "Portuguese (Português)",
985
+ nl: "Dutch (Nederlands)",
986
+ pl: "Polish (Polski)",
987
+ cs: "Czech (Čeština)",
988
+ hu: "Hungarian (Magyar)",
989
+ ro: "Romanian (Română)",
990
+ tr: "Turkish (Türkçe)",
991
+ ru: "Russian (Русский)",
992
+ ja: "Japanese (日本語)",
993
+ zh: "Chinese (中文)",
994
+ ko: "Korean (한국어)",
995
+ ar: "Arabic (العربية)",
996
+ };
997
+
998
+ function buildTelephonyPrompt(taskPrompt: string, phoneNumber: string, contact?: TelephonyContact | null, language?: string, direction: "outbound" | "inbound" = "outbound", knowledgeBase?: string, useSavedScript: boolean = false): string {
999
+ const langCode = language || "de";
1000
+ const langName = LANGUAGE_NAMES[langCode] || langCode;
1001
+ const isInbound = direction === "inbound";
1002
+
1003
+ let prompt = `You are ${isInbound ? "ANSWERING" : "conducting"} a PHONE CALL. You are speaking to a real person on the telephone.
431
1004
 
432
1005
  YOUR TASK: ${taskPrompt}
433
1006
 
1007
+ LANGUAGE: You MUST speak ${langName}. All your responses must be in ${langName}. This is non-negotiable.
1008
+
434
1009
  CRITICAL PHONE CALL RULES:
435
1010
  - You are on a TELEPHONE. Speak naturally, briefly, and conversationally.
1011
+ - ${isInbound
1012
+ ? "Someone is calling YOU. Answer warmly and ask how you can help."
1013
+ : "When you receive the signal that the callee picked up, START SPEAKING IMMEDIATELY with your greeting. Do not wait for them to say something first."}
436
1014
  - Maximum 2-3 short sentences per response. Nobody likes long monologues on the phone.
437
- - Use natural filler words ("well", "I see", "right", "exactly") — this sounds human.
1015
+ - Use natural filler words appropriate for ${langName}.
438
1016
  - If you don't understand something, politely ask them to repeat.
439
- - When your task is complete, summarize the result and say goodbye politely.
1017
+ - When the conversation is complete, say goodbye politely.
440
1018
  - If asked directly whether you're an AI, be honest.
441
1019
  - NEVER mention that you are calling on behalf of "HeyHank" or a "platform" unless asked.
442
- - Speak in the same language as the person you're calling.
443
- - The phone number you're calling is: ${phoneNumber}
1020
+ - ${isInbound ? `The caller's phone number is: ${phoneNumber}` : `The phone number you're calling is: ${phoneNumber}`}`;
1021
+
1022
+ // Inject contact info
1023
+ if (contact) {
1024
+ prompt += `\n\nCONTACT INFO:\n- Name: ${contact.name}`;
1025
+ if (contact.notes) prompt += `\n- Notes: ${contact.notes}`;
1026
+ }
1027
+
1028
+ // Inject call script (simple markdown) or call flow (state machine) — only when explicitly requested
1029
+ if (useSavedScript && contact?.callFlow && contact.callFlow.nodes.length > 0) {
1030
+ prompt += `\n\n${buildCallFlowInstructions(contact.callFlow)}`;
1031
+ } else if (useSavedScript && contact?.script) {
1032
+ prompt += `\n\nCALL SCRIPT — THIS IS YOUR PRIMARY OBJECTIVE. Follow each step IN ORDER. Do NOT skip any steps. Do NOT end the call until you have completed the LAST step. After each question, WAIT for their response before moving to the next step:\n${contact.script}`;
1033
+ }
444
1034
 
445
- CALL FLOW:
1035
+ const hasScript = useSavedScript && ((contact?.callFlow && contact.callFlow.nodes.length > 0) || contact?.script);
1036
+
1037
+ if (!hasScript) {
1038
+ prompt += isInbound
1039
+ ? `\n\nDEFAULT INBOUND CALL FLOW:
1040
+ 1. Answer warmly (e.g., "Hello, how can I help you?")
1041
+ 2. Listen to what the caller needs
1042
+ 3. Help them to the best of your ability
1043
+ 4. If you cannot help, politely explain and suggest alternatives
1044
+ 5. When the caller is satisfied, thank them and say goodbye
1045
+ 6. Use the end_call tool when the conversation is finished`
1046
+ : `\n\nDEFAULT CALL FLOW:
446
1047
  1. Greet the person naturally (e.g., "Hello, good day!")
447
1048
  2. State your request concisely
448
1049
  3. Listen and respond to their questions
449
1050
  4. When done, thank them and say goodbye
450
- 5. Use the end_call tool when the conversation is finished
1051
+ 5. Use the end_call tool when the conversation is finished`;
1052
+ }
1053
+
1054
+ prompt += `\n\nYou also have access to Google Search if you need to look something up during the call.
1055
+
1056
+ HANGING UP — FOLLOW THESE RULES STRICTLY:
1057
+ - ${hasScript ? "You MUST complete EVERY step in the script before even thinking about hanging up." : "Complete your full task before hanging up."}
1058
+ - After your final goodbye, WAIT for the other person to respond or say goodbye back.
1059
+ - Only call end_call after a clear mutual goodbye — NEVER during or right after speaking.
1060
+ - If unsure whether to hang up, DON'T. Let the other person end the conversation naturally.
1061
+ - It is MUCH better to stay on the line too long than to hang up too early.`;
1062
+
1063
+ // Inject knowledge base for inbound calls
1064
+ if (knowledgeBase?.trim()) {
1065
+ prompt += `\n\nKNOWLEDGE BASE — Use this information to answer the caller's questions accurately:\n${knowledgeBase.trim()}`;
1066
+ // KB-grounded inbound calls tend to make the LLM repeat the same overview
1067
+ // ("X bietet … Webentwicklung, WordPress, E-Commerce …") in every turn.
1068
+ // The rules below force tighter, non-repetitive answers.
1069
+ prompt += `
1070
+
1071
+ ANTWORT-STIL (verbindlich, gilt für jede Antwort):
1072
+ - Maximal 2 kurze Sätze. Niemals Listen oder Aufzählungen vorlesen.
1073
+ - Wiederhole NICHT, was du in vorherigen Turns schon gesagt hast — auch nicht in umformulierter Form. Beantworte nur das, was gerade gefragt wurde.
1074
+ - Knüpfe direkt an die letzte Aussage des Anrufers an. Stelle bei Bedarf EINE konkrete Rückfrage statt mehrere Optionen aufzuzählen.
1075
+ - Die Knowledge Base ist Hintergrund — nicht jeden Turn wiederkäuen. Liefere nur die für die aktuelle Frage relevante Information.`;
1076
+ }
1077
+
1078
+ return prompt;
1079
+ }
1080
+
1081
+ /** Convert a CallFlow state machine into natural language instructions for Gemini */
1082
+ function buildCallFlowInstructions(flow: CallFlow): string {
1083
+ const lines: string[] = [];
1084
+ lines.push(`CALL FLOW: "${flow.name}"${flow.description ? ` — ${flow.description}` : ""}`);
1085
+ lines.push("Follow this conversation flow strictly. Each step is a state — transition based on the callee's response.\n");
1086
+
1087
+ // Find the start node
1088
+ const startNode = flow.nodes.find(n => n.type === "start");
1089
+ if (!startNode) {
1090
+ lines.push("(No start node defined — use your best judgment)");
1091
+ return lines.join("\n");
1092
+ }
1093
+
1094
+ // Build adjacency map
1095
+ const edgeMap = new Map<string, typeof flow.edges>();
1096
+ for (const edge of flow.edges) {
1097
+ const existing = edgeMap.get(edge.from) || [];
1098
+ existing.push(edge);
1099
+ edgeMap.set(edge.from, existing);
1100
+ }
1101
+
1102
+ // Walk the graph in BFS to produce ordered instructions
1103
+ const visited = new Set<string>();
1104
+ const queue = [startNode.id];
1105
+ let stepNum = 1;
1106
+
1107
+ while (queue.length > 0) {
1108
+ const nodeId = queue.shift()!;
1109
+ if (visited.has(nodeId)) continue;
1110
+ visited.add(nodeId);
1111
+
1112
+ const node = flow.nodes.find(n => n.id === nodeId);
1113
+ if (!node) continue;
1114
+
1115
+ const outEdges = edgeMap.get(nodeId) || [];
1116
+ const typeLabel = node.type === "say" ? "SAY" : node.type === "ask" ? "ASK" : node.type === "condition" ? "CHECK" : node.type === "action" ? "DO" : node.type === "end" ? "END" : "START";
1117
+
1118
+ lines.push(`Step ${stepNum} [${typeLabel}]: ${node.label}`);
1119
+ if (node.prompt) lines.push(` → ${node.prompt}`);
1120
+ if (node.expectedResponses?.length) {
1121
+ lines.push(` Listen for: ${node.expectedResponses.join(", ")}`);
1122
+ }
1123
+ if (node.actionTool) {
1124
+ lines.push(` Action: call ${node.actionTool}${node.actionArgs ? ` with ${JSON.stringify(node.actionArgs)}` : ""}`);
1125
+ }
1126
+
1127
+ if (outEdges.length > 0) {
1128
+ for (const edge of outEdges) {
1129
+ const target = flow.nodes.find(n => n.id === edge.to);
1130
+ const label = edge.label || edge.condition || "then";
1131
+ lines.push(` → If "${label}": go to "${target?.label || edge.to}"`);
1132
+ if (!visited.has(edge.to)) queue.push(edge.to);
1133
+ }
1134
+ }
1135
+
1136
+ if (node.type === "end") {
1137
+ lines.push(" → Call end_call to hang up.");
1138
+ }
1139
+
1140
+ lines.push("");
1141
+ stepNum++;
1142
+ }
1143
+
1144
+ // Substitute template variables
1145
+ let result = lines.join("\n");
1146
+ if (flow.variables) {
1147
+ for (const [key, value] of Object.entries(flow.variables)) {
1148
+ result = result.replace(new RegExp(`\\{\\{${key}\\}\\}`, "g"), value);
1149
+ }
1150
+ }
451
1151
 
452
- You also have access to Google Search if you need to look something up during the call.
453
- When the task is completed, call end_call to hang up.`;
1152
+ return result;
454
1153
  }
455
1154
 
456
1155
  // Export singleton