heyhank 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +83 -10
  3. package/bin/cli.ts +7 -7
  4. package/bin/ctl.ts +42 -42
  5. package/dist/assets/{AgentsPage-BPhirnCe.js → AgentsPage-DqjDAcIw.js} +3 -3
  6. package/dist/assets/AssistantPage-C50CQFSB.js +2 -0
  7. package/dist/assets/BusinessPage-AY70tf1k.js +1 -0
  8. package/dist/assets/{CronManager-DDbz-yiT.js → CronManager-Dt7LLuRr.js} +1 -1
  9. package/dist/assets/HelpPage-tlGx7fQF.js +1 -0
  10. package/dist/assets/{IntegrationsPage-CrOitCmJ.js → IntegrationsPage-B4XOuHXu.js} +1 -1
  11. package/dist/assets/JarvisHUD-BDvuRd0I.js +120 -0
  12. package/dist/assets/MediaPage-CofV9Rd-.js +1 -0
  13. package/dist/assets/MemoryPage-Cj7FeqmJ.js +1 -0
  14. package/dist/assets/{PlatformDashboard-Do6F0O2p.js → PlatformDashboard-B9kXAlH1.js} +1 -1
  15. package/dist/assets/{Playground-Fc5cdc5p.js → Playground-Cka-pRkP.js} +1 -1
  16. package/dist/assets/{ProcessPanel-CslEiZkI.js → ProcessPanel-BqhQgfYj.js} +1 -1
  17. package/dist/assets/{PromptsPage-D2EhsdNO.js → PromptsPage-VveKc9uX.js} +2 -2
  18. package/dist/assets/RunsPage-DXVEk0AZ.js +1 -0
  19. package/dist/assets/{SandboxManager-a1AVI5q2.js → SandboxManager-DACcwfDF.js} +1 -1
  20. package/dist/assets/SettingsPage-jfuQh8Tu.js +51 -0
  21. package/dist/assets/SkillsMarketplace-DrigiApe.js +1 -0
  22. package/dist/assets/SocialMediaPage-DOh3IPe8.js +10 -0
  23. package/dist/assets/{TailscalePage-CHiFhZXF.js → TailscalePage-DLhJWATT.js} +1 -1
  24. package/dist/assets/TelephonyPage-9C4C3_ot.js +9 -0
  25. package/dist/assets/{TerminalPage-Drwyrnfd.js → TerminalPage-ChX-8Wu7.js} +1 -1
  26. package/dist/assets/{gemini-live-client-C7rqAW7G.js → gemini-live-client-C70FEtX2.js} +11 -8
  27. package/dist/assets/index-C6Q5UQHD.js +229 -0
  28. package/dist/assets/index-ZxGXgiV3.css +32 -0
  29. package/dist/assets/sw-register-BBYuk-kw.js +1 -0
  30. package/dist/assets/text-chat-client-BSbLJerZ.js +2 -0
  31. package/dist/assets/workbox-window.prod.es5-BBnX5xw4.js +2 -0
  32. package/dist/index.html +2 -2
  33. package/dist/sw.js +1 -1
  34. package/dist/{workbox-d2a0910a.js → workbox-080c8b91.js} +1 -1
  35. package/package.json +6 -1
  36. package/server/agent-executor.ts +102 -2
  37. package/server/agent-store.ts +3 -3
  38. package/server/agent-types.ts +11 -0
  39. package/server/assistant-store.ts +232 -6
  40. package/server/auth-manager.ts +9 -0
  41. package/server/cache-headers.ts +1 -1
  42. package/server/calendar-service.ts +10 -0
  43. package/server/ceo/document-store.ts +129 -0
  44. package/server/ceo/finance-store.ts +343 -0
  45. package/server/ceo/kpi-store.ts +208 -0
  46. package/server/ceo/memory-import.ts +277 -0
  47. package/server/ceo/news-store.ts +208 -0
  48. package/server/ceo/template-store.ts +134 -0
  49. package/server/ceo/time-tracking-store.ts +227 -0
  50. package/server/claude-auth-monitor.ts +128 -0
  51. package/server/claude-code-worker.ts +86 -0
  52. package/server/claude-session-discovery.ts +74 -1
  53. package/server/cli-launcher.ts +32 -10
  54. package/server/codex-adapter.ts +2 -2
  55. package/server/codex-ws-proxy.cjs +1 -1
  56. package/server/container-manager.ts +4 -4
  57. package/server/content-intelligence/content-engine.ts +1112 -0
  58. package/server/content-intelligence/platform-knowledge.ts +870 -0
  59. package/server/cron-store.ts +3 -3
  60. package/server/embedding-service.ts +49 -0
  61. package/server/event-bus-types.ts +13 -0
  62. package/server/execution-store.ts +54 -1
  63. package/server/federation/node-store.ts +5 -4
  64. package/server/fs-utils.ts +28 -1
  65. package/server/hank-notifications-store.ts +91 -0
  66. package/server/hank-tool-executor.ts +1835 -0
  67. package/server/hank-tools.ts +2107 -0
  68. package/server/image-pull-manager.ts +2 -2
  69. package/server/index.ts +25 -2
  70. package/server/llm-providers-streaming.ts +541 -0
  71. package/server/llm-providers.ts +12 -0
  72. package/server/marketplace.ts +249 -0
  73. package/server/mcp-registry.ts +158 -0
  74. package/server/memory-service.ts +296 -0
  75. package/server/obsidian-sync.ts +184 -0
  76. package/server/provider-manager.ts +5 -2
  77. package/server/provider-registry.ts +12 -0
  78. package/server/reminder-scheduler.ts +37 -1
  79. package/server/routes/agent-routes.ts +44 -1
  80. package/server/routes/assistant-routes.ts +198 -5
  81. package/server/routes/ceo-finance-kpi-routes.ts +167 -0
  82. package/server/routes/ceo-news-time-routes.ts +137 -0
  83. package/server/routes/ceo-routes.ts +99 -0
  84. package/server/routes/content-routes.ts +116 -0
  85. package/server/routes/email-routes.ts +147 -0
  86. package/server/routes/env-routes.ts +3 -3
  87. package/server/routes/fs-routes.ts +12 -9
  88. package/server/routes/hank-chat-routes.ts +592 -0
  89. package/server/routes/llm-routes.ts +12 -0
  90. package/server/routes/marketplace-routes.ts +63 -0
  91. package/server/routes/media-routes.ts +1 -1
  92. package/server/routes/memory-routes.ts +127 -0
  93. package/server/routes/platform-routes.ts +14 -675
  94. package/server/routes/sandbox-routes.ts +1 -1
  95. package/server/routes/settings-routes.ts +51 -1
  96. package/server/routes/socialmedia-routes.ts +152 -2
  97. package/server/routes/system-routes.ts +2 -2
  98. package/server/routes/team-routes.ts +71 -0
  99. package/server/routes/telephony-routes.ts +98 -18
  100. package/server/routes.ts +36 -9
  101. package/server/session-creation-service.ts +2 -2
  102. package/server/session-orchestrator.ts +54 -2
  103. package/server/session-types.ts +2 -0
  104. package/server/settings-manager.ts +50 -2
  105. package/server/skill-discovery.ts +68 -0
  106. package/server/socialmedia/adapters/browser-adapter.ts +179 -0
  107. package/server/socialmedia/adapters/postiz-adapter.ts +291 -14
  108. package/server/socialmedia/manager.ts +234 -15
  109. package/server/socialmedia/store.ts +51 -1
  110. package/server/socialmedia/types.ts +35 -2
  111. package/server/socialview/browser-manager.ts +150 -0
  112. package/server/socialview/extractors.ts +1298 -0
  113. package/server/socialview/image-describe.ts +188 -0
  114. package/server/socialview/library.ts +119 -0
  115. package/server/socialview/poster.ts +276 -0
  116. package/server/socialview/routes.ts +371 -0
  117. package/server/socialview/style-analyzer.ts +187 -0
  118. package/server/socialview/style-profiles.ts +67 -0
  119. package/server/socialview/types.ts +166 -0
  120. package/server/socialview/vision.ts +127 -0
  121. package/server/socialview/vnc-manager.ts +110 -0
  122. package/server/style-injector.ts +135 -0
  123. package/server/team-service.ts +239 -0
  124. package/server/team-store.ts +75 -0
  125. package/server/team-types.ts +52 -0
  126. package/server/telephony/audio-bridge.ts +281 -35
  127. package/server/telephony/audio-recorder.ts +132 -0
  128. package/server/telephony/call-manager.ts +803 -104
  129. package/server/telephony/call-types.ts +67 -1
  130. package/server/telephony/esl-client.ts +319 -0
  131. package/server/telephony/freeswitch-sync.ts +155 -0
  132. package/server/telephony/phone-utils.ts +63 -0
  133. package/server/telephony/telephony-store.ts +9 -8
  134. package/server/url-validator.ts +82 -0
  135. package/server/vault-markdown.ts +317 -0
  136. package/server/vault-migration.ts +121 -0
  137. package/server/vault-store.ts +466 -0
  138. package/server/vault-watcher.ts +59 -0
  139. package/server/vector-store.ts +210 -0
  140. package/server/voice-pipeline/gemini-live-adapter.ts +97 -0
  141. package/server/voice-pipeline/greeting-cache.ts +200 -0
  142. package/server/voice-pipeline/manager.ts +249 -0
  143. package/server/voice-pipeline/pipeline.ts +335 -0
  144. package/server/voice-pipeline/providers/index.ts +47 -0
  145. package/server/voice-pipeline/providers/llm-internal.ts +527 -0
  146. package/server/voice-pipeline/providers/stt-google.ts +157 -0
  147. package/server/voice-pipeline/providers/tts-google.ts +126 -0
  148. package/server/voice-pipeline/types.ts +247 -0
  149. package/server/ws-bridge-types.ts +6 -1
  150. package/dist/assets/AssistantPage-DJ-cMQfb.js +0 -1
  151. package/dist/assets/HelpPage-DMfkzERp.js +0 -1
  152. package/dist/assets/MediaPage-CE5rdvkC.js +0 -1
  153. package/dist/assets/RunsPage-C5BZF5Rx.js +0 -1
  154. package/dist/assets/SettingsPage-DirhjQrJ.js +0 -51
  155. package/dist/assets/SocialMediaPage-DBuM28vD.js +0 -1
  156. package/dist/assets/TelephonyPage-x0VV0fOo.js +0 -1
  157. package/dist/assets/index-C8M_PUmX.css +0 -32
  158. package/dist/assets/index-CEqZnThB.js +0 -204
  159. package/dist/assets/sw-register-LSSpj6RU.js +0 -1
  160. package/dist/assets/workbox-window.prod.es5-BIl4cyR9.js +0 -2
  161. package/server/socialmedia/adapters/ayrshare-adapter.ts +0 -169
@@ -3,12 +3,120 @@
3
3
  // Gemini Live BidiGenerateContent API (16kHz PCM).
4
4
  // This is the core of the telephony system — no STT/TTS needed,
5
5
  // Gemini handles everything natively.
6
+ //
7
+ // Supports two backends:
8
+ // 1. Google AI Studio (default) — API key auth, no regional control
9
+ // 2. Vertex AI — Service account auth, regional endpoints (EU latency savings)
10
+ //
11
+ // Set GEMINI_BACKEND=vertexai to use Vertex AI. Requires:
12
+ // GCP_PROJECT_ID, GCP_LOCATION, GCP_SERVICE_ACCOUNT_KEY
6
13
 
7
14
  import type { CallState, TranscriptEntry } from "./call-types.js";
15
+ import { GoogleAuth } from "google-auth-library";
8
16
 
9
- // Gemini Live WebSocket endpoint
10
- const GEMINI_WS_BASE = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent";
11
- const GEMINI_MODEL = "models/gemini-2.0-flash-live-001";
17
+ // ─── Gemini Backend Configuration ─────────────────────────────────────────────
18
+
19
/** Live model name requested from Google AI Studio. */
const AISTUDIO_MODEL = "gemini-3.1-flash-live-preview";

/** Live model name requested from Vertex AI. */
const VERTEXAI_MODEL = "gemini-live-2.5-flash-native-audio";

/** Google AI Studio WebSocket endpoint — the default backend, with no region selection. */
const AISTUDIO_WS_BASE =
  "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent";

/** Vertex AI WebSocket endpoint; `{LOCATION}` is a placeholder for the configured region. */
const VERTEXAI_WS_TEMPLATE =
  "wss://{LOCATION}-aiplatform.googleapis.com/ws/google.cloud.aiplatform.v1beta1.LlmBidiService/BidiGenerateContent";
29
+
30
+ // NOTE: proactive_audio and affective_dialog are available on gemini-live-2.5-flash-native-audio
31
+ // (Vertex AI) but NOT on gemini-3.1-flash-live-preview (AI Studio).
32
+ // TODO: Enable proactiveAudio and enableAffectiveDialog for Vertex AI backend.
33
+
34
/**
 * Per-call Vertex AI settings. When an object of this shape is supplied, its
 * `enabled` flag selects the backend and its non-empty string fields take
 * precedence over the matching GCP_* environment variables.
 */
interface VertexAIOverrides {
  /** `true` selects Vertex AI, `false` selects Google AI Studio. */
  enabled: boolean;
  /** GCP project ID. */
  projectId?: string;
  /** Vertex AI region, e.g. "europe-west4". */
  location?: string;
  /** Path to the service-account key file. */
  serviceAccountKey?: string;
}
40
+
41
/**
 * Decide which Gemini backend to use.
 *
 * @param overrides Optional per-call settings. When an object is passed, its
 *   `enabled` flag is authoritative and the environment is not consulted.
 * @returns `true` for Vertex AI, `false` for Google AI Studio. Without
 *   overrides the result is `true` only when GEMINI_BACKEND equals "vertexai".
 */
function isVertexAI(overrides?: VertexAIOverrides): boolean {
  return overrides
    ? overrides.enabled
    : process.env.GEMINI_BACKEND === "vertexai";
}
46
+
47
/**
 * Resolve one Vertex AI setting, trying three sources in order: the per-call
 * override, the matching environment variable, then a built-in default.
 *
 * An empty string counts as "not set" at every step, so a blank config field
 * falls through to the environment.
 *
 * @param overrides Per-call settings, or `undefined` to rely on env/defaults.
 * @param field Which setting to resolve.
 * @returns The first non-empty value found. The default is "europe-west4" for
 *   `location` and the empty string for the other two fields.
 */
function vertexVal(overrides: VertexAIOverrides | undefined, field: "projectId" | "location" | "serviceAccountKey"): string {
  const configured = overrides?.[field];
  if (configured) return configured;

  const envName = {
    projectId: "GCP_PROJECT_ID",
    location: "GCP_LOCATION",
    serviceAccountKey: "GCP_SERVICE_ACCOUNT_KEY",
  }[field];
  const fromEnv = process.env[envName];
  if (fromEnv) return fromEnv;

  return { projectId: "", location: "europe-west4", serviceAccountKey: "" }[field];
}
53
+
54
/**
 * Build the WebSocket URL for Gemini Live.
 *
 * - AI Studio: the base endpoint with the API key as the `key` query
 *   parameter. The key is percent-encoded so reserved characters cannot
 *   corrupt the query string.
 * - Vertex AI: the regional endpoint for the resolved location. No key is
 *   added to the URL; the caller authenticates with a bearer token header.
 *
 * @param apiKey Google AI Studio API key (ignored for Vertex AI).
 * @param overrides Optional per-call Vertex AI settings.
 * @returns The `wss://` URL to connect to.
 * @throws Error if the resolved Vertex AI location is not a plain region
 *   identifier (letters, digits and single hyphens).
 */
function getGeminiEndpoint(apiKey: string, overrides?: VertexAIOverrides): string {
  if (!isVertexAI(overrides)) {
    return `${AISTUDIO_WS_BASE}?key=${encodeURIComponent(apiKey)}`;
  }

  const location = vertexVal(overrides, "location");

  // The location is spliced into the host name and the connection carries an
  // OAuth bearer token, so reject anything that could point the socket at a
  // different host (dots, slashes, '#', '@', ...). This also keeps `$`
  // replacement patterns out of String.replace below.
  if (!/^[a-z0-9]+(?:-[a-z0-9]+)*$/i.test(location)) {
    throw new Error(`Invalid Vertex AI location: ${JSON.stringify(location)}`);
  }

  // The "global" location is served from the un-prefixed host
  // (aiplatform.googleapis.com), not "global-aiplatform.googleapis.com".
  if (location.toLowerCase() === "global") {
    return VERTEXAI_WS_TEMPLATE.replace("{LOCATION}-", "");
  }

  return VERTEXAI_WS_TEMPLATE.replace("{LOCATION}", location);
}
62
+
63
/**
 * Build the model identifier placed in the Gemini setup message.
 *
 * AI Studio takes the short `models/<name>` form. Vertex AI takes the full
 * resource path including project and location.
 *
 * @param overrides Optional per-call Vertex AI settings.
 * @returns The model identifier string for the selected backend.
 * @throws Error when Vertex AI is selected but no project ID can be resolved.
 */
function getModelId(overrides?: VertexAIOverrides): string {
  if (!isVertexAI(overrides)) {
    return `models/${AISTUDIO_MODEL}`;
  }

  const projectId = vertexVal(overrides, "projectId");
  const region = vertexVal(overrides, "location");
  if (!projectId) {
    throw new Error("GCP Project ID is required for Vertex AI. Configure in Telephony Settings or set GCP_PROJECT_ID env var.");
  }

  return ["projects", projectId, "locations", region, "publishers/google/models", VERTEXAI_MODEL].join("/");
}
73
+
74
+ // ─── Vertex AI Auth ───────────────────────────────────────────────────────────
75
+
76
/** GoogleAuth instances keyed by service-account key file path, reused across calls. */
const googleAuthCache = new Map<string, GoogleAuth>();

/**
 * Obtain an OAuth2 access token for Vertex AI.
 *
 * The GoogleAuth instance for a given key file is created once and cached.
 * A token is requested from it on every invocation, and this function does
 * no refresh scheduling of its own. The original author's note: tokens last
 * roughly an hour, which is expected to outlive a phone call.
 *
 * @param overrides Optional per-call Vertex AI settings.
 * @returns The access token string.
 * @throws Error when no key file path can be resolved, or when the auth
 *   client returns an empty token.
 */
async function getVertexAIToken(overrides?: VertexAIOverrides): Promise<string> {
  const keyFile = vertexVal(overrides, "serviceAccountKey");
  if (!keyFile) {
    throw new Error(
      "Service account key file is required for Vertex AI. " +
        "Configure in Telephony Settings or set GCP_SERVICE_ACCOUNT_KEY env var.",
    );
  }

  const cached = googleAuthCache.get(keyFile);
  const auth =
    cached ??
    new GoogleAuth({
      keyFile,
      scopes: ["https://www.googleapis.com/auth/cloud-platform"],
    });
  if (!cached) {
    googleAuthCache.set(keyFile, auth);
  }

  const authClient = await auth.getClient();
  const { token } = await authClient.getAccessToken();
  if (!token) {
    throw new Error("Failed to obtain Vertex AI access token");
  }
  return token;
}
109
+
110
// Announce the environment-derived default backend once, when the module is
// first loaded. Individual calls may still choose differently via their config.
{
  const startupLines = isVertexAI()
    ? [
        `[telephony] Default Gemini backend: Vertex AI`,
        `[telephony] Default region: ${process.env.GCP_LOCATION || "europe-west4"}`,
        `[telephony] GCP project: ${process.env.GCP_PROJECT_ID || "(not set — configure in Telephony Settings)"}`,
      ]
    : [
        `[telephony] Default Gemini backend: Google AI Studio (configure Vertex AI in Telephony Settings for EU routing)`,
      ];

  for (const line of startupLines) {
    console.log(line);
  }
}
118
+
119
+ // ─── AudioBridge ──────────────────────────────────────────────────────────────
12
120
 
13
121
  export interface AudioBridgeConfig {
14
122
  geminiApiKey: string;
@@ -18,6 +126,13 @@ export interface AudioBridgeConfig {
18
126
  onTranscript: (entry: TranscriptEntry) => void;
19
127
  onStatusChange: (status: CallState["status"]) => void;
20
128
  onToolCall: (calls: Array<{ id: string; name: string; args: Record<string, unknown> }>) => Promise<Array<{ id: string; name: string; response: unknown }>>;
129
+ // Vertex AI overrides (take precedence over env vars)
130
+ vertexAI?: {
131
+ enabled: boolean;
132
+ projectId?: string;
133
+ location?: string;
134
+ serviceAccountKey?: string;
135
+ };
21
136
  }
22
137
 
23
138
  /**
@@ -30,35 +145,73 @@ export class AudioBridge {
30
145
  private setupDone = false;
31
146
  private callId: string;
32
147
  private textBuffer = "";
148
+ private useVertex = false;
149
+ private backendLabel = "AI Studio";
150
+
151
+ // Audio chunk batching: accumulate ~100ms of 16kHz PCM before sending
152
+ // 16kHz × 2 bytes × 100ms = 3200 bytes per batch
153
+ private static readonly BATCH_BYTES = 3200;
154
+ private static readonly BATCH_FLUSH_MS = 100;
155
+ private audioBatchBuffer: Uint8Array[] = [];
156
+ private audioBatchSize = 0;
157
+ private audioBatchTimer: ReturnType<typeof setTimeout> | null = null;
33
158
 
34
159
  constructor(callId: string, config: AudioBridgeConfig) {
35
160
  this.callId = callId;
36
161
  this.config = config;
37
162
  }
38
163
 
39
- /** Connect to Gemini Live API */
164
+ /** Connect to Gemini Live API (AI Studio or Vertex AI) */
40
165
  async connect(): Promise<void> {
41
- const url = `${GEMINI_WS_BASE}?key=${this.config.geminiApiKey}`;
42
- this.geminiWs = new WebSocket(url);
166
+ const vx = this.config.vertexAI;
167
+ this.useVertex = isVertexAI(vx);
168
+ const url = getGeminiEndpoint(this.config.geminiApiKey, vx);
169
+ this.backendLabel = this.useVertex ? `Vertex AI / ${vertexVal(vx, "location")}` : "AI Studio";
170
+ const useVertex = this.useVertex;
171
+
172
+ // For Vertex AI, we need a Bearer token instead of API key
173
+ if (useVertex) {
174
+ const location = vertexVal(vx, "location");
175
+ const token = await getVertexAIToken(vx);
176
+ console.log(`[telephony] Call ${this.callId}: connecting to Gemini (${this.backendLabel})`);
177
+ this.geminiWs = new WebSocket(url, {
178
+ headers: {
179
+ "Authorization": `Bearer ${token}`,
180
+ },
181
+ } as unknown as string[]);
182
+ } else {
183
+ console.log(`[telephony] Call ${this.callId}: connecting to Gemini (${this.backendLabel})`);
184
+ this.geminiWs = new WebSocket(url);
185
+ }
43
186
 
44
187
  return new Promise((resolve, reject) => {
45
188
  const timeout = setTimeout(() => {
46
- reject(new Error("Gemini connection timeout"));
189
+ reject(new Error(`Gemini connection timeout (${this.backendLabel})`));
47
190
  }, 15000);
48
191
 
49
192
  this.geminiWs!.onopen = () => {
50
193
  // Send setup with telephony-optimized config
194
+ // Model ID format differs between AI Studio and Vertex AI
195
+ const modelId = getModelId(vx);
196
+
197
+ // Build generation config — thinkingConfig only supported on AI Studio model
198
+ const genConfig: Record<string, unknown> = {
199
+ responseModalities: ["AUDIO"],
200
+ speechConfig: {
201
+ voiceConfig: {
202
+ prebuiltVoiceConfig: { voiceName: this.config.voice },
203
+ },
204
+ },
205
+ };
206
+ if (!this.useVertex) {
207
+ // thinkingConfig not supported on gemini-live-2.5-flash-native-audio (Vertex AI)
208
+ genConfig.thinkingConfig = { thinkingLevel: "minimal" };
209
+ }
210
+
51
211
  this.geminiWs!.send(JSON.stringify({
52
212
  setup: {
53
- model: GEMINI_MODEL,
54
- generationConfig: {
55
- responseModalities: ["AUDIO"],
56
- speechConfig: {
57
- voiceConfig: {
58
- prebuiltVoiceConfig: { voiceName: this.config.voice },
59
- },
60
- },
61
- },
213
+ model: modelId,
214
+ generationConfig: genConfig,
62
215
  systemInstruction: {
63
216
  parts: [{ text: this.config.systemPrompt }],
64
217
  },
@@ -86,13 +239,18 @@ export class AudioBridge {
86
239
 
87
240
  this.geminiWs!.onerror = () => {
88
241
  clearTimeout(timeout);
89
- reject(new Error("Gemini WebSocket error"));
242
+ reject(new Error(`Gemini WebSocket error (${this.backendLabel})`));
90
243
  };
91
244
 
92
245
  this.geminiWs!.onclose = () => {
246
+ const wasSettingUp = !this.setupDone;
93
247
  this.setupDone = false;
94
248
  this.flushTextBuffer();
95
249
  this.config.onStatusChange("ended");
250
+ if (wasSettingUp) {
251
+ clearTimeout(timeout);
252
+ reject(new Error(`Gemini WebSocket closed before setup completed (${this.backendLabel})`));
253
+ }
96
254
  };
97
255
  });
98
256
  }
@@ -122,7 +280,7 @@ export class AudioBridge {
122
280
  this.config.onStatusChange("active");
123
281
  this.config.onTranscript({
124
282
  speaker: "system",
125
- text: "AI connected to call",
283
+ text: `AI connected to call (${this.backendLabel})`,
126
284
  isFinal: true,
127
285
  ts: Date.now(),
128
286
  });
@@ -151,7 +309,15 @@ export class AudioBridge {
151
309
  // Execute tools and send response back
152
310
  this.config.onToolCall(calls).then((responses) => {
153
311
  this.sendToolResponse(responses);
154
- }).catch(() => {});
312
+ }).catch((err) => {
313
+ // Send error responses back to Gemini so it doesn't hang waiting for tool results
314
+ const errorResponses = calls.map((c) => ({
315
+ id: c.id,
316
+ name: c.name,
317
+ response: { error: `Tool call failed: ${err instanceof Error ? err.message : String(err)}` },
318
+ }));
319
+ this.sendToolResponse(errorResponses);
320
+ });
155
321
  }
156
322
  return;
157
323
  }
@@ -177,9 +343,10 @@ export class AudioBridge {
177
343
  });
178
344
  }
179
345
 
180
- // Turn complete
346
+ // Turn complete — all audio for this turn has been sent
181
347
  if (content.turnComplete) {
182
348
  this.flushTextBuffer();
349
+ this.onTurnComplete();
183
350
  return;
184
351
  }
185
352
 
@@ -209,18 +376,75 @@ export class AudioBridge {
209
376
  /** Callback for when Gemini produces audio — override to send to FreeSWITCH */
210
377
  public onGeminiAudio: (base64Pcm: string) => void = () => {};
211
378
 
379
+ /** Callback for when Gemini finishes a turn (all audio sent) */
380
+ public onTurnComplete: () => void = () => {};
381
+
382
+ /**
383
+ * Send a text trigger to make Gemini start speaking immediately.
384
+ * Gemini Live API waits for user input before responding —
385
+ * this sends a "start now" text message to kick off the greeting.
386
+ */
387
+ sendTrigger(text: string): void {
388
+ if (!this.geminiWs || this.geminiWs.readyState !== WebSocket.OPEN || !this.setupDone) return;
389
+ this.geminiWs.send(JSON.stringify({
390
+ clientContent: {
391
+ turns: [{ role: "user", parts: [{ text }] }],
392
+ turnComplete: true,
393
+ },
394
+ }));
395
+ }
396
+
212
397
  /**
213
398
  * Feed audio from FreeSWITCH into Gemini.
214
- * Input: raw PCM 8kHz 16-bit mono from mod_audio_fork
215
- * Gemini expects: PCM 16kHz
399
+ * Input: raw PCM 8kHz 16-bit mono from mod_audio_fork.
400
+ * Upsamples to 16kHz and batches into ~100ms chunks before sending
401
+ * to reduce WebSocket message overhead.
216
402
  */
217
403
  sendCallerAudio(pcm8kHz: Buffer | Uint8Array): void {
218
404
  if (!this.geminiWs || this.geminiWs.readyState !== WebSocket.OPEN || !this.setupDone) return;
219
405
 
220
- // Upsample 8kHz → 16kHz (simple linear interpolation)
406
+ // Upsample 8kHz → 16kHz (linear interpolation)
221
407
  const upsampled = upsample8to16(pcm8kHz);
222
- const base64 = bufferToBase64(upsampled);
223
408
 
409
+ // Accumulate into batch buffer
410
+ this.audioBatchBuffer.push(upsampled);
411
+ this.audioBatchSize += upsampled.byteLength;
412
+
413
+ // Send when we have >= 100ms worth of audio (3200 bytes @ 16kHz 16-bit mono)
414
+ if (this.audioBatchSize >= AudioBridge.BATCH_BYTES) {
415
+ this.flushAudioBatch();
416
+ } else if (!this.audioBatchTimer) {
417
+ // Ensure we flush within 100ms even if not enough data arrives (e.g. silence/pause)
418
+ this.audioBatchTimer = setTimeout(() => this.flushAudioBatch(), AudioBridge.BATCH_FLUSH_MS);
419
+ }
420
+ }
421
+
422
+ /** Flush accumulated audio chunks as a single WebSocket message */
423
+ private flushAudioBatch(): void {
424
+ if (this.audioBatchTimer) {
425
+ clearTimeout(this.audioBatchTimer);
426
+ this.audioBatchTimer = null;
427
+ }
428
+
429
+ if (this.audioBatchBuffer.length === 0) return;
430
+ if (!this.geminiWs || this.geminiWs.readyState !== WebSocket.OPEN || !this.setupDone) {
431
+ this.audioBatchBuffer = [];
432
+ this.audioBatchSize = 0;
433
+ return;
434
+ }
435
+
436
+ // Concatenate all buffered chunks into one
437
+ const merged = new Uint8Array(this.audioBatchSize);
438
+ let offset = 0;
439
+ for (const chunk of this.audioBatchBuffer) {
440
+ merged.set(chunk, offset);
441
+ offset += chunk.byteLength;
442
+ }
443
+
444
+ this.audioBatchBuffer = [];
445
+ this.audioBatchSize = 0;
446
+
447
+ const base64 = bufferToBase64(merged);
224
448
  this.geminiWs.send(JSON.stringify({
225
449
  realtimeInput: {
226
450
  audio: {
@@ -249,6 +473,12 @@ export class AudioBridge {
249
473
  /** Disconnect from Gemini */
250
474
  disconnect(): void {
251
475
  this.flushTextBuffer();
476
+ // Flush any remaining audio before closing
477
+ this.flushAudioBatch();
478
+ if (this.audioBatchTimer) {
479
+ clearTimeout(this.audioBatchTimer);
480
+ this.audioBatchTimer = null;
481
+ }
252
482
  if (this.geminiWs) {
253
483
  this.geminiWs.onclose = null;
254
484
  this.geminiWs.close();
@@ -290,7 +520,7 @@ function upsample8to16(input: Buffer | Uint8Array): Uint8Array {
290
520
 
291
521
  /**
292
522
  * Downsample 24kHz/16kHz PCM to 8kHz for FreeSWITCH.
293
- * Takes every Nth sample (simple decimation).
523
+ * Applies a moving-average low-pass filter before decimation to prevent aliasing.
294
524
  */
295
525
  export function downsampleTo8k(input: Uint8Array, inputRate: number): Uint8Array {
296
526
  const ratio = inputRate / 8000;
@@ -300,24 +530,40 @@ export function downsampleTo8k(input: Uint8Array, inputRate: number): Uint8Array
300
530
  const output = new Uint8Array(outputSamples * 2);
301
531
  const outputView = new DataView(output.buffer);
302
532
 
533
+ // Moving-average window size matches decimation ratio for anti-aliasing
534
+ const filterSize = Math.ceil(ratio);
535
+ const halfFilter = Math.floor(filterSize / 2);
536
+
303
537
  for (let i = 0; i < outputSamples; i++) {
304
- const srcIdx = Math.floor(i * ratio);
305
- if (srcIdx * 2 + 1 < input.byteLength) {
306
- const sample = inputView.getInt16(srcIdx * 2, true);
307
- outputView.setInt16(i * 2, sample, true);
538
+ const srcIndex = Math.floor(i * ratio);
539
+
540
+ // Average over filterSize samples centered on srcIndex
541
+ let sum = 0;
542
+ let count = 0;
543
+ const start = Math.max(0, srcIndex - halfFilter);
544
+ const end = Math.min(inputSamples, srcIndex + halfFilter + 1);
545
+ for (let j = start; j < end; j++) {
546
+ sum += inputView.getInt16(j * 2, true);
547
+ count++;
308
548
  }
549
+
550
+ const sample = Math.max(-32768, Math.min(32767, Math.round(sum / count)));
551
+ outputView.setInt16(i * 2, sample, true);
309
552
  }
310
553
 
311
554
  return output;
312
555
  }
313
556
 
314
/**
 * Encode raw bytes as a base64 string.
 *
 * Audio is sent to Gemini inside JSON WebSocket frames, so PCM has to be
 * base64-encoded, which enlarges the payload by about one third. Per the
 * original note, the Live API offers no binary frame mode; revisit if that
 * changes.
 *
 * @param buf Bytes to encode. Only the range the view covers is encoded, so
 *   subarrays work.
 * @returns The base64 text ("" for an empty input).
 */
function bufferToBase64(buf: Uint8Array): string {
  // Buffer.from(typedArray) copies the bytes. The (arrayBuffer, offset,
  // length) overload wraps the same memory instead, which avoids one
  // allocation and copy per audio batch.
  return Buffer.from(buf.buffer, buf.byteOffset, buf.byteLength).toString("base64");
}
322
568
 
323
569
  /** Convert base64 string to Uint8Array */
@@ -0,0 +1,132 @@
1
+ // ─── Audio Recorder ──────────────────────────────────────────────────────────
2
+ // Records call audio as stereo WAV (caller = left channel, AI = right channel).
3
+ // Both channels are 8kHz PCM 16-bit. The WAV is written on call end.
4
+
5
+ import { writeFileSync, mkdirSync, existsSync } from "node:fs";
6
+ import { join } from "node:path";
7
+ import { homedir } from "node:os";
8
+
9
+ const CALLS_DIR = join(homedir(), ".heyhank", "telephony", "calls");
10
+ const SAMPLE_RATE = 8000;
11
+
12
/**
 * Buffers both sides of a phone call in memory and writes them out as one
 * stereo WAV file: caller on the left channel, AI on the right.
 *
 * Nothing is written until `save()` is called. Both inputs are expected to be
 * 16-bit little-endian mono PCM at SAMPLE_RATE.
 */
export class AudioRecorder {
  private callerChunks: Uint8Array[] = [];
  private aiChunks: Uint8Array[] = [];
  private callerBytes = 0;
  private aiBytes = 0;
  private callId: string;

  /** @param callId Call identifier; used as the recording's file name. */
  constructor(callId: string) {
    this.callId = callId;
  }

  /**
   * Record caller audio (8kHz PCM 16-bit mono from FreeSWITCH).
   *
   * The bytes are copied. A view would keep referencing the producer's
   * ArrayBuffer, so if that buffer were later reused the recording would
   * change silently. A small view would also keep a larger parent buffer
   * alive for the whole call.
   */
  addCallerAudio(pcm: Buffer | Uint8Array): void {
    const chunk = new Uint8Array(pcm);
    this.callerChunks.push(chunk);
    this.callerBytes += chunk.byteLength;
  }

  /**
   * Record AI audio (already downsampled to 8kHz PCM 16-bit mono).
   * Copied for the same reason as caller audio.
   */
  addAiAudio(pcm: Uint8Array): void {
    const chunk = new Uint8Array(pcm);
    this.aiChunks.push(chunk);
    this.aiBytes += chunk.byteLength;
  }

  /**
   * Write the buffered audio as a stereo WAV file under CALLS_DIR and release
   * the buffers.
   *
   * The shorter channel is padded with silence to the length of the longer one.
   *
   * @returns The path of the written file, or `null` when no complete sample
   *   was recorded on either channel.
   */
  save(): string | null {
    // Count whole 16-bit samples only. Without the floor, an odd byte total
    // gives a fractional sample count and the interleave loop would read two
    // bytes where only one exists, throwing and losing the whole recording.
    const callerSamples = Math.floor(this.callerBytes / 2);
    const aiSamples = Math.floor(this.aiBytes / 2);
    const totalSamples = Math.max(callerSamples, aiSamples);
    if (totalSamples === 0) {
      return null;
    }

    if (!existsSync(CALLS_DIR)) {
      mkdirSync(CALLS_DIR, { recursive: true });
    }

    // Merge chunks into contiguous buffers
    const callerPcm = mergeChunks(this.callerChunks, this.callerBytes);
    const aiPcm = mergeChunks(this.aiChunks, this.aiBytes);

    // Stereo PCM: 2 channels * 2 bytes per sample * totalSamples
    const stereoData = new Uint8Array(totalSamples * 4);
    const stereoView = new DataView(stereoData.buffer);
    const callerView = new DataView(callerPcm.buffer, callerPcm.byteOffset, callerPcm.byteLength);
    const aiView = new DataView(aiPcm.buffer, aiPcm.byteOffset, aiPcm.byteLength);

    for (let i = 0; i < totalSamples; i++) {
      // Past the end of a channel, write silence.
      const callerSample = i < callerSamples ? callerView.getInt16(i * 2, true) : 0;
      const aiSample = i < aiSamples ? aiView.getInt16(i * 2, true) : 0;
      // Left = caller, Right = AI
      stereoView.setInt16(i * 4, callerSample, true);
      stereoView.setInt16(i * 4 + 2, aiSample, true);
    }

    // Build WAV file
    const wav = buildWav(stereoData, SAMPLE_RATE, 2);
    const filePath = join(CALLS_DIR, `${this.callId}.wav`);
    writeFileSync(filePath, wav);

    console.log(`[telephony] Saved call recording: ${filePath} (${(wav.byteLength / 1024).toFixed(0)} KB, ${(totalSamples / SAMPLE_RATE).toFixed(1)}s)`);

    // Free memory
    this.callerChunks = [];
    this.aiChunks = [];
    this.callerBytes = 0;
    this.aiBytes = 0;

    return filePath;
  }
}
86
+
87
/**
 * Concatenate byte chunks into one contiguous array.
 *
 * @param chunks Chunks in the order they should appear.
 * @param totalBytes Size of the output buffer; must be at least the combined
 *   length of `chunks` (a smaller value makes `set` throw a RangeError).
 * @returns An empty array for no chunks, the chunk itself (not a copy) for a
 *   single chunk, otherwise a new `totalBytes`-long array holding the chunks
 *   back to back.
 */
function mergeChunks(chunks: Uint8Array[], totalBytes: number): Uint8Array {
  switch (chunks.length) {
    case 0:
      return new Uint8Array(0);
    case 1:
      return chunks[0];
  }

  const joined = new Uint8Array(totalBytes);
  chunks.reduce((cursor, part) => {
    joined.set(part, cursor);
    return cursor + part.byteLength;
  }, 0);
  return joined;
}
98
+
99
/**
 * Wrap raw 16-bit PCM in a canonical 44-byte RIFF/WAVE header.
 *
 * @param pcmData Interleaved little-endian 16-bit samples.
 * @param sampleRate Sample rate in Hz written to the header.
 * @param channels Channel count written to the header.
 * @returns A new array holding the header followed by `pcmData`.
 */
function buildWav(pcmData: Uint8Array, sampleRate: number, channels: number): Uint8Array {
  const HEADER_BYTES = 44;
  const BITS_PER_SAMPLE = 16;
  const bytesPerFrame = channels * (BITS_PER_SAMPLE / 8);
  const payloadBytes = pcmData.byteLength;

  const out = new Uint8Array(HEADER_BYTES + payloadBytes);
  const header = new DataView(out.buffer);

  // Writes a four-character ASCII chunk tag at the given offset.
  const putTag = (offset: number, tag: string): void => {
    for (let i = 0; i < tag.length; i++) {
      header.setUint8(offset + i, tag.charCodeAt(i));
    }
  };

  // RIFF container: its size field excludes the 8-byte "RIFF"+size prefix.
  putTag(0, "RIFF");
  header.setUint32(4, out.byteLength - 8, true);
  putTag(8, "WAVE");

  // "fmt " chunk: 16-byte body, format tag 1 = uncompressed PCM.
  putTag(12, "fmt ");
  header.setUint32(16, 16, true);
  header.setUint16(20, 1, true);
  header.setUint16(22, channels, true);
  header.setUint32(24, sampleRate, true);
  header.setUint32(28, sampleRate * bytesPerFrame, true); // byte rate
  header.setUint16(32, bytesPerFrame, true); // block align
  header.setUint16(34, BITS_PER_SAMPLE, true);

  // "data" chunk: payload length, then the samples themselves.
  putTag(36, "data");
  header.setUint32(40, payloadBytes, true);
  out.set(pcmData, HEADER_BYTES);

  return out;
}