npm - heyhank - Versions diffs - 0.1.0 → 0.3.0 - Mend

heyhank 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (161) hide show

package/LICENSE +21 -0
package/README.md +83 -10
package/bin/cli.ts +7 -7
package/bin/ctl.ts +42 -42
package/dist/assets/{AgentsPage-BPhirnCe.js → AgentsPage-DqjDAcIw.js} +3 -3
package/dist/assets/AssistantPage-C50CQFSB.js +2 -0
package/dist/assets/BusinessPage-AY70tf1k.js +1 -0
package/dist/assets/{CronManager-DDbz-yiT.js → CronManager-Dt7LLuRr.js} +1 -1
package/dist/assets/HelpPage-tlGx7fQF.js +1 -0
package/dist/assets/{IntegrationsPage-CrOitCmJ.js → IntegrationsPage-B4XOuHXu.js} +1 -1
package/dist/assets/JarvisHUD-BDvuRd0I.js +120 -0
package/dist/assets/MediaPage-CofV9Rd-.js +1 -0
package/dist/assets/MemoryPage-Cj7FeqmJ.js +1 -0
package/dist/assets/{PlatformDashboard-Do6F0O2p.js → PlatformDashboard-B9kXAlH1.js} +1 -1
package/dist/assets/{Playground-Fc5cdc5p.js → Playground-Cka-pRkP.js} +1 -1
package/dist/assets/{ProcessPanel-CslEiZkI.js → ProcessPanel-BqhQgfYj.js} +1 -1
package/dist/assets/{PromptsPage-D2EhsdNO.js → PromptsPage-VveKc9uX.js} +2 -2
package/dist/assets/RunsPage-DXVEk0AZ.js +1 -0
package/dist/assets/{SandboxManager-a1AVI5q2.js → SandboxManager-DACcwfDF.js} +1 -1
package/dist/assets/SettingsPage-jfuQh8Tu.js +51 -0
package/dist/assets/SkillsMarketplace-DrigiApe.js +1 -0
package/dist/assets/SocialMediaPage-DOh3IPe8.js +10 -0
package/dist/assets/{TailscalePage-CHiFhZXF.js → TailscalePage-DLhJWATT.js} +1 -1
package/dist/assets/TelephonyPage-9C4C3_ot.js +9 -0
package/dist/assets/{TerminalPage-Drwyrnfd.js → TerminalPage-ChX-8Wu7.js} +1 -1
package/dist/assets/{gemini-live-client-C7rqAW7G.js → gemini-live-client-C70FEtX2.js} +11 -8
package/dist/assets/index-C6Q5UQHD.js +229 -0
package/dist/assets/index-ZxGXgiV3.css +32 -0
package/dist/assets/sw-register-BBYuk-kw.js +1 -0
package/dist/assets/text-chat-client-BSbLJerZ.js +2 -0
package/dist/assets/workbox-window.prod.es5-BBnX5xw4.js +2 -0
package/dist/index.html +2 -2
package/dist/sw.js +1 -1
package/dist/{workbox-d2a0910a.js → workbox-080c8b91.js} +1 -1
package/package.json +6 -1
package/server/agent-executor.ts +102 -2
package/server/agent-store.ts +3 -3
package/server/agent-types.ts +11 -0
package/server/assistant-store.ts +232 -6
package/server/auth-manager.ts +9 -0
package/server/cache-headers.ts +1 -1
package/server/calendar-service.ts +10 -0
package/server/ceo/document-store.ts +129 -0
package/server/ceo/finance-store.ts +343 -0
package/server/ceo/kpi-store.ts +208 -0
package/server/ceo/memory-import.ts +277 -0
package/server/ceo/news-store.ts +208 -0
package/server/ceo/template-store.ts +134 -0
package/server/ceo/time-tracking-store.ts +227 -0
package/server/claude-auth-monitor.ts +128 -0
package/server/claude-code-worker.ts +86 -0
package/server/claude-session-discovery.ts +74 -1
package/server/cli-launcher.ts +32 -10
package/server/codex-adapter.ts +2 -2
package/server/codex-ws-proxy.cjs +1 -1
package/server/container-manager.ts +4 -4
package/server/content-intelligence/content-engine.ts +1112 -0
package/server/content-intelligence/platform-knowledge.ts +870 -0
package/server/cron-store.ts +3 -3
package/server/embedding-service.ts +49 -0
package/server/event-bus-types.ts +13 -0
package/server/execution-store.ts +54 -1
package/server/federation/node-store.ts +5 -4
package/server/fs-utils.ts +28 -1
package/server/hank-notifications-store.ts +91 -0
package/server/hank-tool-executor.ts +1835 -0
package/server/hank-tools.ts +2107 -0
package/server/image-pull-manager.ts +2 -2
package/server/index.ts +25 -2
package/server/llm-providers-streaming.ts +541 -0
package/server/llm-providers.ts +12 -0
package/server/marketplace.ts +249 -0
package/server/mcp-registry.ts +158 -0
package/server/memory-service.ts +296 -0
package/server/obsidian-sync.ts +184 -0
package/server/provider-manager.ts +5 -2
package/server/provider-registry.ts +12 -0
package/server/reminder-scheduler.ts +37 -1
package/server/routes/agent-routes.ts +44 -1
package/server/routes/assistant-routes.ts +198 -5
package/server/routes/ceo-finance-kpi-routes.ts +167 -0
package/server/routes/ceo-news-time-routes.ts +137 -0
package/server/routes/ceo-routes.ts +99 -0
package/server/routes/content-routes.ts +116 -0
package/server/routes/email-routes.ts +147 -0
package/server/routes/env-routes.ts +3 -3
package/server/routes/fs-routes.ts +12 -9
package/server/routes/hank-chat-routes.ts +592 -0
package/server/routes/llm-routes.ts +12 -0
package/server/routes/marketplace-routes.ts +63 -0
package/server/routes/media-routes.ts +1 -1
package/server/routes/memory-routes.ts +127 -0
package/server/routes/platform-routes.ts +14 -675
package/server/routes/sandbox-routes.ts +1 -1
package/server/routes/settings-routes.ts +51 -1
package/server/routes/socialmedia-routes.ts +152 -2
package/server/routes/system-routes.ts +2 -2
package/server/routes/team-routes.ts +71 -0
package/server/routes/telephony-routes.ts +98 -18
package/server/routes.ts +36 -9
package/server/session-creation-service.ts +2 -2
package/server/session-orchestrator.ts +54 -2
package/server/session-types.ts +2 -0
package/server/settings-manager.ts +50 -2
package/server/skill-discovery.ts +68 -0
package/server/socialmedia/adapters/browser-adapter.ts +179 -0
package/server/socialmedia/adapters/postiz-adapter.ts +291 -14
package/server/socialmedia/manager.ts +234 -15
package/server/socialmedia/store.ts +51 -1
package/server/socialmedia/types.ts +35 -2
package/server/socialview/browser-manager.ts +150 -0
package/server/socialview/extractors.ts +1298 -0
package/server/socialview/image-describe.ts +188 -0
package/server/socialview/library.ts +119 -0
package/server/socialview/poster.ts +276 -0
package/server/socialview/routes.ts +371 -0
package/server/socialview/style-analyzer.ts +187 -0
package/server/socialview/style-profiles.ts +67 -0
package/server/socialview/types.ts +166 -0
package/server/socialview/vision.ts +127 -0
package/server/socialview/vnc-manager.ts +110 -0
package/server/style-injector.ts +135 -0
package/server/team-service.ts +239 -0
package/server/team-store.ts +75 -0
package/server/team-types.ts +52 -0
package/server/telephony/audio-bridge.ts +281 -35
package/server/telephony/audio-recorder.ts +132 -0
package/server/telephony/call-manager.ts +803 -104
package/server/telephony/call-types.ts +67 -1
package/server/telephony/esl-client.ts +319 -0
package/server/telephony/freeswitch-sync.ts +155 -0
package/server/telephony/phone-utils.ts +63 -0
package/server/telephony/telephony-store.ts +9 -8
package/server/url-validator.ts +82 -0
package/server/vault-markdown.ts +317 -0
package/server/vault-migration.ts +121 -0
package/server/vault-store.ts +466 -0
package/server/vault-watcher.ts +59 -0
package/server/vector-store.ts +210 -0
package/server/voice-pipeline/gemini-live-adapter.ts +97 -0
package/server/voice-pipeline/greeting-cache.ts +200 -0
package/server/voice-pipeline/manager.ts +249 -0
package/server/voice-pipeline/pipeline.ts +335 -0
package/server/voice-pipeline/providers/index.ts +47 -0
package/server/voice-pipeline/providers/llm-internal.ts +527 -0
package/server/voice-pipeline/providers/stt-google.ts +157 -0
package/server/voice-pipeline/providers/tts-google.ts +126 -0
package/server/voice-pipeline/types.ts +247 -0
package/server/ws-bridge-types.ts +6 -1
package/dist/assets/AssistantPage-DJ-cMQfb.js +0 -1
package/dist/assets/HelpPage-DMfkzERp.js +0 -1
package/dist/assets/MediaPage-CE5rdvkC.js +0 -1
package/dist/assets/RunsPage-C5BZF5Rx.js +0 -1
package/dist/assets/SettingsPage-DirhjQrJ.js +0 -51
package/dist/assets/SocialMediaPage-DBuM28vD.js +0 -1
package/dist/assets/TelephonyPage-x0VV0fOo.js +0 -1
package/dist/assets/index-C8M_PUmX.css +0 -32
package/dist/assets/index-CEqZnThB.js +0 -204
package/dist/assets/sw-register-LSSpj6RU.js +0 -1
package/dist/assets/workbox-window.prod.es5-BIl4cyR9.js +0 -2
package/server/socialmedia/adapters/ayrshare-adapter.ts +0 -169

package/server/telephony/audio-bridge.ts CHANGED Viewed

@@ -3,12 +3,120 @@
 // Gemini Live BidiGenerateContent API (16kHz PCM).
 // This is the core of the telephony system — no STT/TTS needed,
 // Gemini handles everything natively.
+//
+// Supports two backends:
+//   1. Google AI Studio (default) — API key auth, no regional control
+//   2. Vertex AI — Service account auth, regional endpoints (EU latency savings)
+//
+// Set GEMINI_BACKEND=vertexai to use Vertex AI. Requires:
+//   GCP_PROJECT_ID, GCP_LOCATION, GCP_SERVICE_ACCOUNT_KEY
 import type { CallState, TranscriptEntry } from "./call-types.js";
+import { GoogleAuth } from "google-auth-library";
-// Gemini Live WebSocket endpoint
-const GEMINI_WS_BASE = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent";
-const GEMINI_MODEL = "models/gemini-2.0-flash-live-001";
+// ─── Gemini Backend Configuration ─────────────────────────────────────────────
+// AI Studio uses gemini-3.1-flash-live-preview (latest live model)
+// Vertex AI uses gemini-live-2.5-flash-native-audio (only live model available on Vertex)
+const AISTUDIO_MODEL = "gemini-3.1-flash-live-preview";
+const VERTEXAI_MODEL = "gemini-live-2.5-flash-native-audio";
+// Google AI Studio endpoint (default — no regional control, traffic goes to US)
+const AISTUDIO_WS_BASE = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent";
+// Vertex AI endpoint template (regional — use europe-west4 for EU)
+const VERTEXAI_WS_TEMPLATE = "wss://{LOCATION}-aiplatform.googleapis.com/ws/google.cloud.aiplatform.v1beta1.LlmBidiService/BidiGenerateContent";
+// NOTE: proactive_audio and affective_dialog are available on gemini-live-2.5-flash-native-audio
+// (Vertex AI) but NOT on gemini-3.1-flash-live-preview (AI Studio).
+// TODO: Enable proactiveAudio and enableAffectiveDialog for Vertex AI backend.
+/**
+ * Vertex AI settings supplied through configuration.
+ * Individual string fields are resolved by `vertexVal`: a non-empty value here
+ * wins, otherwise the matching environment variable, otherwise a built-in default.
+ */
+interface VertexAIOverrides {
+  /** When an overrides object is passed, this flag alone selects the backend (GEMINI_BACKEND is not consulted). */
+  enabled: boolean;
+  /** GCP project ID; falls back to the GCP_PROJECT_ID env var when empty or missing. */
+  projectId?: string;
+  /** Vertex AI region; falls back to the GCP_LOCATION env var, then "europe-west4". */
+  location?: string;
+  /** Service account key file (handed to GoogleAuth as `keyFile`); falls back to the GCP_SERVICE_ACCOUNT_KEY env var. */
+  serviceAccountKey?: string;
+}
+/**
+ * Decide whether the Vertex AI backend should be used.
+ *
+ * @param overrides Optional config-level settings. When present, its `enabled`
+ *   flag is the answer and the environment is not consulted.
+ * @returns `true` for Vertex AI, `false` for Google AI Studio. Without
+ *   overrides this is `true` only when GEMINI_BACKEND is exactly "vertexai".
+ */
+function isVertexAI(overrides?: VertexAIOverrides): boolean {
+  return overrides
+    ? overrides.enabled
+    : process.env.GEMINI_BACKEND === "vertexai";
+}
+/**
+ * Look up one Vertex AI setting, trying the sources in priority order:
+ * config override, then environment variable, then built-in default.
+ * An empty string counts as "not set" at every step.
+ *
+ * @param overrides Config-level settings, if any.
+ * @param field Which setting to resolve.
+ * @returns The first non-empty value found; the default is "" for
+ *   `projectId` and `serviceAccountKey`, and "europe-west4" for `location`.
+ */
+function vertexVal(overrides: VertexAIOverrides | undefined, field: "projectId" | "location" | "serviceAccountKey"): string {
+  const envNames = { projectId: "GCP_PROJECT_ID", location: "GCP_LOCATION", serviceAccountKey: "GCP_SERVICE_ACCOUNT_KEY" };
+  const fallbacks = { projectId: "", location: "europe-west4", serviceAccountKey: "" };
+  const fromConfig = overrides?.[field];
+  if (fromConfig) {
+    return fromConfig;
+  }
+  const fromEnv = process.env[envNames[field]];
+  if (fromEnv) {
+    return fromEnv;
+  }
+  return fallbacks[field];
+}
+/**
+ * Build the WebSocket URL for Gemini Live.
+ *
+ * @param apiKey AI Studio API key. Only used when the AI Studio backend is
+ *   selected, where it is appended as the `key` query parameter.
+ * @param overrides Optional Vertex AI settings (take precedence over env vars).
+ * @returns The regional Vertex AI endpoint, or the AI Studio endpoint with the
+ *   URL-encoded key appended.
+ * @throws Error if the resolved Vertex AI location contains anything other
+ *   than letters, digits and hyphens.
+ */
+function getGeminiEndpoint(apiKey: string, overrides?: VertexAIOverrides): string {
+  if (isVertexAI(overrides)) {
+    const location = vertexVal(overrides, "location");
+    // The location is spliced into the hostname, so restrict it to characters
+    // that can only form a single host label (no dots, slashes, "@", "$", ...).
+    if (!/^[a-z0-9-]+$/i.test(location)) {
+      throw new Error(`Invalid Vertex AI location "${location}". Expected a region name such as "europe-west4".`);
+    }
+    return VERTEXAI_WS_TEMPLATE.replace("{LOCATION}", location);
+  }
+  // Encode the key so characters like "&", "#" or spaces cannot corrupt the query string
+  return `${AISTUDIO_WS_BASE}?key=${encodeURIComponent(apiKey)}`;
+}
+/**
+ * Produce the `model` value for the Gemini Live setup message.
+ *
+ * @param overrides Optional Vertex AI settings (take precedence over env vars).
+ * @returns `models/<AI Studio model>` for AI Studio, or the fully qualified
+ *   `projects/.../locations/.../publishers/google/models/<Vertex model>` path.
+ * @throws Error when Vertex AI is selected but no project ID can be resolved.
+ */
+function getModelId(overrides?: VertexAIOverrides): string {
+  if (!isVertexAI(overrides)) {
+    return `models/${AISTUDIO_MODEL}`;
+  }
+  const project = vertexVal(overrides, "projectId");
+  if (!project) {
+    throw new Error("GCP Project ID is required for Vertex AI. Configure in Telephony Settings or set GCP_PROJECT_ID env var.");
+  }
+  const location = vertexVal(overrides, "location");
+  const segments = ["projects", project, "locations", location, "publishers", "google", "models", VERTEXAI_MODEL];
+  return segments.join("/");
+}
+// ─── Vertex AI Auth ───────────────────────────────────────────────────────────
+// One GoogleAuth instance per key file path, created on first use and reused afterwards
+const googleAuthCache = new Map<string, GoogleAuth>();
+/**
+ * Obtain an OAuth2 access token for Vertex AI.
+ * A token is requested for every call (one per WebSocket connection); only the
+ * GoogleAuth instance is cached, keyed by key file path.
+ *
+ * @param overrides Optional Vertex AI settings (take precedence over env vars).
+ * @returns The access token string.
+ * @throws Error when no service account key file is configured, or when the
+ *   auth client returns no token.
+ */
+async function getVertexAIToken(overrides?: VertexAIOverrides): Promise<string> {
+  const keyFile = vertexVal(overrides, "serviceAccountKey");
+  if (!keyFile) {
+    throw new Error(
+      "Service account key file is required for Vertex AI. " +
+      "Configure in Telephony Settings or set GCP_SERVICE_ACCOUNT_KEY env var.",
+    );
+  }
+  const cached = googleAuthCache.get(keyFile);
+  const auth = cached ?? new GoogleAuth({
+    keyFile,
+    scopes: ["https://www.googleapis.com/auth/cloud-platform"],
+  });
+  if (!cached) {
+    googleAuthCache.set(keyFile, auth);
+  }
+  const authClient = await auth.getClient();
+  const { token } = await authClient.getAccessToken();
+  if (!token) {
+    throw new Error("Failed to obtain Vertex AI access token");
+  }
+  return token;
+}
+// Announce the env-selected default backend once, when this module is first loaded.
+// Per-call config overrides are not known yet, so only environment variables are reported.
+if (!isVertexAI()) {
+  console.log("[telephony] Default Gemini backend: Google AI Studio (configure Vertex AI in Telephony Settings for EU routing)");
+} else {
+  console.log("[telephony] Default Gemini backend: Vertex AI");
+  const region = process.env.GCP_LOCATION || "europe-west4";
+  console.log(`[telephony] Default region: ${region}`);
+  const project = process.env.GCP_PROJECT_ID || "(not set — configure in Telephony Settings)";
+  console.log(`[telephony] GCP project: ${project}`);
+}
+// ─── AudioBridge ──────────────────────────────────────────────────────────────
 export interface AudioBridgeConfig {
   geminiApiKey: string;
@@ -18,6 +126,13 @@ export interface AudioBridgeConfig {
   onTranscript: (entry: TranscriptEntry) => void;
   onStatusChange: (status: CallState["status"]) => void;
   onToolCall: (calls: Array<{ id: string; name: string; args: Record<string, unknown> }>) => Promise<Array<{ id: string; name: string; response: unknown }>>;
+  // Vertex AI overrides (take precedence over env vars)
+  vertexAI?: {
+    enabled: boolean;
+    projectId?: string;
+    location?: string;
+    serviceAccountKey?: string;
+  };
 }
 /**
@@ -30,35 +145,73 @@ export class AudioBridge {
   private setupDone = false;
   private callId: string;
   private textBuffer = "";
+  private useVertex = false;
+  private backendLabel = "AI Studio";
+  // Audio chunk batching: accumulate ~100ms of 16kHz PCM before sending
+  // 16kHz × 2 bytes × 100ms = 3200 bytes per batch
+  private static readonly BATCH_BYTES = 3200;
+  private static readonly BATCH_FLUSH_MS = 100;
+  private audioBatchBuffer: Uint8Array[] = [];
+  private audioBatchSize = 0;
+  private audioBatchTimer: ReturnType<typeof setTimeout> | null = null;
   constructor(callId: string, config: AudioBridgeConfig) {
     this.callId = callId;
     this.config = config;
   }
-  /** Connect to Gemini Live API */
+  /** Connect to Gemini Live API (AI Studio or Vertex AI) */
   async connect(): Promise<void> {
-    const url = `${GEMINI_WS_BASE}?key=${this.config.geminiApiKey}`;
-    this.geminiWs = new WebSocket(url);
+    const vx = this.config.vertexAI;
+    this.useVertex = isVertexAI(vx);
+    const url = getGeminiEndpoint(this.config.geminiApiKey, vx);
+    this.backendLabel = this.useVertex ? `Vertex AI / ${vertexVal(vx, "location")}` : "AI Studio";
+    const useVertex = this.useVertex;
+    // For Vertex AI, we need a Bearer token instead of API key
+    if (useVertex) {
+      const location = vertexVal(vx, "location");
+      const token = await getVertexAIToken(vx);
+      console.log(`[telephony] Call ${this.callId}: connecting to Gemini (${this.backendLabel})`);
+      this.geminiWs = new WebSocket(url, {
+        headers: {
+          "Authorization": `Bearer ${token}`,
+        },
+      } as unknown as string[]);
+    } else {
+      console.log(`[telephony] Call ${this.callId}: connecting to Gemini (${this.backendLabel})`);
+      this.geminiWs = new WebSocket(url);
+    }
     return new Promise((resolve, reject) => {
       const timeout = setTimeout(() => {
-        reject(new Error("Gemini connection timeout"));
+        reject(new Error(`Gemini connection timeout (${this.backendLabel})`));
       }, 15000);
       this.geminiWs!.onopen = () => {
         // Send setup with telephony-optimized config
+        // Model ID format differs between AI Studio and Vertex AI
+        const modelId = getModelId(vx);
+        // Build generation config — thinkingConfig only supported on AI Studio model
+        const genConfig: Record<string, unknown> = {
+          responseModalities: ["AUDIO"],
+          speechConfig: {
+            voiceConfig: {
+              prebuiltVoiceConfig: { voiceName: this.config.voice },
+            },
+          },
+        };
+        if (!this.useVertex) {
+          // thinkingConfig not supported on gemini-live-2.5-flash-native-audio (Vertex AI)
+          genConfig.thinkingConfig = { thinkingLevel: "minimal" };
+        }
         this.geminiWs!.send(JSON.stringify({
           setup: {
-            model: GEMINI_MODEL,
-            generationConfig: {
-              responseModalities: ["AUDIO"],
-              speechConfig: {
-                voiceConfig: {
-                  prebuiltVoiceConfig: { voiceName: this.config.voice },
-                },
-              },
-            },
+            model: modelId,
+            generationConfig: genConfig,
             systemInstruction: {
               parts: [{ text: this.config.systemPrompt }],
             },
@@ -86,13 +239,18 @@ export class AudioBridge {
       this.geminiWs!.onerror = () => {
         clearTimeout(timeout);
-        reject(new Error("Gemini WebSocket error"));
+        reject(new Error(`Gemini WebSocket error (${this.backendLabel})`));
       };
       this.geminiWs!.onclose = () => {
+        const wasSettingUp = !this.setupDone;
         this.setupDone = false;
         this.flushTextBuffer();
         this.config.onStatusChange("ended");
+        if (wasSettingUp) {
+          clearTimeout(timeout);
+          reject(new Error(`Gemini WebSocket closed before setup completed (${this.backendLabel})`));
+        }
       };
     });
   }
@@ -122,7 +280,7 @@ export class AudioBridge {
       this.config.onStatusChange("active");
       this.config.onTranscript({
         speaker: "system",
-        text: "AI connected to call",
+        text: `AI connected to call (${this.backendLabel})`,
         isFinal: true,
         ts: Date.now(),
       });
@@ -151,7 +309,15 @@ export class AudioBridge {
         // Execute tools and send response back
         this.config.onToolCall(calls).then((responses) => {
           this.sendToolResponse(responses);
-        }).catch(() => {});
+        }).catch((err) => {
+          // Send error responses back to Gemini so it doesn't hang waiting for tool results
+          const errorResponses = calls.map((c) => ({
+            id: c.id,
+            name: c.name,
+            response: { error: `Tool call failed: ${err instanceof Error ? err.message : String(err)}` },
+          }));
+          this.sendToolResponse(errorResponses);
+        });
       }
       return;
     }
@@ -177,9 +343,10 @@ export class AudioBridge {
         });
       }
-      // Turn complete
+      // Turn complete — all audio for this turn has been sent
       if (content.turnComplete) {
         this.flushTextBuffer();
+        this.onTurnComplete();
         return;
       }
@@ -209,18 +376,75 @@ export class AudioBridge {
   /** Callback for when Gemini produces audio — override to send to FreeSWITCH */
   public onGeminiAudio: (base64Pcm: string) => void = () => {};
+  /** Callback for when Gemini finishes a turn (all audio sent) */
+  public onTurnComplete: () => void = () => {};
+  /**
+   * Push a user-role text turn to Gemini so it starts speaking right away
+   * (used to kick off the greeting, since the Live API otherwise waits for
+   * user input before responding).
+   * Silently does nothing unless the socket is open and setup has completed.
+   *
+   * @param text The text sent as the user's turn.
+   */
+  sendTrigger(text: string): void {
+    const socket = this.geminiWs;
+    if (!socket) return;
+    if (socket.readyState !== WebSocket.OPEN) return;
+    if (!this.setupDone) return;
+    const message = {
+      clientContent: {
+        turns: [{ role: "user", parts: [{ text }] }],
+        turnComplete: true,
+      },
+    };
+    socket.send(JSON.stringify(message));
+  }
   /**
    * Feed audio from FreeSWITCH into Gemini.
-   * Input: raw PCM 8kHz 16-bit mono from mod_audio_fork
-   * Gemini expects: PCM 16kHz
+   * Input: raw PCM 8kHz 16-bit mono from mod_audio_fork.
+   * Upsamples to 16kHz and batches into ~100ms chunks before sending
+   * to reduce WebSocket message overhead.
    */
   sendCallerAudio(pcm8kHz: Buffer | Uint8Array): void {
     if (!this.geminiWs || this.geminiWs.readyState !== WebSocket.OPEN || !this.setupDone) return;
-    // Upsample 8kHz → 16kHz (simple linear interpolation)
+    // Upsample 8kHz → 16kHz (linear interpolation)
     const upsampled = upsample8to16(pcm8kHz);
-    const base64 = bufferToBase64(upsampled);
+    // Accumulate into batch buffer
+    this.audioBatchBuffer.push(upsampled);
+    this.audioBatchSize += upsampled.byteLength;
+    // Send when we have >= 100ms worth of audio (3200 bytes @ 16kHz 16-bit mono)
+    if (this.audioBatchSize >= AudioBridge.BATCH_BYTES) {
+      this.flushAudioBatch();
+    } else if (!this.audioBatchTimer) {
+      // Ensure we flush within 100ms even if not enough data arrives (e.g. silence/pause)
+      this.audioBatchTimer = setTimeout(() => this.flushAudioBatch(), AudioBridge.BATCH_FLUSH_MS);
+    }
+  }
+  /** Flush accumulated audio chunks as a single WebSocket message */
+  private flushAudioBatch(): void {
+    if (this.audioBatchTimer) {
+      clearTimeout(this.audioBatchTimer);
+      this.audioBatchTimer = null;
+    }
+    if (this.audioBatchBuffer.length === 0) return;
+    if (!this.geminiWs || this.geminiWs.readyState !== WebSocket.OPEN || !this.setupDone) {
+      this.audioBatchBuffer = [];
+      this.audioBatchSize = 0;
+      return;
+    }
+    // Concatenate all buffered chunks into one
+    const merged = new Uint8Array(this.audioBatchSize);
+    let offset = 0;
+    for (const chunk of this.audioBatchBuffer) {
+      merged.set(chunk, offset);
+      offset += chunk.byteLength;
+    }
+    this.audioBatchBuffer = [];
+    this.audioBatchSize = 0;
+    const base64 = bufferToBase64(merged);
     this.geminiWs.send(JSON.stringify({
       realtimeInput: {
         audio: {
@@ -249,6 +473,12 @@ export class AudioBridge {
   /** Disconnect from Gemini */
   disconnect(): void {
     this.flushTextBuffer();
+    // Flush any remaining audio before closing
+    this.flushAudioBatch();
+    if (this.audioBatchTimer) {
+      clearTimeout(this.audioBatchTimer);
+      this.audioBatchTimer = null;
+    }
     if (this.geminiWs) {
       this.geminiWs.onclose = null;
       this.geminiWs.close();
@@ -290,7 +520,7 @@ function upsample8to16(input: Buffer | Uint8Array): Uint8Array {
 /**
  * Downsample 24kHz/16kHz PCM to 8kHz for FreeSWITCH.
- * Takes every Nth sample (simple decimation).
+ * Applies a moving-average low-pass filter before decimation to prevent aliasing.
  */
 export function downsampleTo8k(input: Uint8Array, inputRate: number): Uint8Array {
   const ratio = inputRate / 8000;
@@ -300,24 +530,40 @@ export function downsampleTo8k(input: Uint8Array, inputRate: number): Uint8Array
   const output = new Uint8Array(outputSamples * 2);
   const outputView = new DataView(output.buffer);
+  // Moving-average window size matches decimation ratio for anti-aliasing
+  const filterSize = Math.ceil(ratio);
+  const halfFilter = Math.floor(filterSize / 2);
   for (let i = 0; i < outputSamples; i++) {
-    const srcIdx = Math.floor(i * ratio);
-    if (srcIdx * 2 + 1 < input.byteLength) {
-      const sample = inputView.getInt16(srcIdx * 2, true);
-      outputView.setInt16(i * 2, sample, true);
+    const srcIndex = Math.floor(i * ratio);
+    // Average over filterSize samples centered on srcIndex
+    let sum = 0;
+    let count = 0;
+    const start = Math.max(0, srcIndex - halfFilter);
+    const end = Math.min(inputSamples, srcIndex + halfFilter + 1);
+    for (let j = start; j < end; j++) {
+      sum += inputView.getInt16(j * 2, true);
+      count++;
     }
+    const sample = Math.max(-32768, Math.min(32767, Math.round(sum / count)));
+    outputView.setInt16(i * 2, sample, true);
   }
   return output;
 }
-/** Convert Uint8Array/Buffer to base64 string */
+/**
+ * Convert Uint8Array/Buffer to base64 string.
+ * NOTE: The Gemini Live BidiGenerateContent API currently only supports JSON
+ * WebSocket frames with base64-encoded audio. There is no binary/raw PCM
+ * transport mode available. The ~33% base64 overhead adds ~0.5ms encoding
+ * time per chunk — not critical, but worth revisiting if Google adds binary
+ * frame support in the future.
+ */
 function bufferToBase64(buf: Uint8Array): string {
-  let binary = "";
-  for (let i = 0; i < buf.byteLength; i++) {
-    binary += String.fromCharCode(buf[i]);
-  }
-  return btoa(binary);
+  return Buffer.from(buf).toString('base64');
 }
 /** Convert base64 string to Uint8Array */

package/server/telephony/audio-recorder.ts ADDED Viewed

@@ -0,0 +1,132 @@
+// ─── Audio Recorder ──────────────────────────────────────────────────────────
+// Records call audio as stereo WAV (caller = left channel, AI = right channel).
+// Both channels are 8kHz PCM 16-bit. The WAV is written on call end.
+import { writeFileSync, mkdirSync, existsSync } from "node:fs";
+import { join } from "node:path";
+import { homedir } from "node:os";
+// Directory that receives one "<callId>.wav" per saved call: ~/.heyhank/telephony/calls
+const CALLS_DIR = join(homedir(), ".heyhank", "telephony", "calls");
+// Sample rate (Hz) written into the WAV header; both channels are expected to already be 8kHz
+const SAMPLE_RATE = 8000;
+/**
+ * Buffers both legs of a call in memory and writes them out as a single
+ * stereo WAV (caller = left channel, AI = right channel) when `save()` runs.
+ *
+ * NOTE(review): each channel is simply the concatenation of the chunks it was
+ * given, so the channels are only time-aligned if both sides deliver
+ * continuous audio — confirm against the callers.
+ */
+export class AudioRecorder {
+  private callerChunks: Uint8Array[] = [];
+  private aiChunks: Uint8Array[] = [];
+  private callerBytes = 0;
+  private aiBytes = 0;
+  private callId: string;
+  /** @param callId Identifier of the call; used as the WAV file name. */
+  constructor(callId: string) {
+    this.callId = callId;
+  }
+  /**
+   * Record caller audio (8kHz PCM 16-bit mono from FreeSWITCH).
+   * NOTE(review): the chunk is stored as a view over the caller's memory, not
+   * a copy — assumes the sender does not reuse the buffer; verify.
+   */
+  addCallerAudio(pcm: Buffer | Uint8Array): void {
+    const chunk = new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength);
+    this.callerChunks.push(chunk);
+    this.callerBytes += chunk.byteLength;
+  }
+  /** Record AI audio (already downsampled to 8kHz PCM 16-bit mono). Stored by reference. */
+  addAiAudio(pcm: Uint8Array): void {
+    this.aiChunks.push(pcm);
+    this.aiBytes += pcm.byteLength;
+  }
+  /**
+   * Write the stereo WAV file and release the buffered audio.
+   *
+   * @returns The path of the written file, or `null` when there is not a
+   *   single complete sample to save.
+   * @throws Whatever `mkdirSync` / `writeFileSync` throw on I/O failure.
+   */
+  save(): string | null {
+    if (this.callerBytes === 0 && this.aiBytes === 0) {
+      return null;
+    }
+    // Merge chunks into contiguous buffers
+    const callerPcm = mergeChunks(this.callerChunks, this.callerBytes);
+    const aiPcm = mergeChunks(this.aiChunks, this.aiBytes);
+    // Count only whole 16-bit samples. A channel with an odd number of bytes
+    // would otherwise yield a fractional count and make the loop below read
+    // past the end of the buffer (RangeError), losing the whole recording.
+    const callerSamples = Math.floor(callerPcm.byteLength / 2);
+    const aiSamples = Math.floor(aiPcm.byteLength / 2);
+    // The longer channel determines total length; the shorter is padded with silence.
+    const totalSamples = Math.max(callerSamples, aiSamples);
+    if (totalSamples === 0) {
+      return null;
+    }
+    // `recursive: true` makes this a no-op when the directory already exists
+    mkdirSync(CALLS_DIR, { recursive: true });
+    // Stereo PCM: 2 channels * 2 bytes per sample * totalSamples
+    const stereoData = new Uint8Array(totalSamples * 4);
+    const stereoView = new DataView(stereoData.buffer);
+    const callerView = new DataView(callerPcm.buffer, callerPcm.byteOffset, callerPcm.byteLength);
+    const aiView = new DataView(aiPcm.buffer, aiPcm.byteOffset, aiPcm.byteLength);
+    for (let i = 0; i < totalSamples; i++) {
+      const callerSample = i < callerSamples ? callerView.getInt16(i * 2, true) : 0;
+      const aiSample = i < aiSamples ? aiView.getInt16(i * 2, true) : 0;
+      // Left = caller, Right = AI
+      stereoView.setInt16(i * 4, callerSample, true);
+      stereoView.setInt16(i * 4 + 2, aiSample, true);
+    }
+    // Build WAV file
+    const wav = buildWav(stereoData, SAMPLE_RATE, 2);
+    const filePath = join(CALLS_DIR, `${this.callId}.wav`);
+    writeFileSync(filePath, wav);
+    console.log(`[telephony] Saved call recording: ${filePath} (${(wav.byteLength / 1024).toFixed(0)} KB, ${(totalSamples / SAMPLE_RATE).toFixed(1)}s)`);
+    // Free memory
+    this.callerChunks = [];
+    this.aiChunks = [];
+    this.callerBytes = 0;
+    this.aiBytes = 0;
+    return filePath;
+  }
+}
+/**
+ * Join a list of byte chunks into one contiguous array.
+ *
+ * @param chunks The chunks, in playback order.
+ * @param totalBytes Size of the result when a new array has to be allocated.
+ * @returns An empty array for no chunks, the chunk itself (same reference, no
+ *   copy) for exactly one chunk, otherwise a new `totalBytes`-long array with
+ *   the chunks written back to back from offset 0.
+ */
+function mergeChunks(chunks: Uint8Array[], totalBytes: number): Uint8Array {
+  switch (chunks.length) {
+    case 0:
+      return new Uint8Array(0);
+    case 1:
+      return chunks[0];
+  }
+  const joined = new Uint8Array(totalBytes);
+  chunks.reduce((writePos, part) => {
+    joined.set(part, writePos);
+    return writePos + part.byteLength;
+  }, 0);
+  return joined;
+}
+/**
+ * Wrap raw 16-bit PCM in a 44-byte RIFF/WAVE header.
+ *
+ * @param pcmData Interleaved little-endian PCM payload.
+ * @param sampleRate Sample rate in Hz written to the header.
+ * @param channels Channel count written to the header.
+ * @returns A new array holding the header followed by `pcmData`.
+ */
+function buildWav(pcmData: Uint8Array, sampleRate: number, channels: number): Uint8Array {
+  const HEADER_BYTES = 44;
+  const BITS = 16;
+  const bytesPerFrame = channels * (BITS / 8);
+  const out = new Uint8Array(HEADER_BYTES + pcmData.byteLength);
+  const dv = new DataView(out.buffer);
+  // Writes a four-character ASCII chunk tag at the given offset
+  const tag = (offset: number, text: string): void => {
+    for (let k = 0; k < text.length; k++) {
+      out[offset + k] = text.charCodeAt(k);
+    }
+  };
+  // RIFF header: size field excludes the 8 bytes of "RIFF" + the size itself
+  tag(0, "RIFF");
+  dv.setUint32(4, out.byteLength - 8, true);
+  tag(8, "WAVE");
+  // fmt chunk: 16-byte body, format code 1 = PCM
+  tag(12, "fmt ");
+  dv.setUint32(16, 16, true);
+  dv.setUint16(20, 1, true);
+  dv.setUint16(22, channels, true);
+  dv.setUint32(24, sampleRate, true);
+  dv.setUint32(28, sampleRate * bytesPerFrame, true); // byte rate
+  dv.setUint16(32, bytesPerFrame, true); // block align
+  dv.setUint16(34, BITS, true);
+  // data chunk
+  tag(36, "data");
+  dv.setUint32(40, pcmData.byteLength, true);
+  out.set(pcmData, HEADER_BYTES);
+  return out;
+}