heyhank 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -10
- package/bin/cli.ts +7 -7
- package/bin/ctl.ts +42 -42
- package/dist/assets/{AgentsPage-BPhirnCe.js → AgentsPage-B-AAmsMK.js} +3 -3
- package/dist/assets/AssistantPage-BV1Mfwdt.js +2 -0
- package/dist/assets/BusinessPage-tLpNEz19.js +1 -0
- package/dist/assets/{CronManager-DDbz-yiT.js → CronManager-B-K_n3Jg.js} +1 -1
- package/dist/assets/HelpPage-Bhf_j6Xr.js +1 -0
- package/dist/assets/{IntegrationsPage-CrOitCmJ.js → IntegrationsPage-DAMjs9tM.js} +1 -1
- package/dist/assets/JarvisHUD-C_TGXCCn.js +120 -0
- package/dist/assets/MediaPage-C48HTTrt.js +1 -0
- package/dist/assets/MemoryPage-JkC-qtgp.js +1 -0
- package/dist/assets/{PlatformDashboard-Do6F0O2p.js → PlatformDashboard-AUo7tNnE.js} +1 -1
- package/dist/assets/{Playground-Fc5cdc5p.js → Playground-AzNMsRBL.js} +1 -1
- package/dist/assets/{ProcessPanel-CslEiZkI.js → ProcessPanel-DpE_2sX3.js} +1 -1
- package/dist/assets/{PromptsPage-D2EhsdNO.js → PromptsPage-C2RQOs6p.js} +2 -2
- package/dist/assets/RunsPage-B9UOyO79.js +1 -0
- package/dist/assets/{SandboxManager-a1AVI5q2.js → SandboxManager-jHvYjwfh.js} +1 -1
- package/dist/assets/SettingsPage-BBJax6gt.js +51 -0
- package/dist/assets/SkillsMarketplace-IjmjfdjD.js +1 -0
- package/dist/assets/SocialMediaPage-DoPZHhr2.js +10 -0
- package/dist/assets/{TailscalePage-CHiFhZXF.js → TailscalePage-DDEY7ckO.js} +1 -1
- package/dist/assets/TelephonyPage-OPNBZYKt.js +9 -0
- package/dist/assets/{TerminalPage-Drwyrnfd.js → TerminalPage-BjMbHHW3.js} +1 -1
- package/dist/assets/{gemini-live-client-C7rqAW7G.js → gemini-live-client-C70FEtX2.js} +11 -8
- package/dist/assets/{index-CEqZnThB.js → index-BgYM4wXw.js} +94 -93
- package/dist/assets/index-BkjSoVgn.css +32 -0
- package/dist/assets/sw-register-C7NOHtIu.js +1 -0
- package/dist/assets/text-chat-client-BSbLJerZ.js +2 -0
- package/dist/index.html +2 -2
- package/dist/sw.js +1 -1
- package/package.json +6 -1
- package/server/agent-executor.ts +37 -2
- package/server/agent-store.ts +3 -3
- package/server/agent-types.ts +11 -0
- package/server/assistant-store.ts +232 -6
- package/server/auth-manager.ts +9 -0
- package/server/cache-headers.ts +1 -1
- package/server/calendar-service.ts +10 -0
- package/server/ceo/document-store.ts +129 -0
- package/server/ceo/finance-store.ts +343 -0
- package/server/ceo/kpi-store.ts +208 -0
- package/server/ceo/memory-import.ts +277 -0
- package/server/ceo/news-store.ts +208 -0
- package/server/ceo/template-store.ts +134 -0
- package/server/ceo/time-tracking-store.ts +227 -0
- package/server/claude-auth-monitor.ts +128 -0
- package/server/claude-code-worker.ts +86 -0
- package/server/claude-session-discovery.ts +74 -1
- package/server/cli-launcher.ts +32 -10
- package/server/codex-adapter.ts +2 -2
- package/server/codex-ws-proxy.cjs +1 -1
- package/server/container-manager.ts +4 -4
- package/server/content-intelligence/content-engine.ts +1112 -0
- package/server/content-intelligence/platform-knowledge.ts +870 -0
- package/server/cron-store.ts +3 -3
- package/server/embedding-service.ts +49 -0
- package/server/event-bus-types.ts +13 -0
- package/server/federation/node-store.ts +5 -4
- package/server/fs-utils.ts +28 -1
- package/server/hank-notifications-store.ts +91 -0
- package/server/hank-tool-executor.ts +1835 -0
- package/server/hank-tools.ts +2107 -0
- package/server/image-pull-manager.ts +2 -2
- package/server/index.ts +25 -2
- package/server/llm-providers-streaming.ts +541 -0
- package/server/llm-providers.ts +12 -0
- package/server/marketplace.ts +249 -0
- package/server/mcp-registry.ts +158 -0
- package/server/memory-service.ts +296 -0
- package/server/obsidian-sync.ts +184 -0
- package/server/provider-manager.ts +5 -2
- package/server/provider-registry.ts +12 -0
- package/server/reminder-scheduler.ts +37 -1
- package/server/routes/agent-routes.ts +2 -1
- package/server/routes/assistant-routes.ts +198 -5
- package/server/routes/ceo-finance-kpi-routes.ts +167 -0
- package/server/routes/ceo-news-time-routes.ts +137 -0
- package/server/routes/ceo-routes.ts +99 -0
- package/server/routes/content-routes.ts +116 -0
- package/server/routes/email-routes.ts +147 -0
- package/server/routes/env-routes.ts +3 -3
- package/server/routes/fs-routes.ts +12 -9
- package/server/routes/hank-chat-routes.ts +592 -0
- package/server/routes/llm-routes.ts +12 -0
- package/server/routes/marketplace-routes.ts +63 -0
- package/server/routes/media-routes.ts +1 -1
- package/server/routes/memory-routes.ts +127 -0
- package/server/routes/platform-routes.ts +14 -675
- package/server/routes/sandbox-routes.ts +1 -1
- package/server/routes/settings-routes.ts +51 -1
- package/server/routes/socialmedia-routes.ts +152 -2
- package/server/routes/system-routes.ts +2 -2
- package/server/routes/team-routes.ts +71 -0
- package/server/routes/telephony-routes.ts +98 -18
- package/server/routes.ts +36 -9
- package/server/session-creation-service.ts +2 -2
- package/server/session-orchestrator.ts +54 -2
- package/server/session-types.ts +2 -0
- package/server/settings-manager.ts +50 -2
- package/server/skill-discovery.ts +68 -0
- package/server/socialmedia/adapters/browser-adapter.ts +179 -0
- package/server/socialmedia/adapters/postiz-adapter.ts +291 -14
- package/server/socialmedia/manager.ts +234 -15
- package/server/socialmedia/store.ts +51 -1
- package/server/socialmedia/types.ts +35 -2
- package/server/socialview/browser-manager.ts +150 -0
- package/server/socialview/extractors.ts +1298 -0
- package/server/socialview/image-describe.ts +188 -0
- package/server/socialview/library.ts +119 -0
- package/server/socialview/poster.ts +276 -0
- package/server/socialview/routes.ts +371 -0
- package/server/socialview/style-analyzer.ts +187 -0
- package/server/socialview/style-profiles.ts +67 -0
- package/server/socialview/types.ts +166 -0
- package/server/socialview/vision.ts +127 -0
- package/server/socialview/vnc-manager.ts +110 -0
- package/server/style-injector.ts +135 -0
- package/server/team-service.ts +239 -0
- package/server/team-store.ts +75 -0
- package/server/team-types.ts +52 -0
- package/server/telephony/audio-bridge.ts +281 -35
- package/server/telephony/audio-recorder.ts +132 -0
- package/server/telephony/call-manager.ts +803 -104
- package/server/telephony/call-types.ts +67 -1
- package/server/telephony/esl-client.ts +319 -0
- package/server/telephony/freeswitch-sync.ts +155 -0
- package/server/telephony/phone-utils.ts +63 -0
- package/server/telephony/telephony-store.ts +9 -8
- package/server/url-validator.ts +82 -0
- package/server/vault-markdown.ts +317 -0
- package/server/vault-migration.ts +121 -0
- package/server/vault-store.ts +466 -0
- package/server/vault-watcher.ts +59 -0
- package/server/vector-store.ts +210 -0
- package/server/voice-pipeline/gemini-live-adapter.ts +97 -0
- package/server/voice-pipeline/greeting-cache.ts +200 -0
- package/server/voice-pipeline/manager.ts +249 -0
- package/server/voice-pipeline/pipeline.ts +335 -0
- package/server/voice-pipeline/providers/index.ts +47 -0
- package/server/voice-pipeline/providers/llm-internal.ts +527 -0
- package/server/voice-pipeline/providers/stt-google.ts +157 -0
- package/server/voice-pipeline/providers/tts-google.ts +126 -0
- package/server/voice-pipeline/types.ts +247 -0
- package/server/ws-bridge-types.ts +6 -1
- package/dist/assets/AssistantPage-DJ-cMQfb.js +0 -1
- package/dist/assets/HelpPage-DMfkzERp.js +0 -1
- package/dist/assets/MediaPage-CE5rdvkC.js +0 -1
- package/dist/assets/RunsPage-C5BZF5Rx.js +0 -1
- package/dist/assets/SettingsPage-DirhjQrJ.js +0 -51
- package/dist/assets/SocialMediaPage-DBuM28vD.js +0 -1
- package/dist/assets/TelephonyPage-x0VV0fOo.js +0 -1
- package/dist/assets/index-C8M_PUmX.css +0 -32
- package/dist/assets/sw-register-LSSpj6RU.js +0 -1
- package/server/socialmedia/adapters/ayrshare-adapter.ts +0 -169
|
@@ -3,12 +3,120 @@
|
|
|
3
3
|
// Gemini Live BidiGenerateContent API (16kHz PCM).
|
|
4
4
|
// This is the core of the telephony system — no STT/TTS needed,
|
|
5
5
|
// Gemini handles everything natively.
|
|
6
|
+
//
|
|
7
|
+
// Supports two backends:
|
|
8
|
+
// 1. Google AI Studio (default) — API key auth, no regional control
|
|
9
|
+
// 2. Vertex AI — Service account auth, regional endpoints (EU latency savings)
|
|
10
|
+
//
|
|
11
|
+
// Set GEMINI_BACKEND=vertexai to use Vertex AI. Requires:
|
|
12
|
+
// GCP_PROJECT_ID, GCP_LOCATION, GCP_SERVICE_ACCOUNT_KEY
|
|
6
13
|
|
|
7
14
|
import type { CallState, TranscriptEntry } from "./call-types.js";
|
|
15
|
+
import { GoogleAuth } from "google-auth-library";
|
|
8
16
|
|
|
9
|
-
// Gemini
|
|
10
|
-
|
|
11
|
-
|
|
17
|
+
// ─── Gemini Backend Configuration ─────────────────────────────────────────────
|
|
18
|
+
|
|
19
|
+
/** Live model requested from Google AI Studio (noted by the author as the latest live model). */
const AISTUDIO_MODEL = "gemini-3.1-flash-live-preview";

/** Live model requested from Vertex AI (noted by the author as the only live model available there). */
const VERTEXAI_MODEL = "gemini-live-2.5-flash-native-audio";

/**
 * Google AI Studio WebSocket endpoint — the default backend.
 * Offers no regional control; per the author's note traffic goes to the US.
 */
const AISTUDIO_WS_BASE =
  "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent";

/**
 * Vertex AI WebSocket endpoint template. The "{LOCATION}" placeholder is
 * replaced with a GCP region (e.g. europe-west4 for EU routing).
 */
const VERTEXAI_WS_TEMPLATE =
  "wss://{LOCATION}-aiplatform.googleapis.com/ws/google.cloud.aiplatform.v1beta1.LlmBidiService/BidiGenerateContent";
|
|
29
|
+
|
|
30
|
+
// NOTE: proactive_audio and affective_dialog are available on gemini-live-2.5-flash-native-audio
|
|
31
|
+
// (Vertex AI) but NOT on gemini-3.1-flash-live-preview (AI Studio).
|
|
32
|
+
// TODO: Enable proactiveAudio and enableAffectiveDialog for Vertex AI backend.
|
|
33
|
+
|
|
34
|
+
/**
 * Vertex AI settings supplied through configuration.
 * When an overrides object is present its `enabled` flag decides the backend;
 * the optional string fields fall back to environment variables when unset or empty.
 */
interface VertexAIOverrides {
  /** Selects the Vertex AI backend (true) or Google AI Studio (false). */
  enabled: boolean;
  /** GCP project ID; falls back to the GCP_PROJECT_ID environment variable. */
  projectId?: string;
  /** GCP region; falls back to GCP_LOCATION, then to "europe-west4". */
  location?: string;
  /** Service account key file path; falls back to GCP_SERVICE_ACCOUNT_KEY. */
  serviceAccountKey?: string;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Decide whether the Vertex AI backend should be used.
 *
 * An explicit overrides object always wins: its `enabled` flag is returned
 * as-is, even when false. Without overrides, the GEMINI_BACKEND environment
 * variable must be exactly "vertexai".
 */
function isVertexAI(overrides?: VertexAIOverrides): boolean {
  return overrides ? overrides.enabled : process.env.GEMINI_BACKEND === "vertexai";
}
|
|
46
|
+
|
|
47
|
+
/**
 * Resolve one Vertex AI setting.
 *
 * Lookup order: non-empty config override, then non-empty environment variable
 * (GCP_PROJECT_ID / GCP_LOCATION / GCP_SERVICE_ACCOUNT_KEY), then the built-in
 * default — "europe-west4" for `location`, the empty string for the others.
 *
 * @param overrides Config-supplied settings, if any.
 * @param field Which setting to resolve.
 * @returns The resolved value; may be "" when nothing is configured.
 */
function vertexVal(overrides: VertexAIOverrides | undefined, field: "projectId" | "location" | "serviceAccountKey"): string {
  // Empty strings are treated as "not set" at every level, hence truthiness checks.
  const configured = overrides?.[field];
  if (configured) return configured;

  const envNames: Record<typeof field, string> = {
    projectId: "GCP_PROJECT_ID",
    location: "GCP_LOCATION",
    serviceAccountKey: "GCP_SERVICE_ACCOUNT_KEY",
  };
  const fromEnv = process.env[envNames[field]];
  if (fromEnv) return fromEnv;

  return field === "location" ? "europe-west4" : "";
}
|
|
53
|
+
|
|
54
|
+
/**
 * Build the WebSocket URL for the Gemini Live connection.
 *
 * - AI Studio: the base endpoint with the API key appended as the `key` query parameter.
 * - Vertex AI: the regional endpoint template with "{LOCATION}" filled in; the
 *   API key is not part of this URL.
 *
 * @param apiKey Gemini API key (only used for the AI Studio backend).
 * @param overrides Optional Vertex AI settings from configuration.
 */
function getGeminiEndpoint(apiKey: string, overrides?: VertexAIOverrides): string {
  if (!isVertexAI(overrides)) {
    return `${AISTUDIO_WS_BASE}?key=${apiKey}`;
  }

  const region = vertexVal(overrides, "location");
  return VERTEXAI_WS_TEMPLATE.replace("{LOCATION}", region);
}
|
|
62
|
+
|
|
63
|
+
/**
 * Build the model identifier placed in the setup message.
 *
 * - AI Studio: `models/<model>`.
 * - Vertex AI: the fully qualified resource path
 *   `projects/<project>/locations/<location>/publishers/google/models/<model>`.
 *
 * @param overrides Optional Vertex AI settings from configuration.
 * @throws Error when Vertex AI is selected but no project ID can be resolved.
 */
function getModelId(overrides?: VertexAIOverrides): string {
  if (!isVertexAI(overrides)) {
    return `models/${AISTUDIO_MODEL}`;
  }

  const projectId = vertexVal(overrides, "projectId");
  if (!projectId) {
    throw new Error("GCP Project ID is required for Vertex AI. Configure in Telephony Settings or set GCP_PROJECT_ID env var.");
  }

  const region = vertexVal(overrides, "location");
  return ["projects", projectId, "locations", region, "publishers", "google", "models", VERTEXAI_MODEL].join("/");
}
|
|
73
|
+
|
|
74
|
+
// ─── Vertex AI Auth ───────────────────────────────────────────────────────────
|
|
75
|
+
|
|
76
|
+
/** GoogleAuth instances keyed by service account key file path, so each key file is set up only once. */
const googleAuthCache = new Map<string, GoogleAuth>();

/**
 * Obtain an OAuth2 access token for Vertex AI using a service account key file.
 *
 * The original author notes that tokens are valid for roughly 60 minutes and
 * that one is requested per call (per WebSocket connection), on the assumption
 * that phone calls rarely run longer than that.
 *
 * @param overrides Optional Vertex AI settings from configuration.
 * @returns The access token string.
 * @throws Error when no key file is configured or no token is returned.
 */
async function getVertexAIToken(overrides?: VertexAIOverrides): Promise<string> {
  const keyFile = vertexVal(overrides, "serviceAccountKey");
  if (!keyFile) {
    throw new Error(
      "Service account key file is required for Vertex AI. " +
      "Configure in Telephony Settings or set GCP_SERVICE_ACCOUNT_KEY env var.",
    );
  }

  // Reuse the GoogleAuth instance for this key file when one exists.
  const cached = googleAuthCache.get(keyFile);
  const auth = cached ?? new GoogleAuth({
    keyFile,
    scopes: ["https://www.googleapis.com/auth/cloud-platform"],
  });
  if (!cached) {
    googleAuthCache.set(keyFile, auth);
  }

  const authClient = await auth.getClient();
  const { token } = await authClient.getAccessToken();
  if (!token) {
    throw new Error("Failed to obtain Vertex AI access token");
  }
  return token;
}
|
|
109
|
+
|
|
110
|
+
// Report the environment-derived default backend once, when the module loads.
// Per-call configuration can still override this default.
if (!isVertexAI()) {
  console.log(`[telephony] Default Gemini backend: Google AI Studio (configure Vertex AI in Telephony Settings for EU routing)`);
} else {
  const defaultRegion = process.env.GCP_LOCATION || "europe-west4";
  const defaultProject = process.env.GCP_PROJECT_ID || "(not set — configure in Telephony Settings)";
  console.log(`[telephony] Default Gemini backend: Vertex AI`);
  console.log(`[telephony] Default region: ${defaultRegion}`);
  console.log(`[telephony] GCP project: ${defaultProject}`);
}
|
|
118
|
+
|
|
119
|
+
// ─── AudioBridge ──────────────────────────────────────────────────────────────
|
|
12
120
|
|
|
13
121
|
export interface AudioBridgeConfig {
|
|
14
122
|
geminiApiKey: string;
|
|
@@ -18,6 +126,13 @@ export interface AudioBridgeConfig {
|
|
|
18
126
|
onTranscript: (entry: TranscriptEntry) => void;
|
|
19
127
|
onStatusChange: (status: CallState["status"]) => void;
|
|
20
128
|
onToolCall: (calls: Array<{ id: string; name: string; args: Record<string, unknown> }>) => Promise<Array<{ id: string; name: string; response: unknown }>>;
|
|
129
|
+
// Vertex AI overrides (take precedence over env vars)
|
|
130
|
+
vertexAI?: {
|
|
131
|
+
enabled: boolean;
|
|
132
|
+
projectId?: string;
|
|
133
|
+
location?: string;
|
|
134
|
+
serviceAccountKey?: string;
|
|
135
|
+
};
|
|
21
136
|
}
|
|
22
137
|
|
|
23
138
|
/**
|
|
@@ -30,35 +145,73 @@ export class AudioBridge {
|
|
|
30
145
|
private setupDone = false;
|
|
31
146
|
private callId: string;
|
|
32
147
|
private textBuffer = "";
|
|
148
|
+
private useVertex = false;
|
|
149
|
+
private backendLabel = "AI Studio";
|
|
150
|
+
|
|
151
|
+
// Audio chunk batching: accumulate ~100ms of 16kHz PCM before sending
|
|
152
|
+
// 16kHz × 2 bytes × 100ms = 3200 bytes per batch
|
|
153
|
+
private static readonly BATCH_BYTES = 3200;
|
|
154
|
+
private static readonly BATCH_FLUSH_MS = 100;
|
|
155
|
+
private audioBatchBuffer: Uint8Array[] = [];
|
|
156
|
+
private audioBatchSize = 0;
|
|
157
|
+
private audioBatchTimer: ReturnType<typeof setTimeout> | null = null;
|
|
33
158
|
|
|
34
159
|
constructor(callId: string, config: AudioBridgeConfig) {
|
|
35
160
|
this.callId = callId;
|
|
36
161
|
this.config = config;
|
|
37
162
|
}
|
|
38
163
|
|
|
39
|
-
/** Connect to Gemini Live API */
|
|
164
|
+
/** Connect to Gemini Live API (AI Studio or Vertex AI) */
|
|
40
165
|
async connect(): Promise<void> {
|
|
41
|
-
const
|
|
42
|
-
this.
|
|
166
|
+
const vx = this.config.vertexAI;
|
|
167
|
+
this.useVertex = isVertexAI(vx);
|
|
168
|
+
const url = getGeminiEndpoint(this.config.geminiApiKey, vx);
|
|
169
|
+
this.backendLabel = this.useVertex ? `Vertex AI / ${vertexVal(vx, "location")}` : "AI Studio";
|
|
170
|
+
const useVertex = this.useVertex;
|
|
171
|
+
|
|
172
|
+
// For Vertex AI, we need a Bearer token instead of API key
|
|
173
|
+
if (useVertex) {
|
|
174
|
+
const location = vertexVal(vx, "location");
|
|
175
|
+
const token = await getVertexAIToken(vx);
|
|
176
|
+
console.log(`[telephony] Call ${this.callId}: connecting to Gemini (${this.backendLabel})`);
|
|
177
|
+
this.geminiWs = new WebSocket(url, {
|
|
178
|
+
headers: {
|
|
179
|
+
"Authorization": `Bearer ${token}`,
|
|
180
|
+
},
|
|
181
|
+
} as unknown as string[]);
|
|
182
|
+
} else {
|
|
183
|
+
console.log(`[telephony] Call ${this.callId}: connecting to Gemini (${this.backendLabel})`);
|
|
184
|
+
this.geminiWs = new WebSocket(url);
|
|
185
|
+
}
|
|
43
186
|
|
|
44
187
|
return new Promise((resolve, reject) => {
|
|
45
188
|
const timeout = setTimeout(() => {
|
|
46
|
-
reject(new Error(
|
|
189
|
+
reject(new Error(`Gemini connection timeout (${this.backendLabel})`));
|
|
47
190
|
}, 15000);
|
|
48
191
|
|
|
49
192
|
this.geminiWs!.onopen = () => {
|
|
50
193
|
// Send setup with telephony-optimized config
|
|
194
|
+
// Model ID format differs between AI Studio and Vertex AI
|
|
195
|
+
const modelId = getModelId(vx);
|
|
196
|
+
|
|
197
|
+
// Build generation config — thinkingConfig only supported on AI Studio model
|
|
198
|
+
const genConfig: Record<string, unknown> = {
|
|
199
|
+
responseModalities: ["AUDIO"],
|
|
200
|
+
speechConfig: {
|
|
201
|
+
voiceConfig: {
|
|
202
|
+
prebuiltVoiceConfig: { voiceName: this.config.voice },
|
|
203
|
+
},
|
|
204
|
+
},
|
|
205
|
+
};
|
|
206
|
+
if (!this.useVertex) {
|
|
207
|
+
// thinkingConfig not supported on gemini-live-2.5-flash-native-audio (Vertex AI)
|
|
208
|
+
genConfig.thinkingConfig = { thinkingLevel: "minimal" };
|
|
209
|
+
}
|
|
210
|
+
|
|
51
211
|
this.geminiWs!.send(JSON.stringify({
|
|
52
212
|
setup: {
|
|
53
|
-
model:
|
|
54
|
-
generationConfig:
|
|
55
|
-
responseModalities: ["AUDIO"],
|
|
56
|
-
speechConfig: {
|
|
57
|
-
voiceConfig: {
|
|
58
|
-
prebuiltVoiceConfig: { voiceName: this.config.voice },
|
|
59
|
-
},
|
|
60
|
-
},
|
|
61
|
-
},
|
|
213
|
+
model: modelId,
|
|
214
|
+
generationConfig: genConfig,
|
|
62
215
|
systemInstruction: {
|
|
63
216
|
parts: [{ text: this.config.systemPrompt }],
|
|
64
217
|
},
|
|
@@ -86,13 +239,18 @@ export class AudioBridge {
|
|
|
86
239
|
|
|
87
240
|
this.geminiWs!.onerror = () => {
|
|
88
241
|
clearTimeout(timeout);
|
|
89
|
-
reject(new Error(
|
|
242
|
+
reject(new Error(`Gemini WebSocket error (${this.backendLabel})`));
|
|
90
243
|
};
|
|
91
244
|
|
|
92
245
|
this.geminiWs!.onclose = () => {
|
|
246
|
+
const wasSettingUp = !this.setupDone;
|
|
93
247
|
this.setupDone = false;
|
|
94
248
|
this.flushTextBuffer();
|
|
95
249
|
this.config.onStatusChange("ended");
|
|
250
|
+
if (wasSettingUp) {
|
|
251
|
+
clearTimeout(timeout);
|
|
252
|
+
reject(new Error(`Gemini WebSocket closed before setup completed (${this.backendLabel})`));
|
|
253
|
+
}
|
|
96
254
|
};
|
|
97
255
|
});
|
|
98
256
|
}
|
|
@@ -122,7 +280,7 @@ export class AudioBridge {
|
|
|
122
280
|
this.config.onStatusChange("active");
|
|
123
281
|
this.config.onTranscript({
|
|
124
282
|
speaker: "system",
|
|
125
|
-
text:
|
|
283
|
+
text: `AI connected to call (${this.backendLabel})`,
|
|
126
284
|
isFinal: true,
|
|
127
285
|
ts: Date.now(),
|
|
128
286
|
});
|
|
@@ -151,7 +309,15 @@ export class AudioBridge {
|
|
|
151
309
|
// Execute tools and send response back
|
|
152
310
|
this.config.onToolCall(calls).then((responses) => {
|
|
153
311
|
this.sendToolResponse(responses);
|
|
154
|
-
}).catch(() => {
|
|
312
|
+
}).catch((err) => {
|
|
313
|
+
// Send error responses back to Gemini so it doesn't hang waiting for tool results
|
|
314
|
+
const errorResponses = calls.map((c) => ({
|
|
315
|
+
id: c.id,
|
|
316
|
+
name: c.name,
|
|
317
|
+
response: { error: `Tool call failed: ${err instanceof Error ? err.message : String(err)}` },
|
|
318
|
+
}));
|
|
319
|
+
this.sendToolResponse(errorResponses);
|
|
320
|
+
});
|
|
155
321
|
}
|
|
156
322
|
return;
|
|
157
323
|
}
|
|
@@ -177,9 +343,10 @@ export class AudioBridge {
|
|
|
177
343
|
});
|
|
178
344
|
}
|
|
179
345
|
|
|
180
|
-
// Turn complete
|
|
346
|
+
// Turn complete — all audio for this turn has been sent
|
|
181
347
|
if (content.turnComplete) {
|
|
182
348
|
this.flushTextBuffer();
|
|
349
|
+
this.onTurnComplete();
|
|
183
350
|
return;
|
|
184
351
|
}
|
|
185
352
|
|
|
@@ -209,18 +376,75 @@ export class AudioBridge {
|
|
|
209
376
|
/** Callback for when Gemini produces audio — override to send to FreeSWITCH */
|
|
210
377
|
public onGeminiAudio: (base64Pcm: string) => void = () => {};
|
|
211
378
|
|
|
379
|
+
/** Callback for when Gemini finishes a turn (all audio sent) */
|
|
380
|
+
public onTurnComplete: () => void = () => {};
|
|
381
|
+
|
|
382
|
+
/**
 * Send a text turn to Gemini so that it starts speaking right away.
 *
 * Per the original author's note, the Gemini Live API waits for user input
 * before responding, so this is used to kick off the greeting. The call is
 * silently ignored unless the socket is open and setup has completed.
 *
 * @param text Text delivered to Gemini as a completed user turn.
 */
sendTrigger(text: string): void {
  const socket = this.geminiWs;
  const ready = socket && socket.readyState === WebSocket.OPEN && this.setupDone;
  if (!ready) return;

  const message = {
    clientContent: {
      turns: [{ role: "user", parts: [{ text }] }],
      turnComplete: true,
    },
  };
  socket.send(JSON.stringify(message));
}
|
|
396
|
+
|
|
212
397
|
/**
 * Feed caller audio from FreeSWITCH towards Gemini.
 *
 * Input is raw PCM, 8kHz 16-bit mono, from mod_audio_fork. Each chunk is
 * upsampled to 16kHz and queued; the queue is flushed as one WebSocket message
 * once it holds at least BATCH_BYTES (3200 bytes, ~100ms), or after
 * BATCH_FLUSH_MS at the latest. Audio is dropped when the socket is not open
 * or setup has not finished.
 */
sendCallerAudio(pcm8kHz: Buffer | Uint8Array): void {
  const socket = this.geminiWs;
  if (!socket || socket.readyState !== WebSocket.OPEN || !this.setupDone) return;

  // Gemini expects 16kHz input, so convert before queueing.
  const pcm16kHz = upsample8to16(pcm8kHz);
  this.audioBatchBuffer.push(pcm16kHz);
  this.audioBatchSize += pcm16kHz.byteLength;

  // Enough audio queued: send it now.
  if (this.audioBatchSize >= AudioBridge.BATCH_BYTES) {
    this.flushAudioBatch();
    return;
  }

  // A flush is already scheduled for the pending partial batch.
  if (this.audioBatchTimer) return;

  // Guarantee the partial batch goes out even if no more audio arrives (e.g. silence/pause).
  this.audioBatchTimer = setTimeout(() => this.flushAudioBatch(), AudioBridge.BATCH_FLUSH_MS);
}
|
|
421
|
+
|
|
422
|
+
/** Flush accumulated audio chunks as a single WebSocket message */
|
|
423
|
+
private flushAudioBatch(): void {
|
|
424
|
+
if (this.audioBatchTimer) {
|
|
425
|
+
clearTimeout(this.audioBatchTimer);
|
|
426
|
+
this.audioBatchTimer = null;
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
if (this.audioBatchBuffer.length === 0) return;
|
|
430
|
+
if (!this.geminiWs || this.geminiWs.readyState !== WebSocket.OPEN || !this.setupDone) {
|
|
431
|
+
this.audioBatchBuffer = [];
|
|
432
|
+
this.audioBatchSize = 0;
|
|
433
|
+
return;
|
|
434
|
+
}
|
|
435
|
+
|
|
436
|
+
// Concatenate all buffered chunks into one
|
|
437
|
+
const merged = new Uint8Array(this.audioBatchSize);
|
|
438
|
+
let offset = 0;
|
|
439
|
+
for (const chunk of this.audioBatchBuffer) {
|
|
440
|
+
merged.set(chunk, offset);
|
|
441
|
+
offset += chunk.byteLength;
|
|
442
|
+
}
|
|
443
|
+
|
|
444
|
+
this.audioBatchBuffer = [];
|
|
445
|
+
this.audioBatchSize = 0;
|
|
446
|
+
|
|
447
|
+
const base64 = bufferToBase64(merged);
|
|
224
448
|
this.geminiWs.send(JSON.stringify({
|
|
225
449
|
realtimeInput: {
|
|
226
450
|
audio: {
|
|
@@ -249,6 +473,12 @@ export class AudioBridge {
|
|
|
249
473
|
/** Disconnect from Gemini */
|
|
250
474
|
disconnect(): void {
|
|
251
475
|
this.flushTextBuffer();
|
|
476
|
+
// Flush any remaining audio before closing
|
|
477
|
+
this.flushAudioBatch();
|
|
478
|
+
if (this.audioBatchTimer) {
|
|
479
|
+
clearTimeout(this.audioBatchTimer);
|
|
480
|
+
this.audioBatchTimer = null;
|
|
481
|
+
}
|
|
252
482
|
if (this.geminiWs) {
|
|
253
483
|
this.geminiWs.onclose = null;
|
|
254
484
|
this.geminiWs.close();
|
|
@@ -290,7 +520,7 @@ function upsample8to16(input: Buffer | Uint8Array): Uint8Array {
|
|
|
290
520
|
|
|
291
521
|
/**
|
|
292
522
|
* Downsample 24kHz/16kHz PCM to 8kHz for FreeSWITCH.
|
|
293
|
-
*
|
|
523
|
+
* Applies a moving-average low-pass filter before decimation to prevent aliasing.
|
|
294
524
|
*/
|
|
295
525
|
export function downsampleTo8k(input: Uint8Array, inputRate: number): Uint8Array {
|
|
296
526
|
const ratio = inputRate / 8000;
|
|
@@ -300,24 +530,40 @@ export function downsampleTo8k(input: Uint8Array, inputRate: number): Uint8Array
|
|
|
300
530
|
const output = new Uint8Array(outputSamples * 2);
|
|
301
531
|
const outputView = new DataView(output.buffer);
|
|
302
532
|
|
|
533
|
+
// Moving-average window size matches decimation ratio for anti-aliasing
|
|
534
|
+
const filterSize = Math.ceil(ratio);
|
|
535
|
+
const halfFilter = Math.floor(filterSize / 2);
|
|
536
|
+
|
|
303
537
|
for (let i = 0; i < outputSamples; i++) {
|
|
304
|
-
const
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
538
|
+
const srcIndex = Math.floor(i * ratio);
|
|
539
|
+
|
|
540
|
+
// Average over filterSize samples centered on srcIndex
|
|
541
|
+
let sum = 0;
|
|
542
|
+
let count = 0;
|
|
543
|
+
const start = Math.max(0, srcIndex - halfFilter);
|
|
544
|
+
const end = Math.min(inputSamples, srcIndex + halfFilter + 1);
|
|
545
|
+
for (let j = start; j < end; j++) {
|
|
546
|
+
sum += inputView.getInt16(j * 2, true);
|
|
547
|
+
count++;
|
|
308
548
|
}
|
|
549
|
+
|
|
550
|
+
const sample = Math.max(-32768, Math.min(32767, Math.round(sum / count)));
|
|
551
|
+
outputView.setInt16(i * 2, sample, true);
|
|
309
552
|
}
|
|
310
553
|
|
|
311
554
|
return output;
|
|
312
555
|
}
|
|
313
556
|
|
|
314
|
-
/**
|
|
557
|
+
/**
 * Encode raw bytes as a base64 string.
 *
 * NOTE (from the original author): the Gemini Live BidiGenerateContent API
 * currently only supports JSON WebSocket frames with base64-encoded audio;
 * there is no binary/raw PCM transport mode. The ~33% base64 overhead adds
 * roughly 0.5ms of encoding time per chunk — not critical, but worth
 * revisiting if Google adds binary frame support.
 *
 * @param buf Bytes to encode; may be a view into a larger ArrayBuffer.
 * @returns Base64 text of exactly the bytes visible through `buf`.
 */
function bufferToBase64(buf: Uint8Array): string {
  // Wrap the existing memory instead of copying it first: Buffer.from(typedArray)
  // duplicates the bytes, which is wasted work on every audio chunk. Passing
  // byteOffset/byteLength keeps subarray views encoding only their own bytes.
  return Buffer.from(buf.buffer, buf.byteOffset, buf.byteLength).toString("base64");
}
|
|
322
568
|
|
|
323
569
|
/** Convert base64 string to Uint8Array */
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
// ─── Audio Recorder ──────────────────────────────────────────────────────────
|
|
2
|
+
// Records call audio as stereo WAV (caller = left channel, AI = right channel).
|
|
3
|
+
// Both channels are 8kHz PCM 16-bit. The WAV is written on call end.
|
|
4
|
+
|
|
5
|
+
import { writeFileSync, mkdirSync, existsSync } from "node:fs";
|
|
6
|
+
import { join } from "node:path";
|
|
7
|
+
import { homedir } from "node:os";
|
|
8
|
+
|
|
9
|
+
/** Directory that receives finished recordings: ~/.heyhank/telephony/calls. */
const CALLS_DIR = join(homedir(), ".heyhank", "telephony", "calls");
/** Sample rate (Hz) written to the WAV header; both channels are expected to be 8kHz. */
const SAMPLE_RATE = 8000;

/**
 * Collects both legs of a call in memory and writes them as a single stereo
 * WAV file (caller = left channel, AI = right channel) when `save()` is called.
 * Nothing touches the disk until then.
 */
export class AudioRecorder {
  private callerChunks: Uint8Array[] = [];
  private aiChunks: Uint8Array[] = [];
  private callerBytes = 0;
  private aiBytes = 0;
  private callId: string;

  /** @param callId Identifier used as the recording's file name (`<callId>.wav`). */
  constructor(callId: string) {
    this.callId = callId;
  }

  /** Record caller audio (8kHz PCM 16-bit mono from FreeSWITCH). */
  addCallerAudio(pcm: Buffer | Uint8Array): void {
    // This is a view over the caller's memory, not a copy.
    // NOTE(review): assumes the producer never reuses this buffer after handing it over — confirm.
    const chunk = new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength);
    this.callerChunks.push(chunk);
    this.callerBytes += chunk.byteLength;
  }

  /** Record AI audio (already downsampled to 8kHz PCM 16-bit mono). */
  addAiAudio(pcm: Uint8Array): void {
    this.aiChunks.push(pcm);
    this.aiBytes += pcm.byteLength;
  }

  /**
   * Write the stereo WAV file synchronously and release the buffered audio.
   *
   * @returns The path of the written file, or null when no audio was recorded.
   */
  save(): string | null {
    if (this.callerBytes === 0 && this.aiBytes === 0) {
      return null;
    }

    if (!existsSync(CALLS_DIR)) {
      mkdirSync(CALLS_DIR, { recursive: true });
    }

    // Merge chunks into contiguous buffers
    const callerPcm = mergeChunks(this.callerChunks, this.callerBytes);
    const aiPcm = mergeChunks(this.aiChunks, this.aiBytes);

    // Count whole 16-bit samples only. If a channel holds an odd number of bytes
    // (e.g. a stream cut mid-sample), plain division yields a fractional count and
    // the final loop iteration would read and write past the buffers, throwing a
    // RangeError and losing the recording. The stray trailing byte is ignored.
    const callerSamples = Math.floor(callerPcm.byteLength / 2);
    const aiSamples = Math.floor(aiPcm.byteLength / 2);
    // The longer channel determines total length; the shorter one is padded with silence.
    const totalSamples = Math.max(callerSamples, aiSamples);

    // Stereo PCM: 2 channels * 2 bytes per sample * totalSamples
    const stereoData = new Uint8Array(totalSamples * 4);
    const stereoView = new DataView(stereoData.buffer);
    const callerView = new DataView(callerPcm.buffer, callerPcm.byteOffset, callerPcm.byteLength);
    const aiView = new DataView(aiPcm.buffer, aiPcm.byteOffset, aiPcm.byteLength);

    for (let i = 0; i < totalSamples; i++) {
      const callerSample = i < callerSamples ? callerView.getInt16(i * 2, true) : 0;
      const aiSample = i < aiSamples ? aiView.getInt16(i * 2, true) : 0;
      // Left = caller, Right = AI
      stereoView.setInt16(i * 4, callerSample, true);
      stereoView.setInt16(i * 4 + 2, aiSample, true);
    }

    // Build WAV file
    const wav = buildWav(stereoData, SAMPLE_RATE, 2);
    const filePath = join(CALLS_DIR, `${this.callId}.wav`);
    writeFileSync(filePath, wav);

    console.log(`[telephony] Saved call recording: ${filePath} (${(wav.byteLength / 1024).toFixed(0)} KB, ${(totalSamples / SAMPLE_RATE).toFixed(1)}s)`);

    // Free memory
    this.callerChunks = [];
    this.aiChunks = [];
    this.callerBytes = 0;
    this.aiBytes = 0;

    return filePath;
  }
}
|
|
86
|
+
|
|
87
|
+
/**
 * Join a list of byte chunks into one contiguous array.
 *
 * An empty list yields an empty array. A single chunk is returned as-is
 * (same object, no copy). Otherwise the chunks are copied back to back into a
 * new array of `totalBytes` bytes.
 *
 * @param chunks Chunks in the order they should appear.
 * @param totalBytes Combined byte length of all chunks.
 */
function mergeChunks(chunks: Uint8Array[], totalBytes: number): Uint8Array {
  switch (chunks.length) {
    case 0:
      return new Uint8Array(0);
    case 1:
      return chunks[0];
  }

  const joined = new Uint8Array(totalBytes);
  chunks.reduce((writePos, part) => {
    joined.set(part, writePos);
    return writePos + part.byteLength;
  }, 0);
  return joined;
}
|
|
98
|
+
|
|
99
|
+
/**
 * Wrap raw 16-bit PCM data in a 44-byte RIFF/WAVE header.
 *
 * @param pcmData Sample data, copied unchanged after the header.
 * @param sampleRate Sample rate in Hz written to the header.
 * @param channels Channel count written to the header.
 * @returns A new array holding the header followed by `pcmData`.
 */
function buildWav(pcmData: Uint8Array, sampleRate: number, channels: number): Uint8Array {
  const HEADER_BYTES = 44;
  const BITS_PER_SAMPLE = 16;
  const bytesPerSample = BITS_PER_SAMPLE / 8;

  const file = new Uint8Array(HEADER_BYTES + pcmData.byteLength);
  const header = new DataView(file.buffer);

  // Write a four-character ASCII chunk tag at the given offset.
  const putTag = (offset: number, tag: string): void => {
    for (let k = 0; k < tag.length; k++) {
      header.setUint8(offset + k, tag.charCodeAt(k));
    }
  };

  // RIFF container: the size field excludes the 8 bytes of tag + size.
  putTag(0, "RIFF");
  header.setUint32(4, file.byteLength - 8, true);
  putTag(8, "WAVE");

  // fmt chunk: 16-byte body, format 1 = PCM.
  putTag(12, "fmt ");
  header.setUint32(16, 16, true);
  header.setUint16(20, 1, true);
  header.setUint16(22, channels, true);
  header.setUint32(24, sampleRate, true);
  header.setUint32(28, sampleRate * channels * bytesPerSample, true); // byte rate
  header.setUint16(32, channels * bytesPerSample, true); // block align
  header.setUint16(34, BITS_PER_SAMPLE, true);

  // data chunk: size, then the samples.
  putTag(36, "data");
  header.setUint32(40, pcmData.byteLength, true);
  file.set(pcmData, HEADER_BYTES);

  return file;
}
|