heyhank 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +83 -10
  3. package/bin/cli.ts +7 -7
  4. package/bin/ctl.ts +42 -42
  5. package/dist/assets/{AgentsPage-BPhirnCe.js → AgentsPage-B-AAmsMK.js} +3 -3
  6. package/dist/assets/AssistantPage-BV1Mfwdt.js +2 -0
  7. package/dist/assets/BusinessPage-tLpNEz19.js +1 -0
  8. package/dist/assets/{CronManager-DDbz-yiT.js → CronManager-B-K_n3Jg.js} +1 -1
  9. package/dist/assets/HelpPage-Bhf_j6Xr.js +1 -0
  10. package/dist/assets/{IntegrationsPage-CrOitCmJ.js → IntegrationsPage-DAMjs9tM.js} +1 -1
  11. package/dist/assets/JarvisHUD-C_TGXCCn.js +120 -0
  12. package/dist/assets/MediaPage-C48HTTrt.js +1 -0
  13. package/dist/assets/MemoryPage-JkC-qtgp.js +1 -0
  14. package/dist/assets/{PlatformDashboard-Do6F0O2p.js → PlatformDashboard-AUo7tNnE.js} +1 -1
  15. package/dist/assets/{Playground-Fc5cdc5p.js → Playground-AzNMsRBL.js} +1 -1
  16. package/dist/assets/{ProcessPanel-CslEiZkI.js → ProcessPanel-DpE_2sX3.js} +1 -1
  17. package/dist/assets/{PromptsPage-D2EhsdNO.js → PromptsPage-C2RQOs6p.js} +2 -2
  18. package/dist/assets/RunsPage-B9UOyO79.js +1 -0
  19. package/dist/assets/{SandboxManager-a1AVI5q2.js → SandboxManager-jHvYjwfh.js} +1 -1
  20. package/dist/assets/SettingsPage-BBJax6gt.js +51 -0
  21. package/dist/assets/SkillsMarketplace-IjmjfdjD.js +1 -0
  22. package/dist/assets/SocialMediaPage-DoPZHhr2.js +10 -0
  23. package/dist/assets/{TailscalePage-CHiFhZXF.js → TailscalePage-DDEY7ckO.js} +1 -1
  24. package/dist/assets/TelephonyPage-OPNBZYKt.js +9 -0
  25. package/dist/assets/{TerminalPage-Drwyrnfd.js → TerminalPage-BjMbHHW3.js} +1 -1
  26. package/dist/assets/{gemini-live-client-C7rqAW7G.js → gemini-live-client-C70FEtX2.js} +11 -8
  27. package/dist/assets/{index-CEqZnThB.js → index-BgYM4wXw.js} +94 -93
  28. package/dist/assets/index-BkjSoVgn.css +32 -0
  29. package/dist/assets/sw-register-C7NOHtIu.js +1 -0
  30. package/dist/assets/text-chat-client-BSbLJerZ.js +2 -0
  31. package/dist/index.html +2 -2
  32. package/dist/sw.js +1 -1
  33. package/package.json +6 -1
  34. package/server/agent-executor.ts +37 -2
  35. package/server/agent-store.ts +3 -3
  36. package/server/agent-types.ts +11 -0
  37. package/server/assistant-store.ts +232 -6
  38. package/server/auth-manager.ts +9 -0
  39. package/server/cache-headers.ts +1 -1
  40. package/server/calendar-service.ts +10 -0
  41. package/server/ceo/document-store.ts +129 -0
  42. package/server/ceo/finance-store.ts +343 -0
  43. package/server/ceo/kpi-store.ts +208 -0
  44. package/server/ceo/memory-import.ts +277 -0
  45. package/server/ceo/news-store.ts +208 -0
  46. package/server/ceo/template-store.ts +134 -0
  47. package/server/ceo/time-tracking-store.ts +227 -0
  48. package/server/claude-auth-monitor.ts +128 -0
  49. package/server/claude-code-worker.ts +86 -0
  50. package/server/claude-session-discovery.ts +74 -1
  51. package/server/cli-launcher.ts +32 -10
  52. package/server/codex-adapter.ts +2 -2
  53. package/server/codex-ws-proxy.cjs +1 -1
  54. package/server/container-manager.ts +4 -4
  55. package/server/content-intelligence/content-engine.ts +1112 -0
  56. package/server/content-intelligence/platform-knowledge.ts +870 -0
  57. package/server/cron-store.ts +3 -3
  58. package/server/embedding-service.ts +49 -0
  59. package/server/event-bus-types.ts +13 -0
  60. package/server/federation/node-store.ts +5 -4
  61. package/server/fs-utils.ts +28 -1
  62. package/server/hank-notifications-store.ts +91 -0
  63. package/server/hank-tool-executor.ts +1835 -0
  64. package/server/hank-tools.ts +2107 -0
  65. package/server/image-pull-manager.ts +2 -2
  66. package/server/index.ts +25 -2
  67. package/server/llm-providers-streaming.ts +541 -0
  68. package/server/llm-providers.ts +12 -0
  69. package/server/marketplace.ts +249 -0
  70. package/server/mcp-registry.ts +158 -0
  71. package/server/memory-service.ts +296 -0
  72. package/server/obsidian-sync.ts +184 -0
  73. package/server/provider-manager.ts +5 -2
  74. package/server/provider-registry.ts +12 -0
  75. package/server/reminder-scheduler.ts +37 -1
  76. package/server/routes/agent-routes.ts +2 -1
  77. package/server/routes/assistant-routes.ts +198 -5
  78. package/server/routes/ceo-finance-kpi-routes.ts +167 -0
  79. package/server/routes/ceo-news-time-routes.ts +137 -0
  80. package/server/routes/ceo-routes.ts +99 -0
  81. package/server/routes/content-routes.ts +116 -0
  82. package/server/routes/email-routes.ts +147 -0
  83. package/server/routes/env-routes.ts +3 -3
  84. package/server/routes/fs-routes.ts +12 -9
  85. package/server/routes/hank-chat-routes.ts +592 -0
  86. package/server/routes/llm-routes.ts +12 -0
  87. package/server/routes/marketplace-routes.ts +63 -0
  88. package/server/routes/media-routes.ts +1 -1
  89. package/server/routes/memory-routes.ts +127 -0
  90. package/server/routes/platform-routes.ts +14 -675
  91. package/server/routes/sandbox-routes.ts +1 -1
  92. package/server/routes/settings-routes.ts +51 -1
  93. package/server/routes/socialmedia-routes.ts +152 -2
  94. package/server/routes/system-routes.ts +2 -2
  95. package/server/routes/team-routes.ts +71 -0
  96. package/server/routes/telephony-routes.ts +98 -18
  97. package/server/routes.ts +36 -9
  98. package/server/session-creation-service.ts +2 -2
  99. package/server/session-orchestrator.ts +54 -2
  100. package/server/session-types.ts +2 -0
  101. package/server/settings-manager.ts +50 -2
  102. package/server/skill-discovery.ts +68 -0
  103. package/server/socialmedia/adapters/browser-adapter.ts +179 -0
  104. package/server/socialmedia/adapters/postiz-adapter.ts +291 -14
  105. package/server/socialmedia/manager.ts +234 -15
  106. package/server/socialmedia/store.ts +51 -1
  107. package/server/socialmedia/types.ts +35 -2
  108. package/server/socialview/browser-manager.ts +150 -0
  109. package/server/socialview/extractors.ts +1298 -0
  110. package/server/socialview/image-describe.ts +188 -0
  111. package/server/socialview/library.ts +119 -0
  112. package/server/socialview/poster.ts +276 -0
  113. package/server/socialview/routes.ts +371 -0
  114. package/server/socialview/style-analyzer.ts +187 -0
  115. package/server/socialview/style-profiles.ts +67 -0
  116. package/server/socialview/types.ts +166 -0
  117. package/server/socialview/vision.ts +127 -0
  118. package/server/socialview/vnc-manager.ts +110 -0
  119. package/server/style-injector.ts +135 -0
  120. package/server/team-service.ts +239 -0
  121. package/server/team-store.ts +75 -0
  122. package/server/team-types.ts +52 -0
  123. package/server/telephony/audio-bridge.ts +281 -35
  124. package/server/telephony/audio-recorder.ts +132 -0
  125. package/server/telephony/call-manager.ts +803 -104
  126. package/server/telephony/call-types.ts +67 -1
  127. package/server/telephony/esl-client.ts +319 -0
  128. package/server/telephony/freeswitch-sync.ts +155 -0
  129. package/server/telephony/phone-utils.ts +63 -0
  130. package/server/telephony/telephony-store.ts +9 -8
  131. package/server/url-validator.ts +82 -0
  132. package/server/vault-markdown.ts +317 -0
  133. package/server/vault-migration.ts +121 -0
  134. package/server/vault-store.ts +466 -0
  135. package/server/vault-watcher.ts +59 -0
  136. package/server/vector-store.ts +210 -0
  137. package/server/voice-pipeline/gemini-live-adapter.ts +97 -0
  138. package/server/voice-pipeline/greeting-cache.ts +200 -0
  139. package/server/voice-pipeline/manager.ts +249 -0
  140. package/server/voice-pipeline/pipeline.ts +335 -0
  141. package/server/voice-pipeline/providers/index.ts +47 -0
  142. package/server/voice-pipeline/providers/llm-internal.ts +527 -0
  143. package/server/voice-pipeline/providers/stt-google.ts +157 -0
  144. package/server/voice-pipeline/providers/tts-google.ts +126 -0
  145. package/server/voice-pipeline/types.ts +247 -0
  146. package/server/ws-bridge-types.ts +6 -1
  147. package/dist/assets/AssistantPage-DJ-cMQfb.js +0 -1
  148. package/dist/assets/HelpPage-DMfkzERp.js +0 -1
  149. package/dist/assets/MediaPage-CE5rdvkC.js +0 -1
  150. package/dist/assets/RunsPage-C5BZF5Rx.js +0 -1
  151. package/dist/assets/SettingsPage-DirhjQrJ.js +0 -51
  152. package/dist/assets/SocialMediaPage-DBuM28vD.js +0 -1
  153. package/dist/assets/TelephonyPage-x0VV0fOo.js +0 -1
  154. package/dist/assets/index-C8M_PUmX.css +0 -32
  155. package/dist/assets/sw-register-LSSpj6RU.js +0 -1
  156. package/server/socialmedia/adapters/ayrshare-adapter.ts +0 -169
@@ -0,0 +1,527 @@
1
+ // ─── Internal LLM Provider (provider-agnostic streaming + tool use) ──────────
2
+ // Wraps the existing HeyHank provider system but adds:
3
+ // - multi-turn message arrays (system + user/assistant)
4
+ // - streaming via SSE for low latency (Anthropic + OpenAI-compatible)
5
+ // - tool use support (Anthropic Tool Use + OpenAI Function Calling)
6
+ //
7
+ // Provider resolution:
8
+ // - voicePipeline.llm.provider (explicit, required) → provider-manager.getProviderConfig()
9
+ // - If the chosen provider has no API key / isn't configured → null (call-time error).
10
+ //
11
+ // `generate()` without tools delegates to the non-streaming `callInternalAI`; `generateStream()` never falls back and reports an unconfigured provider as an error.
12
+
13
+ import { callInternalAI } from "../../internal-ai.js";
14
+ import { getProviderConfig } from "../../provider-manager.js";
15
+ import { getProviderById } from "../../provider-registry.js";
16
+ import type {
17
+ LLMConfig,
18
+ LLMMessage,
19
+ LLMProvider,
20
+ LLMProviderId,
21
+ LLMStreamCallbacks,
22
+ LLMToolCall,
23
+ LLMToolDef,
24
+ LLMToolResult,
25
+ } from "../types.js";
26
+
27
+ // ─── Provider resolution ─────────────────────────────────────────────────────
28
+
29
/**
 * Built-in API roots for OpenAI-compatible providers, keyed by provider id.
 * `resolveById` consults this table only when the provider's own configuration
 * does not supply a base URL. Entries carry no trailing slash and no
 * `/chat/completions` suffix. Anthropic is intentionally absent: its endpoint
 * is fixed inside `resolveById`.
 */
const PROVIDER_BASE_URLS: Record<string, string> = {
  chutes: "https://api.chutes.ai/v1",
  deepseek: "https://api.deepseek.com/v1",
  groq: "https://api.groq.com/openai/v1",
  huggingface: "https://api-inference.huggingface.co/v1",
  minimax: "https://api.minimax.chat/v1",
  mistral: "https://api.mistral.ai/v1",
  moonshot: "https://api.moonshot.cn/v1",
  openai: "https://api.openai.com/v1",
  openrouter: "https://openrouter.ai/api/v1",
  // Both Qwen ids point at the same DashScope compatibility endpoint.
  qwen: "https://dashscope.aliyuncs.com/compatible-mode/v1",
  "qwen-alibaba": "https://dashscope.aliyuncs.com/compatible-mode/v1",
  together: "https://api.together.xyz/v1",
  venice: "https://api.venice.ai/api/v1",
  xai: "https://api.x.ai/v1",
  zai: "https://open.bigmodel.cn/api/paas/v4",
};
46
+
47
/**
 * Model used per provider id when the stored provider configuration has no
 * custom model. `resolveById` falls through to the registry definition's
 * `defaultModel` for ids that are not listed here.
 */
const PROVIDER_DEFAULT_MODELS: Record<string, string> = {
  anthropic: "claude-haiku-4-5-20251001",
  deepseek: "deepseek-chat",
  groq: "llama-3.3-70b-versatile",
  mistral: "mistral-small-latest",
  openai: "gpt-4o-mini",
  openrouter: "meta-llama/llama-3.1-8b-instruct:free",
  qwen: "qwen-turbo",
  together: "meta-llama/Llama-3.1-8B-Instruct-Turbo",
  xai: "grok-3-mini",
};
58
+
59
/** Everything a streaming request needs to know about the selected provider. */
interface ResolvedProvider {
  /**
   * Wire protocol to speak: `"anthropic"` selects the Anthropic Messages API,
   * `"openai-compat"` selects an OpenAI-style `chat/completions` endpoint.
   */
  kind: "anthropic" | "openai-compat";
  /** Provider id exactly as it was passed to the resolver. */
  providerId: string;
  /** Secret read from the provider configuration; may be empty for `ollama`. */
  apiKey: string;
  /** API root without a trailing slash and without the `/chat/completions` suffix. */
  baseUrl: string;
  /** Model name used when the per-call config does not override it. */
  model: string;
}
68
+
69
/**
 * Entry point for provider resolution; currently a direct pass-through to
 * `resolveById`.
 *
 * @param providerId Provider id to resolve.
 * @returns The resolved provider, or `null` when `resolveById` yields none.
 */
function resolveProvider(providerId: string): ResolvedProvider | null {
  const resolved = resolveById(providerId);
  return resolved;
}
72
+
73
/**
 * Builds a `ResolvedProvider` from the provider registry definition and the
 * stored provider configuration.
 *
 * @param id Provider id.
 * @returns `null` when the provider is unknown, has no stored configuration,
 *          lacks its required secret (every id except `ollama` needs one), or,
 *          for OpenAI-compatible providers, has no base URL or model.
 */
function resolveById(id: string): ResolvedProvider | null {
  const definition = getProviderById(id);
  if (!definition) return null;

  const stored = getProviderConfig(id);
  if (!stored) return null;

  // The API key is the first env field flagged both secret and required.
  const keyField = definition.envFields.find((field) => field.secret && field.required);
  let apiKey = "";
  if (keyField) apiKey = stored.envValues[keyField.key] || "";
  if (id !== "ollama" && !apiKey) return null;

  // Base URL: a configured *BASE_URL* env field wins, then the built-in table.
  const baseUrlField = definition.envFields.find((field) => field.key.includes("BASE_URL"));
  const configuredUrl = baseUrlField ? stored.envValues[baseUrlField.key] || "" : "";
  const baseUrl = configuredUrl || PROVIDER_BASE_URLS[id] || "";

  const model = stored.customModel || PROVIDER_DEFAULT_MODELS[id] || definition.defaultModel || "";

  // Anthropic always talks to its fixed endpoint, whatever base URL was found.
  if (id === "anthropic") {
    return {
      kind: "anthropic",
      providerId: id,
      apiKey,
      baseUrl: "https://api.anthropic.com/v1",
      model,
    };
  }

  if (!baseUrl || !model) return null;

  return {
    kind: "openai-compat",
    providerId: id,
    apiKey,
    // Strip trailing slashes so callers can append "/chat/completions" directly.
    baseUrl: baseUrl.replace(/\/+$/, ""),
    model,
  };
}
96
+
97
+ // ─── Anthropic streaming with tool use ───────────────────────────────────────
98
+
99
/** One assistant content block, assembled incrementally from stream events. */
interface AssistantBlock {
  /** Block flavour announced by the `content_block_start` event. */
  type: "text" | "tool_use";
  /** Text blocks: concatenation of every `text_delta` received so far. */
  text?: string;
  /** Tool-use blocks: id taken from the `content_block_start` event. */
  id?: string;
  /** Tool-use blocks: tool name taken from the `content_block_start` event. */
  name?: string;
  /** Tool-use blocks: raw JSON argument text built from `input_json_delta` fragments. */
  inputJson?: string;
}
106
+
107
/**
 * Translates provider-neutral tool definitions into the shape used for the
 * Anthropic request's `tools` field (`parameters` becomes `input_schema`).
 *
 * @param tools Tool definitions, possibly absent.
 * @returns The converted list, or `undefined` when there is nothing to send.
 */
function toAnthropicTools(tools: LLMToolDef[] | undefined): Array<Record<string, unknown>> | undefined {
  if (!tools?.length) return undefined;
  return tools.map(({ name, description, parameters }) => ({
    name,
    description,
    input_schema: parameters,
  }));
}
115
+
116
/**
 * Performs one streaming request against the Anthropic Messages endpoint and
 * collects the assistant's content blocks.
 *
 * Failures are reported through the result (`ok: false` + `error`) rather than
 * thrown: a rejected `fetch`, a non-2xx response, an in-stream `error` event,
 * and a broken read all end up there.
 *
 * @param messages    Full conversation; only the first `system` message is read here.
 * @param history     Non-system turns, sent verbatim as the request's `messages`.
 * @param config      Optional per-call overrides (model, maxTokens, temperature, tools).
 * @param provider    Resolved credentials, base URL and default model.
 * @param onTextDelta Called synchronously for every streamed text fragment.
 * @returns Blocks stored at their stream index (the array may contain holes),
 *          the last reported stop reason, and the success flag.
 */
async function streamAnthropicOnce(
  messages: LLMMessage[],
  history: Array<{ role: string; content: unknown }>,
  config: LLMConfig | undefined,
  provider: ResolvedProvider,
  onTextDelta: (chunk: string) => void,
): Promise<{ blocks: AssistantBlock[]; stopReason: string | null; ok: boolean; error?: string }> {
  const system = messages.find((m) => m.role === "system")?.content;
  const body: Record<string, unknown> = {
    model: config?.model || provider.model,
    max_tokens: config?.maxTokens ?? 512,
    messages: history,
    temperature: config?.temperature ?? 0.6,
    stream: true,
  };
  if (system) body.system = system;
  const tools = toAnthropicTools(config?.tools);
  if (tools) body.tools = tools;

  let res: Response;
  try {
    res = await fetch(`${provider.baseUrl}/messages`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "x-api-key": provider.apiKey,
        "anthropic-version": "2023-06-01",
      },
      body: JSON.stringify(body),
    });
  } catch (e) {
    // DNS/TLS/connection failures reject the fetch promise; surface them the
    // same way as HTTP errors so callers only have to check `ok`.
    return { blocks: [], stopReason: null, ok: false, error: `Anthropic request failed: ${e instanceof Error ? e.message : String(e)}` };
  }

  if (!res.ok || !res.body) {
    const errText = res.body ? await res.text() : "";
    return { blocks: [], stopReason: null, ok: false, error: `Anthropic stream error ${res.status}: ${errText.slice(0, 200)}` };
  }

  const blocks: AssistantBlock[] = [];
  let stopReason: string | null = null;

  /**
   * Applies one SSE event to `blocks` / `stopReason`.
   * Returns an error message when the event is an in-stream `error`, else null.
   */
  const handleEvent = (event: string): string | null => {
    const dataLine = event.split("\n").find((l) => l.startsWith("data:"));
    if (!dataLine) return null;
    const jsonStr = dataLine.slice(5).trim();
    if (!jsonStr || jsonStr === "[DONE]") return null;

    let data: Record<string, unknown>;
    try { data = JSON.parse(jsonStr); } catch { return null; }

    const type = data.type as string | undefined;
    if (type === "error") {
      // The API can abort a 200 response mid-stream (e.g. overloaded_error).
      const err = data.error as { type?: string; message?: string } | undefined;
      const detail = [err?.type, err?.message].filter(Boolean).join(": ") || "unknown error";
      return `Anthropic stream error: ${detail.slice(0, 200)}`;
    }
    if (type === "content_block_start") {
      const idxNum = data.index as number;
      const cb = data.content_block as { type: string; id?: string; name?: string } | undefined;
      if (!cb) return null;
      if (cb.type === "text") blocks[idxNum] = { type: "text", text: "" };
      else if (cb.type === "tool_use") blocks[idxNum] = { type: "tool_use", id: cb.id, name: cb.name, inputJson: "" };
    } else if (type === "content_block_delta") {
      const idxNum = data.index as number;
      const delta = data.delta as { type: string; text?: string; partial_json?: string } | undefined;
      const block = blocks[idxNum];
      if (!block || !delta) return null;
      if (delta.type === "text_delta" && delta.text) {
        block.text = (block.text || "") + delta.text;
        onTextDelta(delta.text);
      } else if (delta.type === "input_json_delta" && delta.partial_json !== undefined) {
        block.inputJson = (block.inputJson || "") + delta.partial_json;
      }
    } else if (type === "message_delta") {
      const delta = data.delta as { stop_reason?: string } | undefined;
      if (delta?.stop_reason) stopReason = delta.stop_reason;
    }
    return null;
  };

  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  let streamError: string | null = null;

  try {
    while (streamError === null) {
      const { value, done } = await reader.read();
      if (done) {
        // Flush the decoder and parse a final event that lacked its blank-line terminator.
        buffer += decoder.decode();
        if (buffer.trim()) streamError = handleEvent(buffer);
        break;
      }
      buffer += decoder.decode(value, { stream: true });

      // Events are separated by a blank line; keep any partial event buffered.
      let idx = buffer.indexOf("\n\n");
      while (streamError === null && idx >= 0) {
        const event = buffer.slice(0, idx);
        buffer = buffer.slice(idx + 2);
        streamError = handleEvent(event);
        idx = buffer.indexOf("\n\n");
      }
    }
  } catch (e) {
    return { blocks, stopReason, ok: false, error: e instanceof Error ? e.message : String(e) };
  }

  if (streamError !== null) {
    // Stop consuming the response; a failing cancel is irrelevant at this point.
    await reader.cancel().catch(() => undefined);
    return { blocks, stopReason, ok: false, error: streamError };
  }

  return { blocks, stopReason, ok: true };
}
205
+
206
/**
 * Converts accumulated stream blocks into an assistant `content` array that
 * can be sent back to the Messages API in the next round.
 *
 * - Holes in `blocks` (indices that never received a start event) are skipped.
 * - Text blocks that are empty or whitespace-only are dropped, because the API
 *   rejects such blocks when they are replayed in the conversation history.
 * - Tool-use arguments that are missing, malformed, or not a JSON object
 *   become `{}`.
 *
 * @param blocks Blocks as collected by the streaming reader.
 * @returns Content blocks in their original order.
 */
function anthropicBlocksToContent(blocks: AssistantBlock[]): Array<Record<string, unknown>> {
  const content: Array<Record<string, unknown>> = [];
  for (const block of blocks) {
    if (!block) continue;

    if (block.type === "text") {
      if (block.text && block.text.trim() !== "") {
        content.push({ type: "text", text: block.text });
      }
      continue;
    }

    let input: Record<string, unknown> = {};
    try {
      const parsed: unknown = block.inputJson ? JSON.parse(block.inputJson) : {};
      if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
        input = parsed as Record<string, unknown>;
      }
    } catch {
      // Truncated or malformed argument JSON: fall back to an empty input object.
    }
    content.push({ type: "tool_use", id: block.id, name: block.name, input });
  }
  return content;
}
216
+
217
/**
 * Picks the tool-use blocks out of a streamed response and turns them into
 * provider-neutral tool calls.
 *
 * @param blocks Blocks as collected by the streaming reader (may contain holes).
 * @returns One call per tool-use block, in stream order. A missing id or name
 *          becomes `""`; missing or unparseable arguments become `{}`.
 */
function anthropicExtractToolCalls(blocks: AssistantBlock[]): LLMToolCall[] {
  return blocks
    .filter((block) => Boolean(block) && block.type === "tool_use")
    .map((block) => {
      let args: Record<string, unknown> = {};
      if (block.inputJson) {
        try {
          args = JSON.parse(block.inputJson);
        } catch {
          /* ignore */
        }
      }
      return { id: block.id || "", name: block.name || "", args };
    });
}
227
+
228
/**
 * Drives the Anthropic request → tool execution → request loop.
 *
 * Text is forwarded to `callbacks.onChunk` as it streams. When a round stops
 * with `tool_use` and a tool handler is registered, the handler is invoked and
 * its results are fed back as a `user` turn of `tool_result` blocks. The loop
 * runs for at most four rounds.
 *
 * @param messages  Conversation; system messages are excluded from the turn list.
 * @param callbacks Streaming callbacks (`onChunk`, optional `onToolCalls`).
 * @param config    Optional per-call overrides.
 * @param provider  Resolved Anthropic provider.
 * @returns All text streamed across rounds, with `ok: false` and `error` when a
 *          round fails.
 */
async function streamAnthropicWithTools(
  messages: LLMMessage[],
  callbacks: LLMStreamCallbacks,
  config: LLMConfig | undefined,
  provider: ResolvedProvider,
): Promise<{ text: string; ok: boolean; error?: string }> {
  const maxRounds = 4;

  const turns: Array<{ role: string; content: unknown }> = [];
  for (const message of messages) {
    if (message.role === "system") continue;
    turns.push({ role: message.role, content: message.content });
  }

  let spoken = "";
  const forward = (chunk: string): void => {
    spoken += chunk;
    callbacks.onChunk(chunk);
  };

  for (let round = 0; round < maxRounds; round += 1) {
    const step = await streamAnthropicOnce(messages, turns, config, provider, forward);
    if (!step.ok) {
      return { text: spoken, ok: false, error: step.error };
    }

    // Record what the assistant produced so the next round sees it.
    const assistantContent = anthropicBlocksToContent(step.blocks);
    if (assistantContent.length > 0) {
      turns.push({ role: "assistant", content: assistantContent });
    }

    if (step.stopReason !== "tool_use") {
      return { text: spoken, ok: true };
    }

    const requested = anthropicExtractToolCalls(step.blocks);
    if (requested.length === 0 || !callbacks.onToolCalls) {
      return { text: spoken, ok: true };
    }

    let outcomes: LLMToolResult[];
    try {
      outcomes = await callbacks.onToolCalls(requested);
    } catch (e) {
      console.error("[voice-pipeline] tool handler error:", e);
      // Report the same failure for every requested call.
      outcomes = requested.map((call) => ({
        id: call.id,
        name: call.name,
        response: { error: e instanceof Error ? e.message : String(e) },
      }));
    }

    // `end_call` terminates the loop: the goodbye was already spoken in this
    // round, and another round would only add a second, redundant goodbye
    // that gets synthesized on top of it.
    const hangsUp = requested.some((call) => call.name === "end_call");
    if (hangsUp) {
      return { text: spoken, ok: true };
    }

    const resultBlocks = outcomes.map((outcome) => ({
      type: "tool_result",
      tool_use_id: outcome.id,
      content: typeof outcome.response === "string" ? outcome.response : JSON.stringify(outcome.response),
    }));
    turns.push({ role: "user", content: resultBlocks });
  }

  console.warn("[voice-pipeline] Anthropic tool loop hit max iterations");
  return { text: spoken, ok: true };
}
284
+
285
+ // ─── OpenAI-compatible streaming with tool calling ───────────────────────────
286
+
287
/** Scratch record used to stitch one streamed tool call together from its chunks. */
interface OpenAIToolCallAccumulator {
  /** Position of the call within the response, as reported by each chunk. */
  index: number;
  /** Call id, once a chunk has carried it. */
  id?: string;
  /** Function name, once a chunk has carried it. */
  name?: string;
  /** Concatenated `function.arguments` fragments (raw JSON text). */
  argsJson: string;
}
293
+
294
/**
 * Translates provider-neutral tool definitions into OpenAI function-calling
 * entries (`{ type: "function", function: { … } }`).
 *
 * @param tools Tool definitions, possibly absent.
 * @returns The converted list, or `undefined` when there is nothing to send.
 */
function toOpenAITools(tools: LLMToolDef[] | undefined): Array<Record<string, unknown>> | undefined {
  if (!tools?.length) return undefined;
  return tools.map(({ name, description, parameters }) => {
    const fn = { name, description, parameters };
    return { type: "function", function: fn };
  });
}
301
+
302
/**
 * Projects each message onto the `{ role, content }` pair expected by
 * OpenAI-style chat endpoints; any other message properties are left out.
 *
 * @param messages Conversation including system messages.
 * @returns A new array, one entry per input message, in the same order.
 */
function buildOpenAIMessages(messages: LLMMessage[]): Array<Record<string, unknown>> {
  return messages.map(({ role, content }) => ({ role, content }));
}
306
+
307
/** Outcome of a single OpenAI-compatible streaming round. */
interface OpenAIStreamResult {
  /** All text content streamed during the round. */
  text: string;
  /** Tool calls assembled from the streamed fragments, ordered by index. */
  toolCalls: LLMToolCall[];
  /** Last `finish_reason` seen in the stream, or `null` if none was reported. */
  finishReason: string | null;
  /** `false` when the request or the stream failed. */
  ok: boolean;
  /** Failure description; set only when `ok` is `false`. */
  error?: string;
}
314
+
315
/**
 * Performs one streaming `chat/completions` request against an
 * OpenAI-compatible endpoint and collects text plus tool calls.
 *
 * Failures are reported through the result (`ok: false` + `error`) rather than
 * thrown: a rejected `fetch`, a non-2xx response, an `error` payload delivered
 * inside the stream, and a broken read all end up there.
 *
 * @param history     Messages in OpenAI wire format (system + user/assistant + tool).
 * @param config      Optional per-call overrides (model, maxTokens, temperature, tools).
 * @param provider    Resolved credentials, base URL and default model.
 * @param onTextDelta Called synchronously for every streamed text fragment.
 * @returns Streamed text, materialized tool calls, last finish reason and the
 *          success flag.
 */
async function streamOpenAIOnce(
  history: Array<Record<string, unknown>>,
  config: LLMConfig | undefined,
  provider: ResolvedProvider,
  onTextDelta: (chunk: string) => void,
): Promise<OpenAIStreamResult> {
  const body: Record<string, unknown> = {
    model: config?.model || provider.model,
    max_tokens: config?.maxTokens ?? 512,
    messages: history,
    temperature: config?.temperature ?? 0.6,
    stream: true,
  };
  const tools = toOpenAITools(config?.tools);
  if (tools) {
    body.tools = tools;
    body.tool_choice = "auto";
  }

  // Keyless providers are allowed: only send Authorization when a key exists.
  const headers: Record<string, string> = { "Content-Type": "application/json" };
  if (provider.apiKey) headers["Authorization"] = `Bearer ${provider.apiKey}`;

  let res: Response;
  try {
    res = await fetch(`${provider.baseUrl}/chat/completions`, {
      method: "POST",
      headers,
      body: JSON.stringify(body),
    });
  } catch (e) {
    // DNS/TLS/connection failures reject the fetch promise; surface them the
    // same way as HTTP errors so callers only have to check `ok`.
    return { text: "", toolCalls: [], finishReason: null, ok: false, error: `${provider.providerId} request failed: ${e instanceof Error ? e.message : String(e)}` };
  }

  if (!res.ok || !res.body) {
    const errText = res.body ? await res.text() : "";
    return { text: "", toolCalls: [], finishReason: null, ok: false, error: `${provider.providerId} stream error ${res.status}: ${errText.slice(0, 200)}` };
  }

  let fullText = "";
  let finishReason: string | null = null;
  /** Accumulators keyed by index (tool calls arrive in fragments per index). */
  const toolAcc = new Map<number, OpenAIToolCallAccumulator>();

  /**
   * Applies one SSE line to the accumulators.
   * Returns an error message when the payload carries an `error`, else null.
   */
  const handleLine = (rawLine: string): string | null => {
    const line = rawLine.trim();
    if (!line.startsWith("data:")) return null;
    const jsonStr = line.slice(5).trim();
    if (!jsonStr || jsonStr === "[DONE]") return null;

    let data: Record<string, unknown>;
    try { data = JSON.parse(jsonStr); } catch { return null; }

    if (data.error) {
      // Some servers report failures inside an already-started 200 stream.
      const err = data.error as { message?: unknown } | string;
      let detail: string;
      if (typeof err === "string") detail = err;
      else if (typeof err.message === "string") detail = err.message;
      else detail = JSON.stringify(err);
      return `${provider.providerId} stream error: ${detail.slice(0, 200)}`;
    }

    const choices = data.choices as Array<{
      delta?: { content?: string; tool_calls?: Array<{ index: number; id?: string; function?: { name?: string; arguments?: string } }> };
      finish_reason?: string | null;
    }> | undefined;
    if (!choices || choices.length === 0) return null;
    const choice = choices[0];
    const delta = choice.delta;

    if (delta?.content) {
      fullText += delta.content;
      onTextDelta(delta.content);
    }
    if (delta?.tool_calls) {
      for (const tc of delta.tool_calls) {
        const acc = toolAcc.get(tc.index) ?? { index: tc.index, argsJson: "" };
        if (tc.id) acc.id = tc.id;
        if (tc.function?.name) acc.name = tc.function.name;
        if (tc.function?.arguments) acc.argsJson += tc.function.arguments;
        toolAcc.set(tc.index, acc);
      }
    }
    if (choice.finish_reason) finishReason = choice.finish_reason;
    return null;
  };

  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  let streamError: string | null = null;

  try {
    while (streamError === null) {
      const { value, done } = await reader.read();
      if (done) {
        // Flush the decoder and parse a final line that lacked its newline.
        buffer += decoder.decode();
        if (buffer.trim()) streamError = handleLine(buffer);
        break;
      }
      buffer += decoder.decode(value, { stream: true });

      // Process complete lines; keep any partial line buffered for the next chunk.
      let idx = buffer.indexOf("\n");
      while (streamError === null && idx >= 0) {
        const line = buffer.slice(0, idx);
        buffer = buffer.slice(idx + 1);
        streamError = handleLine(line);
        idx = buffer.indexOf("\n");
      }
    }
  } catch (e) {
    return { text: fullText, toolCalls: [], finishReason, ok: false, error: e instanceof Error ? e.message : String(e) };
  }

  if (streamError !== null) {
    // Stop consuming the response; a failing cancel is irrelevant at this point.
    await reader.cancel().catch(() => undefined);
    return { text: fullText, toolCalls: [], finishReason, ok: false, error: streamError };
  }

  // Materialize tool calls in index order; unparseable arguments become {}.
  const toolCalls: LLMToolCall[] = [];
  for (const acc of [...toolAcc.values()].sort((a, b) => a.index - b.index)) {
    let args: Record<string, unknown> = {};
    try { args = acc.argsJson ? JSON.parse(acc.argsJson) : {}; } catch { /* keep empty */ }
    toolCalls.push({ id: acc.id || `call_${acc.index}`, name: acc.name || "", args });
  }

  return { text: fullText, toolCalls, finishReason, ok: true };
}
412
+
413
/**
 * Drives the OpenAI-compatible request → tool execution → request loop.
 *
 * Text is forwarded to `callbacks.onChunk` as it streams. When a round
 * finishes with `tool_calls` and a tool handler is registered, the handler is
 * invoked and each result is appended as a `tool` message. The loop runs for
 * at most four rounds.
 *
 * @param messages  Conversation including system messages.
 * @param callbacks Streaming callbacks (`onChunk`, optional `onToolCalls`).
 * @param config    Optional per-call overrides.
 * @param provider  Resolved OpenAI-compatible provider.
 * @returns All text streamed across rounds, with `ok: false` and `error` when a
 *          round fails.
 */
async function streamOpenAIWithTools(
  messages: LLMMessage[],
  callbacks: LLMStreamCallbacks,
  config: LLMConfig | undefined,
  provider: ResolvedProvider,
): Promise<{ text: string; ok: boolean; error?: string }> {
  const maxRounds = 4;

  // One flat list holds system, user/assistant and tool messages alike.
  const history: Array<Record<string, unknown>> = buildOpenAIMessages(messages);

  let spoken = "";
  const forward = (chunk: string): void => {
    spoken += chunk;
    callbacks.onChunk(chunk);
  };

  for (let round = 0; round < maxRounds; round += 1) {
    const step = await streamOpenAIOnce(history, config, provider, forward);
    if (!step.ok) {
      return { text: spoken, ok: false, error: step.error };
    }

    // Record the assistant turn: its text plus any tool calls it requested.
    const assistantTurn: Record<string, unknown> = { role: "assistant", content: step.text || null };
    if (step.toolCalls.length > 0) {
      assistantTurn.tool_calls = step.toolCalls.map((call) => ({
        id: call.id,
        type: "function",
        function: { name: call.name, arguments: JSON.stringify(call.args) },
      }));
    }
    history.push(assistantTurn);

    if (step.finishReason !== "tool_calls" || step.toolCalls.length === 0 || !callbacks.onToolCalls) {
      return { text: spoken, ok: true };
    }

    let outcomes: LLMToolResult[];
    try {
      outcomes = await callbacks.onToolCalls(step.toolCalls);
    } catch (e) {
      console.error("[voice-pipeline] tool handler error:", e);
      // Report the same failure for every requested call.
      outcomes = step.toolCalls.map((call) => ({
        id: call.id,
        name: call.name,
        response: { error: e instanceof Error ? e.message : String(e) },
      }));
    }

    // One `tool` message per result.
    for (const outcome of outcomes) {
      const content = typeof outcome.response === "string" ? outcome.response : JSON.stringify(outcome.response);
      history.push({ role: "tool", tool_call_id: outcome.id, content });
    }

    // `end_call` terminates the loop: another round would only produce a
    // second, redundant goodbye after the one already spoken.
    const hangsUp = step.toolCalls.some((call) => call.name === "end_call");
    if (hangsUp) {
      return { text: spoken, ok: true };
    }
  }

  console.warn(`[voice-pipeline] ${provider.providerId} tool loop hit max iterations`);
  return { text: spoken, ok: true };
}
474
+
475
+ // ─── Public Provider ─────────────────────────────────────────────────────────
476
+
477
/**
 * Voice-pipeline LLM provider backed by the application's own provider
 * configuration. Streaming requests go to the provider selected at
 * construction; one-shot requests without tools go through `callInternalAI`.
 */
export class InternalLLMProvider implements LLMProvider {
  readonly id: LLMProviderId;

  /** @param providerId Id of the configured provider to use for streaming. */
  constructor(providerId: LLMProviderId) {
    this.id = providerId;
  }

  /**
   * One-shot generation.
   *
   * With tools configured, the request runs through `generateStream` and the
   * chunks are collected into a single string. Without tools, the conversation
   * is flattened into a "User:/Assistant:" transcript and passed to
   * `callInternalAI`.
   *
   * @param messages Conversation, optionally starting with a system message.
   * @param config   Optional per-call overrides.
   */
  async generate(messages: LLMMessage[], config?: LLMConfig): Promise<{ text: string; ok: boolean; error?: string }> {
    const toolCount = config?.tools ? config.tools.length : 0;
    if (toolCount > 0) {
      const pieces: string[] = [];
      const streamed = await this.generateStream(
        messages,
        {
          onChunk: (piece) => {
            pieces.push(piece);
          },
        },
        config,
      );
      return { text: pieces.join("") || streamed.text, ok: streamed.ok, error: streamed.error };
    }

    const systemPrompt = messages.find((m) => m.role === "system")?.content;

    // Flatten the dialogue; every non-user role is labelled "Assistant".
    const transcript: string[] = [];
    for (const message of messages) {
      if (message.role === "system") continue;
      const speaker = message.role === "user" ? "User" : "Assistant";
      transcript.push(`${speaker}: ${message.content}`);
    }

    return await callInternalAI({
      systemPrompt,
      userPrompt: transcript.join("\n\n") + "\n\nAssistant:",
      maxTokens: config?.maxTokens ?? 512,
      temperature: config?.temperature ?? 0.6,
      timeoutMs: 30_000,
    });
  }

  /**
   * Streaming generation with optional tool use.
   *
   * @param messages  Conversation, optionally starting with a system message.
   * @param callbacks Receives text chunks and, if provided, tool-call requests.
   * @param config    Optional per-call overrides.
   * @returns `ok: false` with an explanatory `error` when this provider cannot
   *          be resolved; otherwise the result of the provider-specific loop.
   */
  async generateStream(
    messages: LLMMessage[],
    callbacks: LLMStreamCallbacks,
    config?: LLMConfig,
  ): Promise<{ text: string; ok: boolean; error?: string }> {
    const resolved = resolveProvider(this.id);

    if (resolved === null) {
      const error = `Voice-Pipeline Provider "${this.id}" ist nicht konfiguriert. In Settings → Providers den API-Key hinterlegen und aktivieren — oder in Settings → Telephony → Voice Engine einen anderen Provider wählen.`;
      return { text: "", ok: false, error };
    }

    switch (resolved.kind) {
      case "anthropic":
        return streamAnthropicWithTools(messages, callbacks, config, resolved);
      default:
        return streamOpenAIWithTools(messages, callbacks, config, resolved);
    }
  }
}
@@ -0,0 +1,157 @@
1
+ // ─── Google Cloud STT Provider (streaming) ──────────────────────────────────
2
+ // Streams 8kHz LINEAR16 from FreeSWITCH directly into Cloud Speech-to-Text.
3
+ // Uses the v1 streaming gRPC client (`speechV1.SpeechClient`) for low latency.
4
+
5
+ import { v1 as speechV1 } from "@google-cloud/speech";
6
+ import type { STTConfig, STTProvider, STTResult, STTSession } from "../types.js";
7
+
8
+ // Re-use the same service account key as Vertex AI (configured in telephony settings)
9
/**
 * Location of the Google Cloud service-account key file.
 *
 * @returns `GCP_SERVICE_ACCOUNT_KEY` from the environment when it is set and
 *          non-empty, otherwise the fixed default path.
 */
function getKeyFile(): string {
  const fromEnv = process.env.GCP_SERVICE_ACCOUNT_KEY;
  if (fromEnv) return fromEnv;
  return "/opt/agentplatform/gcp-service-account.json";
}
13
+
14
+ // Google Cloud streaming recognize has a hard 305s limit per stream and the
15
+ // underlying gRPC stream gets destroyed on any transient error. To keep the
16
+ // call alive we auto-restart the stream whenever it ends/errors, unless the
17
+ // session was explicitly closed.
18
/**
 * Age in milliseconds (four minutes) after which a recognize stream is
 * proactively replaced, so rotation happens before the service-side cap of
 * roughly five minutes per stream is reached.
 */
const STREAM_MAX_AGE_MS = 240_000;
19
+
20
/**
 * One continuous speech-to-text session on top of restartable
 * `streamingRecognize` streams.
 *
 * The underlying stream is replaced whenever it ends, errors, or grows older
 * than `STREAM_MAX_AGE_MS`, until `close()` is called. `error` and `end`
 * events are honoured only when they come from the stream that is currently
 * active: a stream that has been replaced or closed can still emit late events
 * (for example after `destroy()`), and reacting to those would tear down the
 * fresh stream and report errors that are no longer relevant.
 */
class GoogleSTTSession implements STTSession {
  /** Active stream, or `null` while a restart is pending or after `close()`. */
  private stream: ReturnType<speechV1.SpeechClient["streamingRecognize"]> | null = null;
  private resultHandlers: Array<(r: STTResult) => void> = [];
  private errorHandlers: Array<(e: Error) => void> = [];
  private closed = false;
  /** `Date.now()` at the moment the active stream was opened. */
  private streamStartedAt = 0;
  private restartTimer: ReturnType<typeof setTimeout> | null = null;
  /** True between scheduling a restart and opening the replacement stream. */
  private restarting = false;

  /**
   * @param client Speech client used to open recognize streams.
   * @param config Sample rate, language and interim-result preference.
   */
  constructor(private client: speechV1.SpeechClient, private config: STTConfig) {
    this.start();
  }

  /** Opens a new recognize stream and makes it the active one. */
  private start(): void {
    const request = {
      config: {
        encoding: "LINEAR16" as const,
        sampleRateHertz: this.config.sampleRateHertz,
        languageCode: this.config.language,
        enableAutomaticPunctuation: true,
        // `latest_long` is best for conversational telephony
        model: "latest_long",
        useEnhanced: true,
      },
      interimResults: this.config.interimResults ?? true,
      // Single-utterance is FALSE — we want continuous turn-taking
      singleUtterance: false,
    };

    this.streamStartedAt = Date.now();
    this.restarting = false;

    const stream = this.client.streamingRecognize(request);
    this.stream = stream;

    stream
      .on("data", (data: {
        results?: Array<{
          alternatives?: Array<{ transcript?: string; confidence?: number }>;
          isFinal?: boolean;
        }>;
      }) => {
        // Only the top alternative of the first result is forwarded.
        const result = data.results?.[0];
        const alt = result?.alternatives?.[0];
        if (!alt?.transcript) return;
        const out: STTResult = {
          text: alt.transcript,
          isFinal: !!result?.isFinal,
          confidence: alt.confidence,
        };
        for (const h of this.resultHandlers) h(out);
      })
      .on("error", (err: Error) => {
        // Ignore late errors from a stream that was already replaced or closed.
        if (this.stream !== stream) return;
        // Notify listeners, then restart so the call stays live.
        for (const h of this.errorHandlers) h(err);
        this.scheduleRestart();
      })
      .on("end", () => {
        // Same guard: only the active stream ending warrants a restart.
        if (this.stream !== stream) return;
        this.scheduleRestart();
      });
  }

  /** Drops the active stream and opens a replacement after a short delay. */
  private scheduleRestart(): void {
    if (this.closed || this.restarting) return;
    this.restarting = true;
    // Clear the reference first so pushAudio() stops writing to the old stream.
    const old = this.stream;
    this.stream = null;
    try { old?.destroy(); } catch { /* ignore */ }
    // Short fixed delay to avoid hammering the API if something is truly broken.
    this.restartTimer = setTimeout(() => {
      this.restartTimer = null;
      if (this.closed) return;
      try {
        this.start();
      } catch (e) {
        for (const h of this.errorHandlers) h(e instanceof Error ? e : new Error(String(e)));
      }
    }, 150);
  }

  /**
   * Feeds raw PCM audio into the active stream. Audio is silently dropped
   * while no stream is active (during a restart) and after `close()`.
   */
  pushAudio(pcm: Buffer | Uint8Array): void {
    if (this.closed) return;
    // Rotate the stream before it reaches the service-side age limit.
    if (this.stream && Date.now() - this.streamStartedAt > STREAM_MAX_AGE_MS) {
      this.scheduleRestart();
    }
    const s = this.stream;
    if (!s || (s as unknown as { destroyed?: boolean }).destroyed) return;
    try {
      // The helper client wraps raw audio bytes into { audioContent } itself
      // via its internal PassThrough transform — writing an object here would
      // end up double-wrapped and Google rejects it as "Malordered Data".
      s.write(Buffer.isBuffer(pcm) ? pcm : Buffer.from(pcm));
    } catch {
      // Stream was destroyed between the check and the write — recover silently
      this.scheduleRestart();
    }
  }

  /** Ends the session: cancels any pending restart and ends the active stream. */
  async close(): Promise<void> {
    if (this.closed) return;
    this.closed = true;
    if (this.restartTimer) {
      clearTimeout(this.restartTimer);
      this.restartTimer = null;
    }
    try {
      this.stream?.end();
    } catch {
      // ignore
    }
    this.stream = null;
  }

  /** Registers a listener for interim and final transcription results. */
  onResult(handler: (r: STTResult) => void): void {
    this.resultHandlers.push(handler);
  }

  /** Registers a listener for errors of the active stream and failed restarts. */
  onError(handler: (e: Error) => void): void {
    this.errorHandlers.push(handler);
  }
}
140
+
141
/**
 * Speech-to-text provider for Google Cloud Speech. A single `SpeechClient` is
 * created on first use and shared by every session this provider starts.
 */
export class GoogleSTTProvider implements STTProvider {
  readonly id = "google" as const;
  private client: speechV1.SpeechClient | null = null;

  /** Returns the shared client, creating it with the configured key file if needed. */
  private getClient(): speechV1.SpeechClient {
    if (this.client !== null) return this.client;
    const created = new speechV1.SpeechClient({ keyFilename: getKeyFile() });
    this.client = created;
    return created;
  }

  /**
   * Starts a new streaming recognition session.
   *
   * @param config Sample rate, language and interim-result preference.
   */
  async start(config: STTConfig): Promise<STTSession> {
    const client = this.getClient();
    return new GoogleSTTSession(client, config);
  }
}