@shykaruu/jarvis-brain 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +153 -0
- package/README.md +428 -0
- package/bin/jarvis.ts +449 -0
- package/package.json +79 -0
- package/roles/activity-observer.yaml +60 -0
- package/roles/ceo-founder.yaml +144 -0
- package/roles/chief-of-staff.yaml +158 -0
- package/roles/dev-lead.yaml +182 -0
- package/roles/executive-assistant.yaml +77 -0
- package/roles/marketing-director.yaml +168 -0
- package/roles/personal-assistant.yaml +266 -0
- package/roles/research-specialist.yaml +60 -0
- package/roles/specialists/content-writer.yaml +53 -0
- package/roles/specialists/customer-support.yaml +57 -0
- package/roles/specialists/data-analyst.yaml +57 -0
- package/roles/specialists/financial-analyst.yaml +56 -0
- package/roles/specialists/hr-specialist.yaml +55 -0
- package/roles/specialists/legal-advisor.yaml +58 -0
- package/roles/specialists/marketing-strategist.yaml +56 -0
- package/roles/specialists/project-coordinator.yaml +55 -0
- package/roles/specialists/research-analyst.yaml +58 -0
- package/roles/specialists/software-engineer.yaml +57 -0
- package/roles/specialists/system-administrator.yaml +57 -0
- package/roles/system-admin.yaml +76 -0
- package/scripts/ensure-bun.cjs +16 -0
- package/src/actions/README.md +421 -0
- package/src/actions/app-control/desktop-controller.test.ts +26 -0
- package/src/actions/app-control/desktop-controller.ts +438 -0
- package/src/actions/app-control/interface.ts +64 -0
- package/src/actions/app-control/linux.ts +273 -0
- package/src/actions/app-control/macos.ts +54 -0
- package/src/actions/app-control/sidecar-launcher.test.ts +23 -0
- package/src/actions/app-control/sidecar-launcher.ts +286 -0
- package/src/actions/app-control/windows.ts +44 -0
- package/src/actions/browser/cdp.ts +138 -0
- package/src/actions/browser/chrome-launcher.ts +261 -0
- package/src/actions/browser/session.ts +506 -0
- package/src/actions/browser/stealth.ts +49 -0
- package/src/actions/index.ts +20 -0
- package/src/actions/terminal/executor.ts +157 -0
- package/src/actions/terminal/wsl-bridge.ts +126 -0
- package/src/actions/test.ts +93 -0
- package/src/actions/tools/agents.ts +363 -0
- package/src/actions/tools/builtin.ts +950 -0
- package/src/actions/tools/commitments.ts +192 -0
- package/src/actions/tools/content.ts +217 -0
- package/src/actions/tools/delegate.ts +147 -0
- package/src/actions/tools/desktop.test.ts +55 -0
- package/src/actions/tools/desktop.ts +305 -0
- package/src/actions/tools/documents.ts +169 -0
- package/src/actions/tools/goals.ts +376 -0
- package/src/actions/tools/local-tools-guard.ts +31 -0
- package/src/actions/tools/registry.ts +173 -0
- package/src/actions/tools/research.ts +111 -0
- package/src/actions/tools/sidecar-list.ts +57 -0
- package/src/actions/tools/sidecar-route.ts +105 -0
- package/src/actions/tools/workflows.ts +216 -0
- package/src/agents/agent.ts +132 -0
- package/src/agents/delegation.ts +107 -0
- package/src/agents/hierarchy.ts +113 -0
- package/src/agents/index.ts +19 -0
- package/src/agents/messaging.ts +125 -0
- package/src/agents/orchestrator.ts +592 -0
- package/src/agents/role-discovery.ts +61 -0
- package/src/agents/sub-agent-runner.ts +309 -0
- package/src/agents/task-manager.ts +151 -0
- package/src/authority/approval-delivery.ts +59 -0
- package/src/authority/approval.ts +196 -0
- package/src/authority/audit.ts +158 -0
- package/src/authority/authority.test.ts +519 -0
- package/src/authority/deferred-executor.ts +103 -0
- package/src/authority/emergency.ts +66 -0
- package/src/authority/engine.ts +301 -0
- package/src/authority/index.ts +12 -0
- package/src/authority/learning.ts +111 -0
- package/src/authority/tool-action-map.ts +74 -0
- package/src/awareness/analytics.ts +466 -0
- package/src/awareness/awareness.test.ts +332 -0
- package/src/awareness/capture-engine.ts +305 -0
- package/src/awareness/context-graph.ts +130 -0
- package/src/awareness/context-tracker.ts +349 -0
- package/src/awareness/index.ts +25 -0
- package/src/awareness/intelligence.ts +321 -0
- package/src/awareness/ocr-engine.ts +88 -0
- package/src/awareness/service.ts +528 -0
- package/src/awareness/struggle-detector.ts +342 -0
- package/src/awareness/suggestion-engine.ts +476 -0
- package/src/awareness/types.ts +201 -0
- package/src/cli/autostart.ts +417 -0
- package/src/cli/deps.ts +449 -0
- package/src/cli/doctor.ts +238 -0
- package/src/cli/helpers.ts +401 -0
- package/src/cli/onboard.ts +827 -0
- package/src/cli/uninstall.test.ts +37 -0
- package/src/cli/uninstall.ts +202 -0
- package/src/comms/README.md +329 -0
- package/src/comms/auth-error.html +48 -0
- package/src/comms/channels/discord.ts +228 -0
- package/src/comms/channels/signal.ts +56 -0
- package/src/comms/channels/telegram.ts +316 -0
- package/src/comms/channels/whatsapp.ts +60 -0
- package/src/comms/channels.test.ts +173 -0
- package/src/comms/dashboard-auth.ts +75 -0
- package/src/comms/desktop-notify.ts +114 -0
- package/src/comms/example.ts +129 -0
- package/src/comms/index.ts +129 -0
- package/src/comms/streaming.ts +149 -0
- package/src/comms/voice.test.ts +504 -0
- package/src/comms/voice.ts +341 -0
- package/src/comms/websocket.test.ts +409 -0
- package/src/comms/websocket.ts +669 -0
- package/src/config/README.md +389 -0
- package/src/config/index.ts +6 -0
- package/src/config/loader.test.ts +183 -0
- package/src/config/loader.ts +148 -0
- package/src/config/types.ts +293 -0
- package/src/daemon/README.md +232 -0
- package/src/daemon/agent-service-interface.ts +9 -0
- package/src/daemon/agent-service.ts +667 -0
- package/src/daemon/api-routes.ts +3067 -0
- package/src/daemon/background-agent-service.ts +396 -0
- package/src/daemon/background-agent.test.ts +78 -0
- package/src/daemon/channel-service.ts +201 -0
- package/src/daemon/commitment-executor.ts +297 -0
- package/src/daemon/dashboard-auth.test.ts +170 -0
- package/src/daemon/event-classifier.ts +239 -0
- package/src/daemon/event-coalescer.ts +123 -0
- package/src/daemon/event-reactor.ts +214 -0
- package/src/daemon/flock.c +7 -0
- package/src/daemon/health.ts +220 -0
- package/src/daemon/index.ts +1070 -0
- package/src/daemon/llm-settings.test.ts +78 -0
- package/src/daemon/llm-settings.ts +450 -0
- package/src/daemon/observer-service.ts +150 -0
- package/src/daemon/pid.test.ts +283 -0
- package/src/daemon/pid.ts +224 -0
- package/src/daemon/research-queue.ts +155 -0
- package/src/daemon/services.ts +175 -0
- package/src/daemon/ws-service.ts +926 -0
- package/src/global.d.ts +4 -0
- package/src/goals/accountability.ts +240 -0
- package/src/goals/awareness-bridge.ts +185 -0
- package/src/goals/estimator.ts +185 -0
- package/src/goals/events.ts +28 -0
- package/src/goals/goals.test.ts +400 -0
- package/src/goals/integration.test.ts +329 -0
- package/src/goals/nl-builder.test.ts +220 -0
- package/src/goals/nl-builder.ts +256 -0
- package/src/goals/rhythm.test.ts +177 -0
- package/src/goals/rhythm.ts +275 -0
- package/src/goals/service.test.ts +135 -0
- package/src/goals/service.ts +407 -0
- package/src/goals/types.ts +106 -0
- package/src/goals/workflow-bridge.ts +96 -0
- package/src/integrations/google-api.ts +134 -0
- package/src/integrations/google-auth.ts +175 -0
- package/src/llm/README.md +291 -0
- package/src/llm/anthropic.ts +400 -0
- package/src/llm/gemini.ts +380 -0
- package/src/llm/groq.ts +406 -0
- package/src/llm/history.ts +147 -0
- package/src/llm/index.ts +21 -0
- package/src/llm/manager.ts +226 -0
- package/src/llm/ollama.ts +316 -0
- package/src/llm/openai.ts +411 -0
- package/src/llm/openrouter.ts +390 -0
- package/src/llm/provider.test.ts +487 -0
- package/src/llm/provider.ts +61 -0
- package/src/llm/test.ts +88 -0
- package/src/observers/README.md +278 -0
- package/src/observers/calendar.ts +113 -0
- package/src/observers/clipboard.ts +136 -0
- package/src/observers/email.ts +109 -0
- package/src/observers/example.ts +58 -0
- package/src/observers/file-watcher.ts +124 -0
- package/src/observers/index.ts +159 -0
- package/src/observers/notifications.ts +197 -0
- package/src/observers/observers.test.ts +203 -0
- package/src/observers/processes.ts +225 -0
- package/src/personality/README.md +61 -0
- package/src/personality/adapter.ts +196 -0
- package/src/personality/index.ts +20 -0
- package/src/personality/learner.ts +209 -0
- package/src/personality/model.ts +132 -0
- package/src/personality/personality.test.ts +236 -0
- package/src/roles/README.md +252 -0
- package/src/roles/authority.ts +120 -0
- package/src/roles/example-usage.ts +198 -0
- package/src/roles/index.ts +42 -0
- package/src/roles/loader.ts +143 -0
- package/src/roles/prompt-builder.ts +218 -0
- package/src/roles/test-multi.ts +102 -0
- package/src/roles/test-role.yaml +77 -0
- package/src/roles/test-utils.ts +93 -0
- package/src/roles/test.ts +106 -0
- package/src/roles/tool-guide.ts +195 -0
- package/src/roles/types.ts +36 -0
- package/src/roles/utils.ts +200 -0
- package/src/scripts/google-setup.ts +168 -0
- package/src/sidecar/connection.ts +179 -0
- package/src/sidecar/index.ts +6 -0
- package/src/sidecar/manager.ts +542 -0
- package/src/sidecar/protocol.ts +85 -0
- package/src/sidecar/rpc.ts +161 -0
- package/src/sidecar/scheduler.ts +136 -0
- package/src/sidecar/types.ts +112 -0
- package/src/sidecar/validator.ts +144 -0
- package/src/sites/builder-tools.ts +215 -0
- package/src/sites/dev-server-manager.ts +286 -0
- package/src/sites/fixtures/security-test-site/.jarvis-project.json +6 -0
- package/src/sites/fixtures/security-test-site/Makefile +15 -0
- package/src/sites/fixtures/security-test-site/README.md +18 -0
- package/src/sites/fixtures/security-test-site/index.html +12 -0
- package/src/sites/fixtures/security-test-site/index.ts +16 -0
- package/src/sites/fixtures/security-test-site/package.json +13 -0
- package/src/sites/fixtures/security-test-site/src/app.tsx +780 -0
- package/src/sites/fixtures/security-test-site/tsconfig.json +10 -0
- package/src/sites/git-manager.ts +240 -0
- package/src/sites/github-manager.ts +355 -0
- package/src/sites/index.ts +25 -0
- package/src/sites/project-manager.ts +389 -0
- package/src/sites/proxy.ts +133 -0
- package/src/sites/service.ts +136 -0
- package/src/sites/templates.ts +169 -0
- package/src/sites/types.ts +89 -0
- package/src/user/profile-followup.test.ts +84 -0
- package/src/user/profile-followup.ts +185 -0
- package/src/user/profile.ts +224 -0
- package/src/vault/README.md +110 -0
- package/src/vault/awareness.ts +341 -0
- package/src/vault/commitments.ts +299 -0
- package/src/vault/content-pipeline.ts +270 -0
- package/src/vault/conversations.ts +173 -0
- package/src/vault/dashboard-sessions.ts +44 -0
- package/src/vault/documents.ts +130 -0
- package/src/vault/entities.ts +185 -0
- package/src/vault/extractor.test.ts +356 -0
- package/src/vault/extractor.ts +345 -0
- package/src/vault/facts.ts +190 -0
- package/src/vault/goals.ts +477 -0
- package/src/vault/index.ts +87 -0
- package/src/vault/keychain.ts +99 -0
- package/src/vault/observations.ts +115 -0
- package/src/vault/relationships.ts +178 -0
- package/src/vault/retrieval.test.ts +139 -0
- package/src/vault/retrieval.ts +258 -0
- package/src/vault/schema.ts +709 -0
- package/src/vault/settings.ts +38 -0
- package/src/vault/user-profile.test.ts +113 -0
- package/src/vault/user-profile.ts +176 -0
- package/src/vault/vectors.ts +92 -0
- package/src/vault/webapp-template-seeds.ts +116 -0
- package/src/vault/webapp-templates.ts +244 -0
- package/src/vault/workflows.ts +403 -0
- package/src/workflows/auto-suggest.ts +290 -0
- package/src/workflows/engine.ts +366 -0
- package/src/workflows/events.ts +24 -0
- package/src/workflows/executor.ts +207 -0
- package/src/workflows/nl-builder.ts +198 -0
- package/src/workflows/nodes/actions/agent-task.ts +73 -0
- package/src/workflows/nodes/actions/calendar-action.ts +85 -0
- package/src/workflows/nodes/actions/code-execution.ts +73 -0
- package/src/workflows/nodes/actions/discord.ts +77 -0
- package/src/workflows/nodes/actions/file-write.ts +73 -0
- package/src/workflows/nodes/actions/gmail.ts +69 -0
- package/src/workflows/nodes/actions/http-request.ts +117 -0
- package/src/workflows/nodes/actions/notification.ts +85 -0
- package/src/workflows/nodes/actions/run-tool.ts +55 -0
- package/src/workflows/nodes/actions/send-message.ts +82 -0
- package/src/workflows/nodes/actions/shell-command.ts +76 -0
- package/src/workflows/nodes/actions/telegram.ts +60 -0
- package/src/workflows/nodes/builtin.ts +119 -0
- package/src/workflows/nodes/error/error-handler.ts +37 -0
- package/src/workflows/nodes/error/fallback.ts +47 -0
- package/src/workflows/nodes/error/retry.ts +82 -0
- package/src/workflows/nodes/logic/delay.ts +42 -0
- package/src/workflows/nodes/logic/if-else.ts +41 -0
- package/src/workflows/nodes/logic/loop.ts +90 -0
- package/src/workflows/nodes/logic/merge.ts +38 -0
- package/src/workflows/nodes/logic/race.ts +40 -0
- package/src/workflows/nodes/logic/switch.ts +59 -0
- package/src/workflows/nodes/logic/template-render.ts +53 -0
- package/src/workflows/nodes/logic/variable-get.ts +37 -0
- package/src/workflows/nodes/logic/variable-set.ts +59 -0
- package/src/workflows/nodes/registry.ts +99 -0
- package/src/workflows/nodes/transform/aggregate.ts +99 -0
- package/src/workflows/nodes/transform/csv-parse.ts +70 -0
- package/src/workflows/nodes/transform/json-parse.ts +63 -0
- package/src/workflows/nodes/transform/map-filter.ts +84 -0
- package/src/workflows/nodes/transform/regex-match.ts +89 -0
- package/src/workflows/nodes/triggers/calendar.ts +33 -0
- package/src/workflows/nodes/triggers/clipboard.ts +32 -0
- package/src/workflows/nodes/triggers/cron.ts +40 -0
- package/src/workflows/nodes/triggers/email.ts +40 -0
- package/src/workflows/nodes/triggers/file-change.ts +45 -0
- package/src/workflows/nodes/triggers/git.ts +46 -0
- package/src/workflows/nodes/triggers/manual.ts +23 -0
- package/src/workflows/nodes/triggers/poll.ts +81 -0
- package/src/workflows/nodes/triggers/process.ts +44 -0
- package/src/workflows/nodes/triggers/screen-event.ts +37 -0
- package/src/workflows/nodes/triggers/webhook.ts +39 -0
- package/src/workflows/safe-eval.ts +139 -0
- package/src/workflows/template.ts +118 -0
- package/src/workflows/triggers/cron.ts +311 -0
- package/src/workflows/triggers/manager.ts +285 -0
- package/src/workflows/triggers/observer-bridge.ts +172 -0
- package/src/workflows/triggers/poller.ts +201 -0
- package/src/workflows/triggers/screen-condition.ts +218 -0
- package/src/workflows/triggers/triggers.test.ts +740 -0
- package/src/workflows/triggers/webhook.ts +191 -0
- package/src/workflows/types.ts +133 -0
- package/src/workflows/variables.ts +72 -0
- package/src/workflows/workflows.test.ts +383 -0
- package/src/workflows/yaml.ts +104 -0
- package/ui/dist/index-3gr23jt9.js +112614 -0
- package/ui/dist/index-9vmj8127.css +14239 -0
- package/ui/dist/index-hy9pc1gm.js +112873 -0
- package/ui/dist/index-j2ep5d1w.js +112374 -0
- package/ui/dist/index-jt00vjqs.js +112858 -0
- package/ui/dist/index-k9ymx5qb.js +112374 -0
- package/ui/dist/index.html +16 -0
- package/ui/public/audio/pcm-capture-processor.js +11 -0
- package/ui/public/openwakeword/models/embedding_model.onnx +0 -0
- package/ui/public/openwakeword/models/hey_jarvis_v0.1.onnx +0 -0
- package/ui/public/openwakeword/models/melspectrogram.onnx +0 -0
- package/ui/public/openwakeword/models/silero_vad.onnx +0 -0
- package/ui/public/ort/ort-wasm-simd-threaded.jsep.mjs +106 -0
- package/ui/public/ort/ort-wasm-simd-threaded.jsep.wasm +0 -0
- package/ui/public/ort/ort-wasm-simd-threaded.mjs +59 -0
- package/ui/public/ort/ort-wasm-simd-threaded.wasm +0 -0
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
import type { STTConfig, TTSConfig } from '../config/types.ts';
|
|
2
|
+
import { Communicate } from 'edge-tts-universal';
|
|
3
|
+
import { resolveOpenAIBaseUrl } from '../llm/openai.ts';
|
|
4
|
+
|
|
5
|
+
/**
 * Contract for a speech-to-text backend.
 */
export interface STTProvider {
  /**
   * Transcribe a recorded utterance.
   *
   * @param audio Raw bytes of the recording.
   * @returns The recognised text.
   */
  transcribe(audio: Buffer): Promise<string>;
}
|
|
8
|
+
|
|
9
|
+
/**
 * Contract for a text-to-speech backend.
 */
export interface TTSProvider {
  /**
   * Render `text` to audio and resolve with the complete result.
   *
   * @param text Text to speak.
   */
  synthesize(text: string): Promise<Buffer>;

  /**
   * Render `text` to audio and deliver it as an async sequence of buffers.
   *
   * @param text Text to speak.
   */
  synthesizeStream(text: string): AsyncIterable<Buffer>;
}
|
|
13
|
+
|
|
14
|
+
/**
 * OpenAI Whisper STT — posts audio to the `audio/transcriptions` endpoint
 * resolved against the configured OpenAI-compatible base URL.
 */
export class OpenAIWhisperSTT implements STTProvider {
  private readonly apiKey: string;
  private readonly model: string;
  private readonly apiUrl: string;

  /**
   * @param apiKey  Token sent as `Authorization: Bearer <apiKey>`.
   * @param model   Transcription model name; defaults to `whisper-1`.
   * @param baseUrl Optional base URL override, handed to `resolveOpenAIBaseUrl`.
   */
  constructor(apiKey: string, model: string = 'whisper-1', baseUrl?: string) {
    this.apiKey = apiKey;
    this.model = model;
    this.apiUrl = new URL('audio/transcriptions', resolveOpenAIBaseUrl(baseUrl)).toString();
  }

  /**
   * Upload `audio` (labelled as WebM) and return the transcript.
   *
   * @param audio Raw bytes of the recording.
   * @returns The `text` field of the JSON response, or `''` when the response
   *          carries no string `text` (so the declared `Promise<string>` holds).
   * @throws Error when the server answers with a non-2xx status; the message
   *         includes the status code and the response body.
   */
  async transcribe(audio: Buffer): Promise<string> {
    const formData = new FormData();
    formData.append('file', new Blob([new Uint8Array(audio)], { type: 'audio/webm' }), 'audio.webm');
    formData.append('model', this.model);
    formData.append('language', 'en');

    const response = await fetch(this.apiUrl, {
      method: 'POST',
      headers: { 'Authorization': `Bearer ${this.apiKey}` },
      body: formData,
    });

    if (!response.ok) {
      const err = await response.text();
      throw new Error(`OpenAI STT error (${response.status}): ${err}`);
    }

    // The payload is untrusted JSON: narrow it instead of assuming its shape,
    // so a missing/non-string `text` (or a `null` body) cannot leak through.
    const result = (await response.json()) as { text?: unknown } | null;
    const text = result?.text;
    return typeof text === 'string' ? text : '';
  }
}
|
|
49
|
+
|
|
50
|
+
/**
 * Groq Whisper STT — uses Groq's OpenAI-compatible transcriptions endpoint.
 */
export class GroqWhisperSTT implements STTProvider {
  private readonly apiKey: string;
  private readonly model: string;

  /**
   * @param apiKey Token sent as `Authorization: Bearer <apiKey>`.
   * @param model  Transcription model name; defaults to `whisper-large-v3-turbo`.
   */
  constructor(apiKey: string, model: string = 'whisper-large-v3-turbo') {
    this.apiKey = apiKey;
    this.model = model;
  }

  /**
   * Upload `audio` (labelled as WebM) and return the transcript.
   *
   * @param audio Raw bytes of the recording.
   * @returns The `text` field of the JSON response, or `''` when the response
   *          carries no string `text` (so the declared `Promise<string>` holds).
   * @throws Error when the server answers with a non-2xx status; the message
   *         includes the status code and the response body.
   */
  async transcribe(audio: Buffer): Promise<string> {
    const formData = new FormData();
    formData.append('file', new Blob([new Uint8Array(audio)], { type: 'audio/webm' }), 'audio.webm');
    formData.append('model', this.model);
    formData.append('language', 'en');

    const response = await fetch('https://api.groq.com/openai/v1/audio/transcriptions', {
      method: 'POST',
      headers: { 'Authorization': `Bearer ${this.apiKey}` },
      body: formData,
    });

    if (!response.ok) {
      const err = await response.text();
      throw new Error(`Groq STT error (${response.status}): ${err}`);
    }

    // The payload is untrusted JSON: narrow it instead of assuming its shape,
    // so a missing/non-string `text` (or a `null` body) cannot leak through.
    const result = (await response.json()) as { text?: unknown } | null;
    const text = result?.text;
    return typeof text === 'string' ? text : '';
  }
}
|
|
83
|
+
|
|
84
|
+
/**
 * Kind of server a local Whisper endpoint is: a whisper.cpp HTTP server or an
 * OpenAI-compatible endpoint.
 */
export type LocalWhisperServerType = 'whisper_cpp' | 'openai_compatible';
|
|
88
|
+
|
|
89
|
+
/**
 * Local Whisper STT — sends audio to a self-hosted transcription server,
 * either whisper.cpp's HTTP server or an OpenAI-compatible endpoint.
 */
export class LocalWhisperSTT implements STTProvider {
  private readonly endpoint: string;
  private readonly model: string;
  private readonly serverType: LocalWhisperServerType;

  /**
   * @param endpoint   Server URL; defaults to `http://localhost:8080`.
   * @param model      Model name sent to OpenAI-compatible servers; defaults to `base`.
   * @param serverType Which request shape to use; defaults to `whisper_cpp`.
   */
  constructor(
    endpoint: string = 'http://localhost:8080',
    model?: string,
    serverType: LocalWhisperServerType = 'whisper_cpp',
  ) {
    this.endpoint = endpoint;
    this.model = model ?? 'base';
    this.serverType = serverType;
  }

  /**
   * Compute the URL to POST to. Trailing slashes are stripped; for whisper.cpp
   * `/inference` is appended unless the endpoint already ends in
   * `/inference`, `/asr` or `/transcribe`. Other server types use the
   * endpoint as given.
   */
  private resolveUrl(): string {
    const normalized = this.endpoint.replace(/\/+$/, '');
    if (this.serverType === 'whisper_cpp') {
      const hasPath = /\/(inference|asr|transcribe)$/.test(normalized);
      return hasPath ? normalized : `${normalized}/inference`;
    }
    return normalized;
  }

  /** Build the multipart body; the audio part is labelled as WAV for both server types. */
  private buildForm(audio: Buffer): FormData {
    const formData = new FormData();
    formData.append('file', new Blob([new Uint8Array(audio)], { type: 'audio/wav' }), 'audio.wav');
    if (this.serverType === 'whisper_cpp') {
      formData.append('response_format', 'json');
      formData.append('temperature', '0.0');
      formData.append('temperature_inc', '0.2');
    } else {
      formData.append('model', this.model);
      formData.append('language', 'en');
    }
    return formData;
  }

  /**
   * Extract the transcript from a successful response. JSON bodies are probed
   * for `text`, `transcript`, then `data.text`; anything else is read as plain
   * text. The result is trimmed.
   */
  private async parseResponse(response: Response): Promise<string> {
    // Media types are case-insensitive, so compare in lower case.
    const contentType = (response.headers.get('content-type') ?? '').toLowerCase();
    if (!contentType.includes('application/json')) {
      return (await response.text()).trim();
    }

    // `?.` on the root keeps a JSON `null` body from throwing a TypeError; it
    // falls through to '' and is reported as an empty transcription instead.
    const result = (await response.json()) as any;
    return String(
      result?.text ??
      result?.transcript ??
      result?.data?.text ??
      ''
    ).trim();
  }

  /**
   * Send `audio` to the local server and return the transcript.
   *
   * @param audio Raw bytes of the recording.
   * @returns The trimmed, non-empty transcript.
   * @throws Error on a non-2xx status (message carries status and body), or
   *         when the server returns an empty transcription.
   */
  async transcribe(audio: Buffer): Promise<string> {
    const url = this.resolveUrl();
    const formData = this.buildForm(audio);

    const response = await fetch(url, {
      method: 'POST',
      body: formData,
    });

    if (!response.ok) {
      const err = await response.text();
      throw new Error(`Local Whisper STT error (${response.status}): ${err}`);
    }

    const transcript = await this.parseResponse(response);
    if (!transcript) {
      throw new Error('Local Whisper STT returned empty transcription');
    }
    return transcript;
  }
}
|
|
163
|
+
|
|
164
|
+
/**
 * Build the STT provider selected by `config.provider`.
 *
 * @param config STT section of the configuration.
 * @returns A provider instance, or `null` when the provider name is not
 *          recognised or the chosen hosted provider has no API key.
 */
export function createSTTProvider(config: STTConfig): STTProvider | null {
  const selected = config.provider;

  if (selected === 'openai') {
    const openai = config.openai;
    return openai?.api_key
      ? new OpenAIWhisperSTT(openai.api_key, openai.model, openai.base_url)
      : null;
  }

  if (selected === 'groq') {
    const groq = config.groq;
    return groq?.api_key ? new GroqWhisperSTT(groq.api_key, groq.model) : null;
  }

  if (selected === 'local') {
    // The local provider needs no credentials; missing settings fall back to
    // the constructor defaults.
    const local = config.local;
    return new LocalWhisperSTT(local?.endpoint, local?.model, local?.server_type);
  }

  return null;
}
|
|
182
|
+
|
|
183
|
+
/**
 * Edge TTS Provider — speech synthesis through Microsoft Edge's online TTS
 * service (free, no API key required).
 * Server-side only: a browser WebSocket cannot set the headers the service needs.
 */
export class EdgeTTSProvider implements TTSProvider {
  private voice: string;
  private rate: string;
  private volume: string;

  /**
   * @param voice  Voice name; defaults to `en-US-AriaNeural`.
   * @param rate   Speaking-rate adjustment; defaults to `+0%`.
   * @param volume Volume adjustment; defaults to `+0%`.
   */
  constructor(voice = 'en-US-AriaNeural', rate = '+0%', volume = '+0%') {
    this.voice = voice;
    this.rate = rate;
    this.volume = volume;
  }

  /**
   * Synthesize `text` and resolve with all audio chunks joined into one buffer.
   * Non-audio chunks from the stream, and audio chunks without data, are skipped.
   */
  async synthesize(text: string): Promise<Buffer> {
    const { voice, rate, volume } = this;
    const communicate = new Communicate(text, { voice, rate, volume });

    const audioParts: Buffer[] = [];
    for await (const piece of communicate.stream()) {
      if (piece.type !== 'audio' || !piece.data) continue;
      audioParts.push(piece.data);
    }
    return Buffer.concat(audioParts);
  }

  /**
   * Streaming variant: yields at most one buffer holding the complete MP3 for
   * `text`, and nothing when synthesis produced no audio.
   * Intended to be called once per sentence so the caller can pipeline
   * several sentences; each yielded buffer is a valid, decodable MP3 file.
   */
  async *synthesizeStream(text: string): AsyncIterable<Buffer> {
    // Individual edge-tts fragments are not valid standalone audio files, so
    // the whole utterance is collected before anything is yielded.
    const mp3 = await this.synthesize(text);
    if (mp3.length === 0) return;
    yield mp3;
  }
}
|
|
227
|
+
|
|
228
|
+
/**
 * ElevenLabs TTS Provider — high-quality personalized voices via the
 * ElevenLabs API. Audio is requested from the `/stream` endpoint as MP3
 * (`mp3_44100_128`) and handed to callers as one complete buffer per request.
 */
export class ElevenLabsTTSProvider implements TTSProvider {
  private apiKey: string;
  private voiceId: string;
  private model: string;
  private stability: number;
  private similarityBoost: number;

  /**
   * @param config ElevenLabs settings. Only `api_key` is read without a
   *               fallback; unset voice, model, stability and similarity
   *               boost use the defaults below.
   */
  constructor(config: NonNullable<TTSConfig['elevenlabs']>) {
    const { api_key, voice_id, model, stability, similarity_boost } = config;
    this.apiKey = api_key;
    this.voiceId = voice_id ?? '21m00Tcm4TlvDq8ikWAM'; // Rachel (default)
    this.model = model ?? 'eleven_flash_v2_5';
    this.stability = stability ?? 0.5;
    this.similarityBoost = similarity_boost ?? 0.75;
  }

  /**
   * Synthesize `text` and resolve with the full response body.
   *
   * @throws Error on a non-2xx status; the message carries the status code
   *         and the response body.
   */
  async synthesize(text: string): Promise<Buffer> {
    const url = `https://api.elevenlabs.io/v1/text-to-speech/${this.voiceId}/stream?output_format=mp3_44100_128`;
    const payload = JSON.stringify({
      text,
      model_id: this.model,
      voice_settings: {
        stability: this.stability,
        similarity_boost: this.similarityBoost,
      },
    });

    const response = await fetch(url, {
      method: 'POST',
      headers: {
        'xi-api-key': this.apiKey,
        'Content-Type': 'application/json',
      },
      body: payload,
    });

    if (!response.ok) {
      const err = await response.text();
      throw new Error(`ElevenLabs TTS error (${response.status}): ${err}`);
    }

    return Buffer.from(await response.arrayBuffer());
  }

  /**
   * Streaming variant: yields at most one buffer holding the complete MP3 for
   * `text`, and nothing when the response body was empty.
   */
  async *synthesizeStream(text: string): AsyncIterable<Buffer> {
    // Individual streaming fragments are not decodable by the browser's
    // AudioContext.decodeAudioData, so a complete MP3 is collected per sentence.
    const mp3 = await this.synthesize(text);
    if (mp3.length === 0) return;
    yield mp3;
  }
}
|
|
285
|
+
|
|
286
|
+
/**
 * Fetch the voices available to an ElevenLabs account.
 *
 * @param apiKey Key sent in the `xi-api-key` header.
 * @returns One entry per voice in the response's `voices` field (empty when
 *          the field is absent); `category` falls back to `'unknown'`.
 * @throws Error on a non-2xx status; the message carries the status code and
 *         the response body.
 */
export async function listElevenLabsVoices(apiKey: string): Promise<{
  voice_id: string;
  name: string;
  category: string;
}[]> {
  const res = await fetch('https://api.elevenlabs.io/v1/voices', {
    headers: { 'xi-api-key': apiKey },
  });

  if (!res.ok) {
    const detail = await res.text();
    throw new Error(`ElevenLabs voices error (${res.status}): ${detail}`);
  }

  const payload = await res.json() as any;
  const voices = payload.voices ?? [];
  return voices.map((voice: any) => {
    const { voice_id, name, category } = voice;
    return { voice_id, name, category: category ?? 'unknown' };
  });
}
|
|
310
|
+
|
|
311
|
+
/**
 * Build the TTS provider described by `config`.
 *
 * @param config TTS section of the configuration.
 * @returns `null` when TTS is disabled or when ElevenLabs is selected without
 *          an API key; otherwise an ElevenLabs provider if selected, and an
 *          Edge TTS provider for every other `provider` value.
 */
export function createTTSProvider(config: TTSConfig): TTSProvider | null {
  if (!config.enabled) return null;

  // Anything other than ElevenLabs falls back to Edge TTS.
  if (config.provider !== 'elevenlabs') {
    return new EdgeTTSProvider(config.voice, config.rate, config.volume);
  }

  const eleven = config.elevenlabs;
  return eleven?.api_key ? new ElevenLabsTTSProvider(eleven) : null;
}
|
|
326
|
+
|
|
327
|
+
/**
 * Split text into sentences for streaming TTS.
 * Each sentence is synthesized and played independently for low latency.
 *
 * Fenced code blocks are first collapsed to the placeholder `[code block]`.
 * The text is then cut after `.`, `!` or `?` when followed by whitespace and
 * an uppercase letter (any script, not only A-Z), and at paragraph breaks
 * (two consecutive line endings, LF or CRLF).
 *
 * @param text Text to split.
 * @returns The trimmed, non-empty pieces; if there are none, a single-element
 *          array containing the original `text` unchanged.
 */
export function splitIntoSentences(text: string): string[] {
  // Collapse code blocks to avoid splitting on periods inside code
  const collapsed = text.replace(/```[\s\S]*?```/g, '[code block]');

  // `\p{Lu}` (with the `u` flag) recognises uppercase letters in every script,
  // so non-English sentences split as well; `\r?\n\r?\n` also treats Windows
  // line endings as a paragraph break. ASCII/LF input splits exactly as with
  // `[A-Z]` and `\n\n`.
  const boundary = /(?<=[.!?])\s+(?=\p{Lu})|(?<=\r?\n\r?\n)/u;

  const sentences = collapsed
    .split(boundary)
    .map(s => s.trim())
    .filter(s => s.length > 0);
  return sentences.length > 0 ? sentences : [text];
}
|