@oscharko-dev/keiko-server 0.2.7 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.tsbuildinfo +1 -1
- package/dist/chat-handlers.d.ts +18 -2
- package/dist/chat-handlers.d.ts.map +1 -1
- package/dist/chat-handlers.js +185 -3
- package/dist/command-runner-errors.d.ts +17 -0
- package/dist/command-runner-errors.d.ts.map +1 -0
- package/dist/command-runner-errors.js +37 -0
- package/dist/command-runner-evidence.d.ts +23 -0
- package/dist/command-runner-evidence.d.ts.map +1 -0
- package/dist/command-runner-evidence.js +69 -0
- package/dist/command-runner-routes.d.ts +7 -0
- package/dist/command-runner-routes.d.ts.map +1 -0
- package/dist/command-runner-routes.js +175 -0
- package/dist/command-runner.d.ts +29 -0
- package/dist/command-runner.d.ts.map +1 -0
- package/dist/command-runner.js +348 -0
- package/dist/conversation-prompt.d.ts +2 -2
- package/dist/conversation-prompt.d.ts.map +1 -1
- package/dist/conversation-prompt.js +17 -1
- package/dist/csp.d.ts.map +1 -1
- package/dist/csp.js +3 -0
- package/dist/deps.d.ts +28 -1
- package/dist/deps.d.ts.map +1 -1
- package/dist/deps.js +288 -13
- package/dist/discussion-prompt.d.ts +4 -0
- package/dist/discussion-prompt.d.ts.map +1 -0
- package/dist/discussion-prompt.js +19 -0
- package/dist/editor/agentActionAudit.d.ts +18 -0
- package/dist/editor/agentActionAudit.d.ts.map +1 -0
- package/dist/editor/agentActionAudit.js +80 -0
- package/dist/editor/agentRoutes.d.ts +1 -0
- package/dist/editor/agentRoutes.d.ts.map +1 -1
- package/dist/editor/agentRoutes.js +292 -55
- package/dist/editor/agentSessionRegistry.d.ts +35 -0
- package/dist/editor/agentSessionRegistry.d.ts.map +1 -0
- package/dist/editor/agentSessionRegistry.js +243 -0
- package/dist/editor/completionRoutes.d.ts.map +1 -1
- package/dist/editor/completionRoutes.js +5 -10
- package/dist/editor/languageRoutes.d.ts +12 -1
- package/dist/editor/languageRoutes.d.ts.map +1 -1
- package/dist/editor/languageRoutes.js +71 -8
- package/dist/editor/languageService.d.ts +3 -2
- package/dist/editor/languageService.d.ts.map +1 -1
- package/dist/editor/languageService.js +41 -3
- package/dist/editor/languageServiceHost.d.ts.map +1 -1
- package/dist/editor/languageServiceHost.js +2 -2
- package/dist/editor/lsp/hostLanguageOperation.d.ts +17 -0
- package/dist/editor/lsp/hostLanguageOperation.d.ts.map +1 -0
- package/dist/editor/lsp/hostLanguageOperation.js +436 -0
- package/dist/editor/lsp/hostLanguageProviders.d.ts +26 -0
- package/dist/editor/lsp/hostLanguageProviders.d.ts.map +1 -0
- package/dist/editor/lsp/hostLanguageProviders.js +161 -0
- package/dist/editor/lsp/lspFrameCodec.d.ts +13 -0
- package/dist/editor/lsp/lspFrameCodec.d.ts.map +1 -0
- package/dist/editor/lsp/lspFrameCodec.js +164 -0
- package/dist/editor/lsp/lspJsonRpcClient.d.ts +34 -0
- package/dist/editor/lsp/lspJsonRpcClient.d.ts.map +1 -0
- package/dist/editor/lsp/lspJsonRpcClient.js +173 -0
- package/dist/editor/lsp/lspLanguageProvider.d.ts +7 -0
- package/dist/editor/lsp/lspLanguageProvider.d.ts.map +1 -0
- package/dist/editor/lsp/lspLanguageProvider.js +29 -0
- package/dist/editor/lsp/lspLifecycleLedger.d.ts +5 -0
- package/dist/editor/lsp/lspLifecycleLedger.d.ts.map +1 -0
- package/dist/editor/lsp/lspLifecycleLedger.js +37 -0
- package/dist/editor/lsp/lspNodeAdapter.d.ts +31 -0
- package/dist/editor/lsp/lspNodeAdapter.d.ts.map +1 -0
- package/dist/editor/lsp/lspNodeAdapter.js +230 -0
- package/dist/editor/lsp/lspProcessManager.d.ts +24 -0
- package/dist/editor/lsp/lspProcessManager.d.ts.map +1 -0
- package/dist/editor/lsp/lspProcessManager.js +255 -0
- package/dist/editor/lsp/lspRestartThrottle.d.ts +6 -0
- package/dist/editor/lsp/lspRestartThrottle.d.ts.map +1 -0
- package/dist/editor/lsp/lspRestartThrottle.js +24 -0
- package/dist/editor/lsp/lspStatusRoute.d.ts +8 -0
- package/dist/editor/lsp/lspStatusRoute.d.ts.map +1 -0
- package/dist/editor/lsp/lspStatusRoute.js +22 -0
- package/dist/editor/lsp/lspTransport.d.ts +19 -0
- package/dist/editor/lsp/lspTransport.d.ts.map +1 -0
- package/dist/editor/lsp/lspTransport.js +55 -0
- package/dist/editor/lsp/testing/fakeLspProcess.d.ts +23 -0
- package/dist/editor/lsp/testing/fakeLspProcess.d.ts.map +1 -0
- package/dist/editor/lsp/testing/fakeLspProcess.js +132 -0
- package/dist/files.d.ts +63 -0
- package/dist/files.d.ts.map +1 -1
- package/dist/files.js +799 -1
- package/dist/gateway-readiness.d.ts +6 -0
- package/dist/gateway-readiness.d.ts.map +1 -0
- package/dist/gateway-readiness.js +624 -0
- package/dist/gateway-setup.d.ts +2 -0
- package/dist/gateway-setup.d.ts.map +1 -1
- package/dist/gateway-setup.js +275 -11
- package/dist/gitDelivery/actionSheetProjection.d.ts +30 -0
- package/dist/gitDelivery/actionSheetProjection.d.ts.map +1 -0
- package/dist/gitDelivery/actionSheetProjection.js +206 -0
- package/dist/gitDelivery/actionSheetRoutes.d.ts +29 -0
- package/dist/gitDelivery/actionSheetRoutes.d.ts.map +1 -0
- package/dist/gitDelivery/actionSheetRoutes.js +293 -0
- package/dist/gitDelivery/agentOperationsRoutes.d.ts +33 -0
- package/dist/gitDelivery/agentOperationsRoutes.d.ts.map +1 -0
- package/dist/gitDelivery/agentOperationsRoutes.js +405 -0
- package/dist/gitDelivery/commitRoutes.d.ts +23 -0
- package/dist/gitDelivery/commitRoutes.d.ts.map +1 -0
- package/dist/gitDelivery/commitRoutes.js +204 -0
- package/dist/gitDelivery/evidenceRoutes.d.ts +9 -0
- package/dist/gitDelivery/evidenceRoutes.d.ts.map +1 -0
- package/dist/gitDelivery/evidenceRoutes.js +101 -0
- package/dist/gitDelivery/execution.d.ts +38 -0
- package/dist/gitDelivery/execution.d.ts.map +1 -0
- package/dist/gitDelivery/execution.js +117 -0
- package/dist/gitDelivery/localMutationRoutes.d.ts +30 -0
- package/dist/gitDelivery/localMutationRoutes.d.ts.map +1 -0
- package/dist/gitDelivery/localMutationRoutes.js +165 -0
- package/dist/gitDelivery/mergeExecution.d.ts +63 -0
- package/dist/gitDelivery/mergeExecution.d.ts.map +1 -0
- package/dist/gitDelivery/mergeExecution.js +168 -0
- package/dist/gitDelivery/mergeRoutes.d.ts +12 -0
- package/dist/gitDelivery/mergeRoutes.d.ts.map +1 -0
- package/dist/gitDelivery/mergeRoutes.js +218 -0
- package/dist/gitDelivery/mutationEvidenceLedger.d.ts +23 -0
- package/dist/gitDelivery/mutationEvidenceLedger.d.ts.map +1 -0
- package/dist/gitDelivery/mutationEvidenceLedger.js +87 -0
- package/dist/gitDelivery/prExecution.d.ts +54 -0
- package/dist/gitDelivery/prExecution.d.ts.map +1 -0
- package/dist/gitDelivery/prExecution.js +192 -0
- package/dist/gitDelivery/prRoutes.d.ts +12 -0
- package/dist/gitDelivery/prRoutes.d.ts.map +1 -0
- package/dist/gitDelivery/prRoutes.js +256 -0
- package/dist/gitDelivery/pushExecution.d.ts +43 -0
- package/dist/gitDelivery/pushExecution.d.ts.map +1 -0
- package/dist/gitDelivery/pushExecution.js +124 -0
- package/dist/gitDelivery/pushRoutes.d.ts +12 -0
- package/dist/gitDelivery/pushRoutes.d.ts.map +1 -0
- package/dist/gitDelivery/pushRoutes.js +200 -0
- package/dist/gitDelivery/requestGuards.d.ts +15 -0
- package/dist/gitDelivery/requestGuards.d.ts.map +1 -0
- package/dist/gitDelivery/requestGuards.js +97 -0
- package/dist/gitDelivery/syncEvidence.d.ts +37 -0
- package/dist/gitDelivery/syncEvidence.d.ts.map +1 -0
- package/dist/gitDelivery/syncEvidence.js +85 -0
- package/dist/gitDelivery/syncExecution.d.ts +30 -0
- package/dist/gitDelivery/syncExecution.d.ts.map +1 -0
- package/dist/gitDelivery/syncExecution.js +266 -0
- package/dist/gitDelivery/syncRoutes.d.ts +13 -0
- package/dist/gitDelivery/syncRoutes.d.ts.map +1 -0
- package/dist/gitDelivery/syncRoutes.js +200 -0
- package/dist/gitPorcelainStatus.d.ts +15 -0
- package/dist/gitPorcelainStatus.d.ts.map +1 -0
- package/dist/gitPorcelainStatus.js +104 -0
- package/dist/gitRepositoryReads.d.ts +10 -0
- package/dist/gitRepositoryReads.d.ts.map +1 -0
- package/dist/gitRepositoryReads.js +314 -0
- package/dist/gitRepositoryRoutes.d.ts +7 -0
- package/dist/gitRepositoryRoutes.d.ts.map +1 -0
- package/dist/gitRepositoryRoutes.js +221 -0
- package/dist/gitRoutes.d.ts +66 -0
- package/dist/gitRoutes.d.ts.map +1 -0
- package/dist/gitRoutes.js +543 -0
- package/dist/governed-workflow.d.ts +2 -0
- package/dist/governed-workflow.d.ts.map +1 -1
- package/dist/governed-workflow.js +4 -0
- package/dist/grounded-qa-hybrid.d.ts.map +1 -1
- package/dist/grounded-qa-hybrid.js +2 -0
- package/dist/grounded-qa-multi-source.d.ts.map +1 -1
- package/dist/grounded-qa-multi-source.js +1 -0
- package/dist/grounded-qa.d.ts +11 -0
- package/dist/grounded-qa.d.ts.map +1 -1
- package/dist/grounded-qa.js +14 -4
- package/dist/headers.d.ts +4 -1
- package/dist/headers.d.ts.map +1 -1
- package/dist/headers.js +11 -4
- package/dist/index.d.ts +8 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +9 -1
- package/dist/local-knowledge-grounded-qa.d.ts.map +1 -1
- package/dist/local-knowledge-grounded-qa.js +11 -2
- package/dist/qualityIntelligence/figmaSnapshotRoutes.d.ts +1 -1
- package/dist/qualityIntelligence/figmaSnapshotRoutes.d.ts.map +1 -1
- package/dist/qualityIntelligence/figmaSnapshotRoutes.js +1 -1
- package/dist/read-handlers.d.ts +5 -0
- package/dist/read-handlers.d.ts.map +1 -1
- package/dist/read-handlers.js +57 -1
- package/dist/routes.d.ts.map +1 -1
- package/dist/routes.js +260 -12
- package/dist/run-engine.d.ts.map +1 -1
- package/dist/run-engine.js +3 -0
- package/dist/run-handlers.d.ts +0 -1
- package/dist/run-handlers.d.ts.map +1 -1
- package/dist/run-handlers.js +64 -211
- package/dist/run-request.d.ts +11 -0
- package/dist/run-request.d.ts.map +1 -1
- package/dist/run-request.js +158 -10
- package/dist/runtime/capabilityDetector.d.ts +38 -0
- package/dist/runtime/capabilityDetector.d.ts.map +1 -0
- package/dist/runtime/capabilityDetector.js +443 -0
- package/dist/runtime/capabilityRoutes.d.ts +9 -0
- package/dist/runtime/capabilityRoutes.d.ts.map +1 -0
- package/dist/runtime/capabilityRoutes.js +45 -0
- package/dist/runtime/containerEngineDetector.d.ts +17 -0
- package/dist/runtime/containerEngineDetector.d.ts.map +1 -0
- package/dist/runtime/containerEngineDetector.js +222 -0
- package/dist/runtime/containerRoutes.d.ts +8 -0
- package/dist/runtime/containerRoutes.d.ts.map +1 -0
- package/dist/runtime/containerRoutes.js +207 -0
- package/dist/runtime/containerRunner-errors.d.ts +18 -0
- package/dist/runtime/containerRunner-errors.d.ts.map +1 -0
- package/dist/runtime/containerRunner-errors.js +42 -0
- package/dist/runtime/containerRunner-evidence.d.ts +24 -0
- package/dist/runtime/containerRunner-evidence.d.ts.map +1 -0
- package/dist/runtime/containerRunner-evidence.js +74 -0
- package/dist/runtime/containerRunner.d.ts +37 -0
- package/dist/runtime/containerRunner.d.ts.map +1 -0
- package/dist/runtime/containerRunner.js +443 -0
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +24 -4
- package/dist/store/db.d.ts.map +1 -1
- package/dist/store/db.js +2 -1
- package/dist/store/index.d.ts +1 -1
- package/dist/store/index.d.ts.map +1 -1
- package/dist/store/messages.d.ts +2 -1
- package/dist/store/messages.d.ts.map +1 -1
- package/dist/store/messages.js +46 -4
- package/dist/store/schema.d.ts +1 -1
- package/dist/store/schema.d.ts.map +1 -1
- package/dist/store/schema.js +68 -1
- package/dist/store/types.d.ts +3 -2
- package/dist/store/types.d.ts.map +1 -1
- package/dist/task-workspace/active-store.d.ts +21 -0
- package/dist/task-workspace/active-store.d.ts.map +1 -0
- package/dist/task-workspace/active-store.js +55 -0
- package/dist/task-workspace/authorization.d.ts +7 -0
- package/dist/task-workspace/authorization.d.ts.map +1 -0
- package/dist/task-workspace/authorization.js +54 -0
- package/dist/task-workspace/binding.d.ts +3 -0
- package/dist/task-workspace/binding.d.ts.map +1 -0
- package/dist/task-workspace/binding.js +22 -0
- package/dist/task-workspace/cleanup.d.ts +4 -0
- package/dist/task-workspace/cleanup.d.ts.map +1 -0
- package/dist/task-workspace/cleanup.js +428 -0
- package/dist/task-workspace/errors.d.ts +14 -0
- package/dist/task-workspace/errors.d.ts.map +1 -0
- package/dist/task-workspace/errors.js +81 -0
- package/dist/task-workspace/evidence.d.ts +32 -0
- package/dist/task-workspace/evidence.d.ts.map +1 -0
- package/dist/task-workspace/evidence.js +52 -0
- package/dist/task-workspace/field-safety.d.ts +3 -0
- package/dist/task-workspace/field-safety.d.ts.map +1 -0
- package/dist/task-workspace/field-safety.js +42 -0
- package/dist/task-workspace/health.d.ts +4 -0
- package/dist/task-workspace/health.d.ts.map +1 -0
- package/dist/task-workspace/health.js +163 -0
- package/dist/task-workspace/lifecycle.d.ts +3 -0
- package/dist/task-workspace/lifecycle.d.ts.map +1 -0
- package/dist/task-workspace/lifecycle.js +248 -0
- package/dist/task-workspace/locks.d.ts +13 -0
- package/dist/task-workspace/locks.d.ts.map +1 -0
- package/dist/task-workspace/locks.js +44 -0
- package/dist/task-workspace/managed-root.d.ts +7 -0
- package/dist/task-workspace/managed-root.d.ts.map +1 -0
- package/dist/task-workspace/managed-root.js +98 -0
- package/dist/task-workspace/mutex.d.ts +8 -0
- package/dist/task-workspace/mutex.d.ts.map +1 -0
- package/dist/task-workspace/mutex.js +82 -0
- package/dist/task-workspace/naming.d.ts +15 -0
- package/dist/task-workspace/naming.d.ts.map +1 -0
- package/dist/task-workspace/naming.js +0 -0
- package/dist/task-workspace/provisioning.d.ts +3 -0
- package/dist/task-workspace/provisioning.d.ts.map +1 -0
- package/dist/task-workspace/provisioning.js +528 -0
- package/dist/task-workspace/reconciliation.d.ts +15 -0
- package/dist/task-workspace/reconciliation.d.ts.map +1 -0
- package/dist/task-workspace/reconciliation.js +274 -0
- package/dist/task-workspace/repair.d.ts +3 -0
- package/dist/task-workspace/repair.d.ts.map +1 -0
- package/dist/task-workspace/repair.js +286 -0
- package/dist/task-workspace/routes.d.ts +19 -0
- package/dist/task-workspace/routes.d.ts.map +1 -0
- package/dist/task-workspace/routes.js +481 -0
- package/dist/task-workspace/store.d.ts +12 -0
- package/dist/task-workspace/store.d.ts.map +1 -0
- package/dist/task-workspace/store.js +128 -0
- package/dist/task-workspace/types.d.ts +170 -0
- package/dist/task-workspace/types.d.ts.map +1 -0
- package/dist/task-workspace/types.js +5 -0
- package/dist/voice-action-governance.d.ts +23 -0
- package/dist/voice-action-governance.d.ts.map +1 -0
- package/dist/voice-action-governance.js +126 -0
- package/dist/voice-handlers.d.ts +6 -0
- package/dist/voice-handlers.d.ts.map +1 -0
- package/dist/voice-handlers.js +570 -0
- package/dist/voice-realtime-grounded-tool.d.ts +31 -0
- package/dist/voice-realtime-grounded-tool.d.ts.map +1 -0
- package/dist/voice-realtime-grounded-tool.js +322 -0
- package/dist/voice-realtime.d.ts +69 -0
- package/dist/voice-realtime.d.ts.map +1 -0
- package/dist/voice-realtime.js +787 -0
- package/dist/workspace-state-handlers.d.ts +5 -0
- package/dist/workspace-state-handlers.d.ts.map +1 -0
- package/dist/workspace-state-handlers.js +106 -0
- package/package.json +20 -19
- package/dist/grounded-handoff.d.ts +0 -4
- package/dist/grounded-handoff.d.ts.map +0 -1
- package/dist/grounded-handoff.js +0 -445
|
@@ -0,0 +1,570 @@
|
|
|
1
|
+
// BFF voice dictation route (Issue #494, Epic #491, ADR-0058 D1/D2/D4/D6). `POST /api/voice/transcribe`
|
|
2
|
+
// accepts one short controlled composer-dictation clip and returns its transcript. The route is
|
|
3
|
+
// capability-gated: it transcribes only when the resolved voice capability advertises speech-to-text
|
|
4
|
+
// (AC1), and otherwise answers a deterministic, secret-free `VOICE_UNAVAILABLE` so Keiko stays fully
|
|
5
|
+
// usable in no-voice / policy-disabled / unreachable deployments (AC2, ADR-0058 D1).
|
|
6
|
+
//
|
|
7
|
+
// The audio rides inside the existing JSON + CSRF request envelope (base64 `audio` field) so the
|
|
8
|
+
// server's "state-changing requests must be JSON and carry the CSRF guard" invariant is preserved
|
|
9
|
+
// unchanged — no relaxation of the BFF media-type or CSRF gate. The decoded audio is forwarded once
|
|
10
|
+
// to the configured STT provider through the Model Gateway egress seam (`gatewayFetch`, ADR-0038)
|
|
11
|
+
// and is held only in memory for the duration of the request: it is never written to the evidence
|
|
12
|
+
// store, a side file, a log, or any other on-disk location (AC3, "no raw audio persistence").
|
|
13
|
+
// Provider base URLs and credentials never appear in any response (AC4), and every failure is a
|
|
14
|
+
// static, redacted envelope carrying no provider body, URL, path, or network detail (AC5).
|
|
15
|
+
import { requestSpeechToText, requestTextToSpeech, requestTextToSpeechStream, resolveVoiceCapability, selectSpeechOutputModel, selectSpeechToTextModel, selectVoicePersonaVoice, VOICE_PERSONAS, } from "@oscharko-dev/keiko-model-gateway";
|
|
16
|
+
import { errorBody, STREAMING } from "./routes.js";
|
|
17
|
+
import { currentGatewayConfig, currentGatewayEgressConfig } from "./deps.js";
|
|
18
|
+
import { isVoiceDisabledByPolicy } from "./read-handlers.js";
|
|
19
|
+
// The decoded-audio ceiling for one dictation clip. This is the authoritative bound on the
|
|
20
|
+
// transcribable duration: regardless of codec, a clip cannot exceed this many bytes, so the maximum
|
|
21
|
+
// possible duration is bounded even though precise server-side duration measurement would require
|
|
22
|
+
// decoding the container (out of scope — no audio-processing dependency, ADR-0058 supply-chain D8).
|
|
23
|
+
const MAX_AUDIO_BYTES = 4_000_000;
|
|
24
|
+
// The JSON envelope ceiling. base64 inflates by ~4/3, so this comfortably holds MAX_AUDIO_BYTES of
|
|
25
|
+
// base64 plus the small JSON field overhead while still rejecting an oversized body early (413).
|
|
26
|
+
const MAX_BODY_BYTES = 6_000_000;
|
|
27
|
+
// Upper bound on a declared dictation length. "Short controlled dictation" is bounded to two minutes;
|
|
28
|
+
// a clip declaring a longer duration is rejected before any provider call.
|
|
29
|
+
const MAX_DICTATION_MS = 120_000;
|
|
30
|
+
// Closed allowlist of accepted audio container MIME types (base type, parameters such as
|
|
31
|
+
// `;codecs=opus` are stripped before the check). Matches what a browser MediaRecorder commonly emits.
|
|
32
|
+
const ALLOWED_AUDIO_MIME = new Set([
|
|
33
|
+
"audio/webm",
|
|
34
|
+
"audio/ogg",
|
|
35
|
+
"audio/wav",
|
|
36
|
+
"audio/x-wav",
|
|
37
|
+
"audio/wave",
|
|
38
|
+
"audio/mp4",
|
|
39
|
+
"audio/m4a",
|
|
40
|
+
"audio/x-m4a",
|
|
41
|
+
"audio/mpeg",
|
|
42
|
+
"audio/mp3",
|
|
43
|
+
"audio/flac",
|
|
44
|
+
]);
|
|
45
|
+
// A pure base64 payload (no data: URI prefix, no whitespace), length a multiple of four. Anchored so
|
|
46
|
+
// a malformed or injection-shaped value is rejected deterministically before decoding.
|
|
47
|
+
const BASE64_PATTERN = /^[A-Za-z0-9+/]+={0,2}$/;
|
|
48
|
+
// BCP-47-ish language tag, bounded length, anchored so it can never break the multipart field header.
|
|
49
|
+
const LANGUAGE_PATTERN = /^[A-Za-z]{2,3}(?:-[A-Za-z0-9]{2,8})*$/;
|
|
50
|
+
const MAX_LANGUAGE_LENGTH = 16;
|
|
51
|
+
// Error type used to signal that a request body exceeded the byte ceiling while being read.
// It carries a fixed message and a stable `name` so callers can recognise it via `instanceof`.
class BodyTooLargeError extends Error {
    constructor() {
        const message = "request body too large";
        super(message);
        this.name = "BodyTooLargeError";
    }
}
|
|
57
|
+
// True for any non-null, non-array value whose `typeof` is "object".
function isRecord(value) {
    if (value === null || Array.isArray(value)) {
        return false;
    }
    return typeof value === "object";
}
|
|
60
|
+
// Structural check for a route result: a record with a numeric `status` and a `body` key.
// The check is purely shape-based, so any record with those two keys satisfies it.
function isRouteResult(value) {
    if (!isRecord(value)) {
        return false;
    }
    const hasNumericStatus = typeof value.status === "number";
    return hasNumericStatus && "body" in value;
}
|
|
63
|
+
// Collects the request stream into a UTF-8 string.
// Resolves with the decoded body on "end"; rejects with BodyTooLargeError as soon as the running
// byte count passes MAX_BODY_BYTES, and rejects with the stream's own error on "error".
function readBody(req) {
    return new Promise((resolveBody, reject) => {
        const received = [];
        let byteCount = 0;
        let overLimit = false;
        const onData = (chunk) => {
            byteCount += chunk.length;
            if (byteCount <= MAX_BODY_BYTES) {
                received.push(chunk);
                return;
            }
            if (overLimit) {
                // Already rejected; later chunks are simply dropped.
                return;
            }
            overLimit = true;
            // Release what was buffered so far, fail the read, and keep the stream flowing.
            received.length = 0;
            reject(new BodyTooLargeError());
            req.resume();
        };
        const onEnd = () => {
            if (overLimit) {
                return;
            }
            resolveBody(Buffer.concat(received).toString("utf8"));
        };
        req.on("data", onData);
        req.on("end", onEnd);
        req.on("error", reject);
    });
}
|
|
89
|
+
// Reads the request body and parses it as a JSON object.
//
// Returns the parsed object on success, or a `{ status, body }` error result:
//   - 413 PAYLOAD_TOO_LARGE when readBody rejects with BodyTooLargeError,
//   - 400 BAD_REQUEST when the body is not valid JSON, is not a JSON object, or is shaped like a
//     route result (see below).
// An empty body is treated as `{}`. Any other read error is rethrown to the caller.
//
// Callers tell the two outcomes apart with the structural `isRouteResult` check. A client-supplied
// object that itself carries a numeric `status` and a `body` key would therefore be mistaken for an
// error result and returned to the client verbatim (unvalidated and unredacted). Such a body is
// rejected here so that only server-built results can ever satisfy that check.
async function readJsonObject(req) {
    let raw;
    try {
        raw = await readBody(req);
    }
    catch (error) {
        if (error instanceof BodyTooLargeError) {
            return {
                status: 413,
                body: errorBody("PAYLOAD_TOO_LARGE", "Request body exceeds the size limit."),
            };
        }
        throw error;
    }
    let parsed;
    try {
        parsed = raw.length === 0 ? {} : JSON.parse(raw);
    }
    catch {
        return { status: 400, body: errorBody("BAD_REQUEST", "Request body is not valid JSON.") };
    }
    if (!isRecord(parsed)) {
        return { status: 400, body: errorBody("BAD_REQUEST", "Request body must be a JSON object.") };
    }
    if (isRouteResult(parsed)) {
        // Never let client JSON masquerade as a server-built route result.
        return {
            status: 400,
            body: errorBody("BAD_REQUEST", "Request body contains unsupported fields."),
        };
    }
    return parsed;
}
|
|
115
|
+
// Deterministic, secret-free disabled response (AC1/AC2, ADR-0058 D1). Returned both when no STT
|
|
116
|
+
// capability is configured/enabled and when a configured provider is selected but missing — the
|
|
117
|
+
// browser sees a stable shape and Keiko remains fully usable.
|
|
118
|
+
// Builds the fixed 503 VOICE_UNAVAILABLE result for the dictation route, passed through the redactor.
function unavailable(deps) {
    const envelope = errorBody("VOICE_UNAVAILABLE", "Speech-to-text dictation is not available.");
    return { status: 503, body: deps.redactor(envelope) };
}
|
|
124
|
+
// Builds a 400 result whose body is the redacted error envelope for the given code and message.
function badRequest(deps, code, message) {
    const envelope = errorBody(code, message);
    return { status: 400, body: deps.redactor(envelope) };
}
|
|
127
|
+
// Static, secret-free mapping from a coded adapter failure to an HTTP response (AC5). No provider
|
|
128
|
+
// body, URL, path, IP, or credential is ever interpolated — only fixed operator-safe text.
|
|
129
|
+
// Maps a coded transcription failure `kind` to a fixed HTTP result. Every message is a static
// string; nothing from the failure itself is interpolated. Unrecognised kinds fall through to 502.
function providerErrorResult(deps, kind) {
    const respond = (status, code, message) => ({
        status,
        body: deps.redactor(errorBody(code, message)),
    });
    switch (kind) {
        case "rate-limited":
            return respond(429, "VOICE_RATE_LIMITED", "The speech-to-text provider is rate-limited. Retry shortly.");
        case "timeout":
            return respond(504, "VOICE_TIMEOUT", "The speech-to-text request timed out.");
        case "payload-too-large":
            return respond(413, "PAYLOAD_TOO_LARGE", "The audio clip is too large for the speech-to-text provider.");
        case "unsupported-model":
            // The configured model is not available at the provider — reported as plain unavailability.
            return unavailable(deps);
        default:
            return respond(502, "VOICE_PROVIDER_ERROR", "The speech-to-text provider could not transcribe the audio.");
    }
}
|
|
157
|
+
// Reduces a MIME string to its lower-cased, trimmed base type (everything before the first `;`)
// and returns it only when it is in ALLOWED_AUDIO_MIME. Non-strings and unlisted types yield undefined.
function normalizeMimeType(raw) {
    if (typeof raw !== "string") {
        return undefined;
    }
    const semicolonAt = raw.indexOf(";");
    const withoutParams = semicolonAt === -1 ? raw : raw.slice(0, semicolonAt);
    const baseType = withoutParams.trim().toLowerCase();
    if (!ALLOWED_AUDIO_MIME.has(baseType)) {
        return undefined;
    }
    return baseType;
}
|
|
164
|
+
// Decodes the base64 `audio` field.
// Returns "invalid" when the value is not a non-empty string matching BASE64_PATTERN with a length
// that is a multiple of four, "empty" when decoding yields zero bytes, otherwise the decoded Buffer.
function decodeAudio(raw) {
    const wellFormed = typeof raw === "string" &&
        raw.length > 0 &&
        raw.length % 4 === 0 &&
        BASE64_PATTERN.test(raw);
    if (!wellFormed) {
        return "invalid";
    }
    const bytes = Buffer.from(raw, "base64");
    return bytes.byteLength === 0 ? "empty" : bytes;
}
|
|
177
|
+
// Checks the optional declared clip duration. An absent value is accepted; a present value must be
// a positive integer no greater than MAX_DICTATION_MS. Returns "ok" or "invalid".
function validateDurationMs(raw) {
    if (raw === undefined) {
        return "ok";
    }
    // Number.isInteger already rejects NaN and the infinities.
    const withinLimit = typeof raw === "number" &&
        Number.isInteger(raw) &&
        raw > 0 &&
        raw <= MAX_DICTATION_MS;
    return withinLimit ? "ok" : "invalid";
}
|
|
190
|
+
// Checks the optional language tag. An absent value is accepted with `language: undefined`; a
// present value must be a string of at most MAX_LANGUAGE_LENGTH characters matching LANGUAGE_PATTERN.
// Returns `{ ok: true, language }` or `{ ok: false }`.
function validateLanguage(raw) {
    if (raw === undefined) {
        return { ok: true, language: undefined };
    }
    const wellFormed = typeof raw === "string" &&
        raw.length <= MAX_LANGUAGE_LENGTH &&
        LANGUAGE_PATTERN.test(raw);
    return wellFormed ? { ok: true, language: raw } : { ok: false };
}
|
|
199
|
+
// Validates and normalizes the request fields, returning either the audio payload to transcribe or a
|
|
200
|
+
// deterministic 4xx RouteResult. Order: MIME → audio (base64) → decoded size → duration → language.
|
|
201
|
+
// Validates the parsed request fields one after another and stops at the first failure.
// Checks, in order: mimeType allowlist, base64 audio, decoded size against MAX_AUDIO_BYTES,
// declared durationMs, language tag. Returns `{ audio, mimeType, language? }` on success, or a
// redacted 400/413 result on failure.
function validateRequest(body, deps) {
    const mimeType = normalizeMimeType(body.mimeType);
    if (mimeType === undefined) {
        return badRequest(deps, "UNSUPPORTED_AUDIO_FORMAT", "The audio mimeType is missing or not a supported dictation format.");
    }
    const audio = decodeAudio(body.audio);
    if (audio === "invalid" || audio === "empty") {
        const detail = audio === "invalid"
            ? "The audio field must be non-empty base64 data."
            : "The decoded audio is empty.";
        return badRequest(deps, "INVALID_AUDIO", detail);
    }
    if (audio.byteLength > MAX_AUDIO_BYTES) {
        const envelope = errorBody("PAYLOAD_TOO_LARGE", "The audio clip exceeds the size limit.");
        return { status: 413, body: deps.redactor(envelope) };
    }
    const durationVerdict = validateDurationMs(body.durationMs);
    if (durationVerdict === "invalid") {
        return badRequest(deps, "INVALID_DURATION", "The declared durationMs must be a positive integer within the dictation limit.");
    }
    const languageCheck = validateLanguage(body.language);
    if (!languageCheck.ok) {
        return badRequest(deps, "INVALID_LANGUAGE", "The language tag is not a valid BCP-47 language.");
    }
    const accepted = { audio, mimeType };
    if (languageCheck.language !== undefined) {
        // The language key is only present when the client supplied a tag.
        accepted.language = languageCheck.language;
    }
    return accepted;
}
|
|
232
|
+
// Resolves the configured STT provider to transcribe with, or undefined when none is configured /
|
|
233
|
+
// usable. Mirrors the capability gate: a selected model always has a matching provider record.
|
|
234
|
+
// Looks up the provider record whose modelId equals the model chosen by selectSpeechToTextModel.
// Returns undefined when no model is selected or no provider record matches.
function resolveSttProvider(config) {
    const wantedModelId = selectSpeechToTextModel(config);
    if (wantedModelId === undefined) {
        return undefined;
    }
    const matchesSelectedModel = (provider) => provider.modelId === wantedModelId;
    return config.providers.find(matchesSelectedModel);
}
|
|
241
|
+
// Capability gate (AC1, ADR-0058 D1): resolves the STT provider to dictate against, or a clean
|
|
242
|
+
// VOICE_UNAVAILABLE RouteResult when no speech-to-text capability is configured, enabled, reachable,
|
|
243
|
+
// or backed by a provider record. Runs before any audio is read so a disabled deployment does zero
|
|
244
|
+
// audio work.
|
|
245
|
+
// Capability gate for dictation. Returns the provider record to transcribe with, or the
// VOICE_UNAVAILABLE result when there is no gateway config, the resolved voice capability is not
// available, it does not advertise speech-to-text, or no provider record can be resolved.
function selectDictationProvider(deps) {
    const config = currentGatewayConfig(deps);
    const policyDisabled = isVoiceDisabledByPolicy(deps.env);
    // The capability is resolved even without a config, against an empty provider list.
    const voice = resolveVoiceCapability(config ?? { providers: [] }, { policyDisabled });
    const canDictate = config !== undefined && voice.available && voice.capabilities.speechToText;
    if (!canDictate) {
        return unavailable(deps);
    }
    const provider = resolveSttProvider(config);
    return provider ?? unavailable(deps);
}
|
|
254
|
+
// Assembles the argument object for the transcription call from the provider record and the
// validated clip. Optional provider fields, the language, and the egress config are included only
// when defined. Egress comes from the provider, falling back to currentGatewayEgressConfig(deps).
function buildSttRequest(provider, validated, deps) {
    const egress = provider.egress ?? currentGatewayEgressConfig(deps);
    const request = {
        endpoint: provider.baseUrl,
        apiKey: provider.apiKey,
    };
    // Keys are added in a fixed sequence so the resulting object keeps the same key order.
    for (const optionalKey of ["apiKeyHeaderName", "endpointStyle", "apiVersion"]) {
        if (provider[optionalKey] !== undefined) {
            request[optionalKey] = provider[optionalKey];
        }
    }
    request.modelId = provider.modelId;
    request.audio = validated.audio;
    request.mimeType = validated.mimeType;
    if (validated.language !== undefined) {
        request.language = validated.language;
    }
    if (egress !== undefined) {
        request.egress = egress;
    }
    request.timeoutMs = provider.timeoutMs;
    return request;
}
|
|
272
|
+
// Success body: only the transcript and content-free provider metadata, redacted defensively. The
|
|
273
|
+
// audio buffer is not persisted and goes out of scope (AC3); no credential or base URL is present
|
|
274
|
+
// in this payload (AC4).
|
|
275
|
+
function transcriptResult(deps, value) {
|
|
276
|
+
return {
|
|
277
|
+
status: 200,
|
|
278
|
+
body: deps.redactor({
|
|
279
|
+
transcript: value.transcript,
|
|
280
|
+
...(value.confidence !== undefined ? { confidence: value.confidence } : {}),
|
|
281
|
+
...(value.language !== undefined ? { language: value.language } : {}),
|
|
282
|
+
...(value.durationMs !== undefined ? { durationMs: value.durationMs } : {}),
|
|
283
|
+
}),
|
|
284
|
+
};
|
|
285
|
+
}
|
|
286
|
+
// Handler for one dictation request. The stages run in sequence and each may end the request early
// with a route result: provider selection (capability gate), JSON body read, field validation.
// If all pass, the clip is sent to the transcription function and its outcome is mapped to either
// the transcript result or a static provider-error result.
export async function handleVoiceTranscribe(ctx, deps) {
    const provider = selectDictationProvider(deps);
    if (isRouteResult(provider)) {
        return provider;
    }
    const payload = await readJsonObject(ctx.req);
    if (isRouteResult(payload)) {
        return payload;
    }
    const clip = validateRequest(payload, deps);
    if (isRouteResult(clip)) {
        return clip;
    }
    // deps.voiceTranscriptionRequest, when supplied, is used in place of requestSpeechToText.
    const transcribe = deps.voiceTranscriptionRequest ?? requestSpeechToText;
    const outcome = await transcribe(buildSttRequest(provider, clip, deps));
    if (outcome.ok) {
        return transcriptResult(deps, outcome.value);
    }
    return providerErrorResult(deps, outcome.kind);
}
|
|
305
|
+
// ---------------------------------------------------------------------------
|
|
306
|
+
// BFF assistant speech-output (synthesis) route (Issue #1558, Epic #1556, ADR-0095).
|
|
307
|
+
// `POST /api/voice/speak` synthesizes the visible assistant answer text into audible output through
|
|
308
|
+
// the Model Gateway text-to-speech adapter and returns the audio as base64 inside the standard JSON
|
|
309
|
+
// envelope. It is capability-gated: it synthesizes only when the resolved voice capability advertises
|
|
310
|
+
// speech output (AC1), and otherwise answers a deterministic, secret-free `VOICE_UNAVAILABLE` so the
|
|
311
|
+
// conversation degrades to text without breaking (AC4). The answer text rides inside the existing
|
|
312
|
+
// JSON + CSRF request envelope, so the server's state-changing-request invariant is preserved
|
|
313
|
+
// unchanged. The synthesized audio is held only in memory for the duration of the request and is
|
|
314
|
+
// never written to the evidence store, a side file, a log, or any on-disk location ("no raw generated
|
|
315
|
+
// audio persistence"). Provider base URLs, credentials, and the credential-tier persona → voice-id
|
|
316
|
+
// mapping never appear in any response (the voice id stays server-side), and every failure is a
|
|
317
|
+
// static, redacted envelope carrying no provider body, URL, path, or network detail.
|
|
318
|
+
// ---------------------------------------------------------------------------
|
|
319
|
+
// The upper bound on the answer text submitted for synthesis. It matches the OpenAI-compatible
|
|
320
|
+
// `/audio/speech` input ceiling: rather than truncate a longer answer (which would make the spoken
|
|
321
|
+
// output diverge from the visible text, breaking AC2), an over-long answer is rejected and the spoken
|
|
322
|
+
// layer degrades to text (AC4). The visible assistant text is always present in the transcript.
|
|
323
|
+
// Enforced against `text.length` in validateSpeakRequest, so the limit counts UTF-16 code units.
const MAX_SPEECH_INPUT_CHARS = 4096;
|
|
324
|
+
// Closed, server-side set of audio container MIME types a synthesis response may be labelled with.
// A reported type is only forwarded to the browser when it is a member of this set; anything else is
// replaced by DEFAULT_SPEECH_MIME, so no provider-controlled string crosses the BFF boundary.
const ALLOWED_SPEECH_MIME = new Set([
    "audio/mpeg", "audio/ogg", "audio/aac",
    "audio/flac", "audio/wav", "audio/pcm",
]);
|
|
336
|
+
// Label substituted by speechResult and handleVoiceSpeakStream when the reported MIME type is not a
// member of ALLOWED_SPEECH_MIME.
const DEFAULT_SPEECH_MIME = "audio/mpeg";
|
|
337
|
+
// Builds the fixed 503 `VOICE_UNAVAILABLE` response used by the synthesis routes (AC1/AC4). The
// envelope text is static and is passed through `deps.redactor` before it is returned. Callers use it
// when speech output is not configured/enabled and when a provider or model cannot be resolved.
function speechUnavailable(deps) {
    const envelope = errorBody("VOICE_UNAVAILABLE", "Assistant speech output is not available.");
    const body = deps.redactor(envelope);
    return { status: 503, body };
}
|
|
345
|
+
// Translates a coded synthesis failure `kind` into a fixed HTTP response (AC4). Only static,
// operator-safe text is used — nothing from the provider is interpolated — and every envelope goes
// through `deps.redactor`.
//   "rate-limited"      → 429 VOICE_RATE_LIMITED
//   "timeout"           → 504 VOICE_TIMEOUT
//   "payload-too-large" → 413 PAYLOAD_TOO_LARGE
//   "unsupported-model" → same response as speechUnavailable
//   anything else       → 502 VOICE_PROVIDER_ERROR
function speechProviderErrorResult(deps, kind) {
    // Shared shape for every redacted error envelope produced below.
    const respond = (status, code, message) => ({
        status,
        body: deps.redactor(errorBody(code, message)),
    });
    switch (kind) {
        case "rate-limited":
            return respond(429, "VOICE_RATE_LIMITED", "The speech-output provider is rate-limited. Retry shortly.");
        case "timeout":
            return respond(504, "VOICE_TIMEOUT", "The speech-output request timed out.");
        case "payload-too-large":
            return respond(413, "PAYLOAD_TOO_LARGE", "The assistant response is too long for speech synthesis.");
        case "unsupported-model":
            // The configured model is not available at the provider — treated as unavailable.
            return speechUnavailable(deps);
        default:
            return respond(502, "VOICE_PROVIDER_ERROR", "The speech-output provider could not synthesize the audio.");
    }
}
|
|
375
|
+
// Capability gate (AC1). Resolves the voice capability from the current gateway config (an empty
// provider list stands in when no config exists) and the voice policy flag derived from `deps.env`.
// Returns the gateway config when it is defined and the capability reports both `available` and
// `capabilities.speechOutput`; otherwise returns the VOICE_UNAVAILABLE RouteResult.
function gateSpeechOutput(deps) {
    const gatewayConfig = currentGatewayConfig(deps);
    const policyDisabled = isVoiceDisabledByPolicy(deps.env);
    const capability = resolveVoiceCapability(gatewayConfig ?? { providers: [] }, { policyDisabled });
    const speechEnabled = gatewayConfig !== undefined && capability.available && capability.capabilities.speechOutput;
    if (speechEnabled) {
        return gatewayConfig;
    }
    return speechUnavailable(deps);
}
|
|
387
|
+
// True only when `value` is a string that is listed in VOICE_PERSONAS.
function isVoicePersona(value) {
    if (typeof value !== "string") {
        return false;
    }
    return VOICE_PERSONAS.includes(value);
}
|
|
390
|
+
// Validates a parsed synthesis request body. Checks run in this order:
//   1. `text` must be a string with non-whitespace content   → 400 INVALID_TEXT
//   2. `text.length` must not exceed MAX_SPEECH_INPUT_CHARS   → 413 PAYLOAD_TOO_LARGE
//   3. `persona`, when present, must satisfy isVoicePersona   → 400 INVALID_PERSONA
// On success returns `{ text }` (the text is not trimmed), plus `persona` when one was supplied.
function validateSpeakRequest(body, deps) {
    const answer = body.text;
    const isBlank = typeof answer !== "string" || answer.trim().length === 0;
    if (isBlank) {
        return badRequest(deps, "INVALID_TEXT", "The text field must be a non-empty string.");
    }
    if (answer.length > MAX_SPEECH_INPUT_CHARS) {
        const envelope = errorBody("PAYLOAD_TOO_LARGE", "The assistant response is too long for speech synthesis.");
        return { status: 413, body: deps.redactor(envelope) };
    }
    const requestedPersona = body.persona;
    if (requestedPersona === undefined) {
        return { text: answer };
    }
    if (!isVoicePersona(requestedPersona)) {
        return badRequest(deps, "INVALID_PERSONA", "The persona is not a supported voice persona.");
    }
    return { text: answer, persona: requestedPersona };
}
|
|
411
|
+
// Picks the synthesis target (Issue #1557 seam, ADR-0094 D6). Lookup order:
//   1. the requested persona, when one was given and selectVoicePersonaVoice maps it;
//   2. each entry of VOICE_PERSONAS in list order, first mapped one wins;
//   3. `{ modelId }` from selectSpeechOutputModel, or `undefined` when that yields nothing.
// The resolved target stays server-side; callers never place it in a response.
function resolveSpeechTarget(config, persona) {
    const requested = persona === undefined ? [] : [persona];
    for (const candidate of [...requested, ...VOICE_PERSONAS]) {
        const voice = selectVoicePersonaVoice(config, candidate);
        if (voice !== undefined) {
            return voice;
        }
    }
    const fallbackModelId = selectSpeechOutputModel(config);
    if (fallbackModelId === undefined) {
        return undefined;
    }
    return { modelId: fallbackModelId };
}
|
|
431
|
+
// The audio container requested for interactive assistant speech. Opus (audio/ogg) is browser-playable
|
|
432
|
+
// and measured ~25-35% faster end-to-end than the previous mp3 default while transferring ~4x fewer
|
|
433
|
+
// bytes (e.g. for a short reply: opus ~1.1s / ~18KB vs mp3 ~1.4s / ~72KB against the live endpoint),
|
|
434
|
+
// which lowers both the synth-to-first-audio wait and the base64 inflation of the JSON envelope. The
|
|
435
|
+
// MIME stays inside the server ALLOWED_SPEECH_MIME allowlist (audio/ogg).
|
|
436
|
+
// Sent as `responseFormat` in the request object assembled by buildTtsRequest.
const INTERACTIVE_SPEECH_FORMAT = "opus";
|
|
437
|
+
// Assembles the text-to-speech adapter request for `provider`. Optional provider settings
// (`apiKeyHeaderName`, `endpointStyle`, `apiVersion`), the target's `voiceId`, and the egress config
// are included only when defined. Egress comes from the provider, falling back to the current
// gateway egress config. The audio format is always INTERACTIVE_SPEECH_FORMAT.
function buildTtsRequest(provider, target, validated, deps) {
    const egress = provider.egress ?? currentGatewayEgressConfig(deps);
    const headerNamePart = provider.apiKeyHeaderName !== undefined ? { apiKeyHeaderName: provider.apiKeyHeaderName } : {};
    const endpointStylePart = provider.endpointStyle !== undefined ? { endpointStyle: provider.endpointStyle } : {};
    const apiVersionPart = provider.apiVersion !== undefined ? { apiVersion: provider.apiVersion } : {};
    const voicePart = target.voiceId !== undefined ? { voice: target.voiceId } : {};
    const egressPart = egress !== undefined ? { egress } : {};
    return {
        endpoint: provider.baseUrl,
        apiKey: provider.apiKey,
        ...headerNamePart,
        ...endpointStylePart,
        ...apiVersionPart,
        modelId: provider.modelId,
        input: validated.text,
        responseFormat: INTERACTIVE_SPEECH_FORMAT,
        ...voicePart,
        ...egressPart,
        timeoutMs: provider.timeoutMs,
    };
}
|
|
455
|
+
// Builds the 200 success response: the synthesized audio encoded as base64 plus an audio MIME label.
// The label is the reported `value.mimeType` only when it is a member of ALLOWED_SPEECH_MIME;
// otherwise DEFAULT_SPEECH_MIME is used, so no provider-controlled string crosses the boundary. The
// body is deliberately NOT passed through the secret redactor: it carries no credential or URL, and
// redacting a large base64 blob could corrupt the audio. Nothing here persists the audio.
function speechResult(value) {
    const reportedType = value.mimeType;
    const mimeType = ALLOWED_SPEECH_MIME.has(reportedType) ? reportedType : DEFAULT_SPEECH_MIME;
    const audio = Buffer.from(value.audio).toString("base64");
    return { status: 200, body: { audio, mimeType } };
}
|
|
468
|
+
// Common preamble of both speak routes. Steps, each of which may short-circuit with a RouteResult:
//   1. capability gate (gateSpeechOutput) — runs before the request body is read;
//   2. read the JSON body;
//   3. validate it (validateSpeakRequest);
//   4. resolve the voice target, then the provider whose `modelId` matches the target.
// A missing target or provider yields the VOICE_UNAVAILABLE response. On success returns
// `{ validated, provider, target }`.
async function resolveSpeakRequest(ctx, deps) {
    const gatewayConfig = gateSpeechOutput(deps);
    if (isRouteResult(gatewayConfig)) {
        return gatewayConfig;
    }
    const payload = await readJsonObject(ctx.req);
    if (isRouteResult(payload)) {
        return payload;
    }
    const validated = validateSpeakRequest(payload, deps);
    if (isRouteResult(validated)) {
        return validated;
    }
    const target = resolveSpeechTarget(gatewayConfig, validated.persona);
    if (target === undefined) {
        return speechUnavailable(deps);
    }
    const servesTargetModel = (candidate) => candidate.modelId === target.modelId;
    const provider = gatewayConfig.providers.find(servesTargetModel);
    if (provider === undefined) {
        return speechUnavailable(deps);
    }
    return { validated, provider, target };
}
|
|
493
|
+
// Buffered speak route handler. Resolves the request via resolveSpeakRequest, synthesizes through
// `deps.voiceSpeechRequest` (falling back to requestTextToSpeech), and returns either the base64
// success body or the static error response mapped from the failure kind.
export async function handleVoiceSpeak(ctx, deps) {
    const request = await resolveSpeakRequest(ctx, deps);
    if (isRouteResult(request)) {
        return request;
    }
    const synthesize = deps.voiceSpeechRequest ?? requestTextToSpeech;
    const ttsRequest = buildTtsRequest(request.provider, request.target, request.validated, deps);
    const outcome = await synthesize(ttsRequest);
    if (!outcome.ok) {
        return speechProviderErrorResult(deps, outcome.kind);
    }
    return speechResult(outcome.value);
}
|
|
502
|
+
// The streaming speak path requests raw PCM (the fastest provider format to first audio) and forwards
|
|
503
|
+
// the bytes to the browser un-buffered (no base64 JSON envelope) for AudioWorklet start-on-first-chunk
|
|
504
|
+
// playback. The buffered /api/voice/speak route stays as the universal fallback.
|
|
505
|
+
// Replaces the `responseFormat` produced by buildTtsRequest inside buildStreamTtsRequest.
const STREAM_SPEECH_FORMAT = "pcm";
|
|
506
|
+
// Streaming variant of the adapter request: identical to buildTtsRequest's output except that
// `responseFormat` is STREAM_SPEECH_FORMAT and the abort `signal` is attached.
function buildStreamTtsRequest(resolved, deps, signal) {
    const { provider, target, validated } = resolved;
    const bufferedRequest = buildTtsRequest(provider, target, validated, deps);
    return { ...bufferedRequest, responseFormat: STREAM_SPEECH_FORMAT, signal };
}
|
|
513
|
+
// Aborts the synthesis when the client disconnects (res "close" is the canonical signal), so a barge-in
|
|
514
|
+
// or navigation stops the provider stream rather than producing audio no one will hear.
|
|
515
|
+
function abortOnResClose(ctx) {
|
|
516
|
+
const controller = new AbortController();
|
|
517
|
+
ctx.res.on("close", () => {
|
|
518
|
+
controller.abort();
|
|
519
|
+
});
|
|
520
|
+
return controller;
|
|
521
|
+
}
|
|
522
|
+
// Pipes the provider audio stream to the response honoring backpressure (res.write → false aborts) and
|
|
523
|
+
// client disconnect. Once 200 + audio headers are sent no JSON error is possible, so a mid-stream
|
|
524
|
+
// failure just ends the partial stream — the client falls back to the buffered route on the next turn.
|
|
525
|
+
async function pipeAudioStream(ctx, body, controller) {
|
|
526
|
+
const reader = body.getReader();
|
|
527
|
+
try {
|
|
528
|
+
for (;;) {
|
|
529
|
+
const { done, value } = await reader.read();
|
|
530
|
+
if (done || controller.signal.aborted) {
|
|
531
|
+
break;
|
|
532
|
+
}
|
|
533
|
+
if (!ctx.res.write(value)) {
|
|
534
|
+
controller.abort();
|
|
535
|
+
ctx.res.destroy();
|
|
536
|
+
break;
|
|
537
|
+
}
|
|
538
|
+
}
|
|
539
|
+
}
|
|
540
|
+
catch {
|
|
541
|
+
// partial stream — ended in finally
|
|
542
|
+
}
|
|
543
|
+
finally {
|
|
544
|
+
try {
|
|
545
|
+
await reader.cancel();
|
|
546
|
+
}
|
|
547
|
+
catch {
|
|
548
|
+
// already released
|
|
549
|
+
}
|
|
550
|
+
ctx.res.end();
|
|
551
|
+
}
|
|
552
|
+
}
|
|
553
|
+
// Streaming speak route handler. Resolves the request via resolveSpeakRequest, ties an abort
// controller to the response's "close" event, and asks `deps.voiceSpeechStreamRequest` (falling back
// to requestTextToSpeechStream) for the audio stream. A failed outcome is returned as the static JSON
// error response. Otherwise it writes a 200 with an allowlisted Content-Type and `no-store`, pipes
// the provider body to the response, and returns STREAMING.
// NOTE(review): an unrecognized MIME type falls back to DEFAULT_SPEECH_MIME ("audio/mpeg") even
// though this route requests raw PCM — confirm the client does not rely on the Content-Type header.
export async function handleVoiceSpeakStream(ctx, deps) {
    const request = await resolveSpeakRequest(ctx, deps);
    if (isRouteResult(request)) {
        return request;
    }
    const controller = abortOnResClose(ctx);
    const synthesizeStream = deps.voiceSpeechStreamRequest ?? requestTextToSpeechStream;
    const ttsRequest = buildStreamTtsRequest(request, deps, controller.signal);
    const outcome = await synthesizeStream(ttsRequest);
    if (!outcome.ok) {
        return speechProviderErrorResult(deps, outcome.kind);
    }
    let contentType = DEFAULT_SPEECH_MIME;
    if (ALLOWED_SPEECH_MIME.has(outcome.value.mimeType)) {
        contentType = outcome.value.mimeType;
    }
    const headers = { "Content-Type": contentType, "Cache-Control": "no-store" };
    ctx.res.writeHead(200, headers);
    await pipeAudioStream(ctx, outcome.value.body, controller);
    return STREAMING;
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import type { ConversationMemoryResultWire, GroundedAnswer } from "@oscharko-dev/keiko-contracts/bff-wire";
|
|
2
|
+
import { type UiHandlerDeps } from "./deps.js";
|
|
3
|
+
import { type RouteContext, type RouteResult } from "./routes.js";
|
|
4
|
+
import type { Chat, ChatMessage } from "./store/index.js";
|
|
5
|
+
/**
 * Shape of the `toolOutput` member of {@link RealtimeGroundedToolResponse}. Module-private: the
 * interface is not exported from this declaration file.
 */
interface RealtimeGroundedToolOutput {
    /** Always the literal `"ok"`. */
    readonly status: "ok";
    readonly answer: string;
    /** Same type as the `groundingKind` member of `GroundedAnswer`. */
    readonly groundingKind: GroundedAnswer["groundingKind"];
    readonly elapsedMs: number;
    /** Read-only list of citation entries; `source` may be absent. */
    readonly citations: ReadonlyArray<{
        readonly marker: string;
        readonly label: string;
        readonly source?: string | undefined;
    }>;
    readonly evidenceRunId?: string | undefined;
    /** Identifiers of the user and assistant messages. */
    readonly persisted: {
        readonly userMessageId: string;
        readonly assistantMessageId: string;
    };
    readonly instruction: string;
}
|
|
22
|
+
/**
 * Exported response shape bundling a chat, its messages, a grounded answer, the tool output and an
 * optional conversation-memory result.
 * NOTE(review): presumably the body produced by `handleRealtimeGroundedVoiceTool`; the declaration
 * file itself does not link the two — confirm against the implementation.
 */
export interface RealtimeGroundedToolResponse {
    readonly chat: Chat;
    readonly messages: ReadonlyArray<ChatMessage>;
    readonly groundedAnswer: GroundedAnswer;
    readonly toolOutput: RealtimeGroundedToolOutput;
    readonly memory?: ConversationMemoryResultWire | undefined;
}
|
|
29
|
+
/**
 * Handler for the realtime grounded voice tool.
 *
 * @param ctx - Route context of the incoming request.
 * @param deps - UI handler dependencies.
 * @returns A promise that resolves to the route result.
 */
export declare function handleRealtimeGroundedVoiceTool(ctx: RouteContext, deps: UiHandlerDeps): Promise<RouteResult>;
|
|
30
|
+
export {};
|
|
31
|
+
//# sourceMappingURL=voice-realtime-grounded-tool.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"voice-realtime-grounded-tool.d.ts","sourceRoot":"","sources":["../src/voice-realtime-grounded-tool.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EACV,4BAA4B,EAC5B,cAAc,EAGf,MAAM,wCAAwC,CAAC;AAGhD,OAAO,EAAwB,KAAK,aAAa,EAAE,MAAM,WAAW,CAAC;AACrE,OAAO,EAAa,KAAK,YAAY,EAAE,KAAK,WAAW,EAAE,MAAM,aAAa,CAAC;AAU7E,OAAO,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAyB1D,UAAU,0BAA0B;IAClC,QAAQ,CAAC,MAAM,EAAE,IAAI,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,aAAa,EAAE,cAAc,CAAC,eAAe,CAAC,CAAC;IACxD,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,SAAS,EAAE,SAAS;QAC3B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;QACxB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;QACvB,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;KACtC,EAAE,CAAC;IACJ,QAAQ,CAAC,aAAa,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5C,QAAQ,CAAC,SAAS,EAAE;QAClB,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC;QAC/B,QAAQ,CAAC,kBAAkB,EAAE,MAAM,CAAC;KACrC,CAAC;IACF,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAC9B;AAED,MAAM,WAAW,4BAA4B;IAC3C,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC;IACpB,QAAQ,CAAC,QAAQ,EAAE,SAAS,WAAW,EAAE,CAAC;IAC1C,QAAQ,CAAC,cAAc,EAAE,cAAc,CAAC;IACxC,QAAQ,CAAC,UAAU,EAAE,0BAA0B,CAAC;IAChD,QAAQ,CAAC,MAAM,CAAC,EAAE,4BAA4B,GAAG,SAAS,CAAC;CAC5D;AA4TD,wBAAsB,+BAA+B,CACnD,GAAG,EAAE,YAAY,EACjB,IAAI,EAAE,aAAa,GAClB,OAAO,CAAC,WAAW,CAAC,CA6BtB"}
|