@juspay/neurolink 9.61.1 → 9.62.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/README.md +23 -17
- package/dist/adapters/tts/googleTTSHandler.js +1 -1
- package/dist/browser/neurolink.min.js +382 -364
- package/dist/cli/commands/serve.js +9 -0
- package/dist/cli/commands/voiceServer.d.ts +7 -0
- package/dist/cli/commands/voiceServer.js +9 -1
- package/dist/cli/factories/commandFactory.js +136 -11
- package/dist/cli/loop/optionsSchema.d.ts +1 -1
- package/dist/cli/utils/audioFileUtils.d.ts +3 -3
- package/dist/cli/utils/audioFileUtils.js +5 -1
- package/dist/core/baseProvider.js +29 -6
- package/dist/factories/providerRegistry.d.ts +14 -0
- package/dist/factories/providerRegistry.js +141 -2
- package/dist/lib/adapters/tts/googleTTSHandler.js +1 -1
- package/dist/lib/core/baseProvider.js +29 -6
- package/dist/lib/factories/providerRegistry.d.ts +14 -0
- package/dist/lib/factories/providerRegistry.js +141 -2
- package/dist/lib/mcp/toolRegistry.js +7 -1
- package/dist/lib/neurolink.d.ts +19 -0
- package/dist/lib/neurolink.js +252 -14
- package/dist/lib/observability/exporters/laminarExporter.js +1 -0
- package/dist/lib/observability/exporters/posthogExporter.js +1 -0
- package/dist/lib/observability/utils/spanSerializer.js +1 -0
- package/dist/lib/server/voice/tokenCompare.d.ts +14 -0
- package/dist/lib/server/voice/tokenCompare.js +23 -0
- package/dist/lib/server/voice/voiceServerApp.js +62 -3
- package/dist/lib/server/voice/voiceWebSocketHandler.d.ts +20 -3
- package/dist/lib/server/voice/voiceWebSocketHandler.js +555 -435
- package/dist/lib/types/generate.d.ts +47 -0
- package/dist/lib/types/hitl.d.ts +3 -0
- package/dist/lib/types/index.d.ts +1 -1
- package/dist/lib/types/index.js +1 -1
- package/dist/lib/types/realtime.d.ts +243 -0
- package/dist/lib/types/realtime.js +70 -0
- package/dist/lib/types/server.d.ts +68 -0
- package/dist/lib/types/span.d.ts +2 -0
- package/dist/lib/types/span.js +2 -0
- package/dist/lib/types/stream.d.ts +36 -14
- package/dist/lib/types/stt.d.ts +585 -0
- package/dist/lib/types/stt.js +90 -0
- package/dist/lib/types/tools.d.ts +2 -0
- package/dist/lib/types/tts.d.ts +23 -11
- package/dist/lib/types/tts.js +7 -0
- package/dist/lib/types/voice.d.ts +272 -0
- package/dist/lib/types/voice.js +137 -0
- package/dist/lib/utils/audioFormatDetector.d.ts +15 -0
- package/dist/lib/utils/audioFormatDetector.js +34 -0
- package/dist/lib/utils/errorHandling.js +4 -0
- package/dist/lib/utils/sttProcessor.d.ts +115 -0
- package/dist/lib/utils/sttProcessor.js +295 -0
- package/dist/lib/voice/RealtimeVoiceAPI.d.ts +183 -0
- package/dist/lib/voice/RealtimeVoiceAPI.js +439 -0
- package/dist/lib/voice/audio-utils.d.ts +135 -0
- package/dist/lib/voice/audio-utils.js +435 -0
- package/dist/lib/voice/errors.d.ts +123 -0
- package/dist/lib/voice/errors.js +386 -0
- package/dist/lib/voice/index.d.ts +26 -0
- package/dist/lib/voice/index.js +55 -0
- package/dist/lib/voice/providers/AzureSTT.d.ts +47 -0
- package/dist/lib/voice/providers/AzureSTT.js +345 -0
- package/dist/lib/voice/providers/AzureTTS.d.ts +59 -0
- package/dist/lib/voice/providers/AzureTTS.js +349 -0
- package/dist/lib/voice/providers/DeepgramSTT.d.ts +40 -0
- package/dist/lib/voice/providers/DeepgramSTT.js +550 -0
- package/dist/lib/voice/providers/ElevenLabsTTS.d.ts +53 -0
- package/dist/lib/voice/providers/ElevenLabsTTS.js +311 -0
- package/dist/lib/voice/providers/GeminiLive.d.ts +52 -0
- package/dist/lib/voice/providers/GeminiLive.js +372 -0
- package/dist/lib/voice/providers/GoogleSTT.d.ts +60 -0
- package/dist/lib/voice/providers/GoogleSTT.js +454 -0
- package/dist/lib/voice/providers/OpenAIRealtime.d.ts +47 -0
- package/dist/lib/voice/providers/OpenAIRealtime.js +412 -0
- package/dist/lib/voice/providers/OpenAISTT.d.ts +41 -0
- package/dist/lib/voice/providers/OpenAISTT.js +286 -0
- package/dist/lib/voice/providers/OpenAITTS.d.ts +49 -0
- package/dist/lib/voice/providers/OpenAITTS.js +271 -0
- package/dist/lib/voice/stream-handler.d.ts +166 -0
- package/dist/lib/voice/stream-handler.js +514 -0
- package/dist/mcp/toolRegistry.js +7 -1
- package/dist/neurolink.d.ts +19 -0
- package/dist/neurolink.js +252 -14
- package/dist/observability/exporters/laminarExporter.js +1 -0
- package/dist/observability/exporters/posthogExporter.js +1 -0
- package/dist/observability/utils/spanSerializer.js +1 -0
- package/dist/server/voice/tokenCompare.d.ts +14 -0
- package/dist/server/voice/tokenCompare.js +22 -0
- package/dist/server/voice/voiceServerApp.js +62 -3
- package/dist/server/voice/voiceWebSocketHandler.d.ts +20 -3
- package/dist/server/voice/voiceWebSocketHandler.js +555 -435
- package/dist/types/generate.d.ts +47 -0
- package/dist/types/hitl.d.ts +3 -0
- package/dist/types/index.d.ts +1 -1
- package/dist/types/index.js +1 -1
- package/dist/types/realtime.d.ts +243 -0
- package/dist/types/realtime.js +69 -0
- package/dist/types/server.d.ts +68 -0
- package/dist/types/span.d.ts +2 -0
- package/dist/types/span.js +2 -0
- package/dist/types/stream.d.ts +36 -14
- package/dist/types/stt.d.ts +585 -0
- package/dist/types/stt.js +89 -0
- package/dist/types/tools.d.ts +2 -0
- package/dist/types/tts.d.ts +23 -11
- package/dist/types/tts.js +7 -0
- package/dist/types/voice.d.ts +272 -0
- package/dist/types/voice.js +136 -0
- package/dist/utils/audioFormatDetector.d.ts +15 -0
- package/dist/utils/audioFormatDetector.js +33 -0
- package/dist/utils/errorHandling.js +4 -0
- package/dist/utils/sttProcessor.d.ts +115 -0
- package/dist/utils/sttProcessor.js +294 -0
- package/dist/voice/RealtimeVoiceAPI.d.ts +183 -0
- package/dist/voice/RealtimeVoiceAPI.js +438 -0
- package/dist/voice/audio-utils.d.ts +135 -0
- package/dist/voice/audio-utils.js +434 -0
- package/dist/voice/errors.d.ts +123 -0
- package/dist/voice/errors.js +385 -0
- package/dist/voice/index.d.ts +26 -0
- package/dist/voice/index.js +54 -0
- package/dist/voice/providers/AzureSTT.d.ts +47 -0
- package/dist/voice/providers/AzureSTT.js +344 -0
- package/dist/voice/providers/AzureTTS.d.ts +59 -0
- package/dist/voice/providers/AzureTTS.js +348 -0
- package/dist/voice/providers/DeepgramSTT.d.ts +40 -0
- package/dist/voice/providers/DeepgramSTT.js +549 -0
- package/dist/voice/providers/ElevenLabsTTS.d.ts +53 -0
- package/dist/voice/providers/ElevenLabsTTS.js +310 -0
- package/dist/voice/providers/GeminiLive.d.ts +52 -0
- package/dist/voice/providers/GeminiLive.js +371 -0
- package/dist/voice/providers/GoogleSTT.d.ts +60 -0
- package/dist/voice/providers/GoogleSTT.js +453 -0
- package/dist/voice/providers/OpenAIRealtime.d.ts +47 -0
- package/dist/voice/providers/OpenAIRealtime.js +411 -0
- package/dist/voice/providers/OpenAISTT.d.ts +41 -0
- package/dist/voice/providers/OpenAISTT.js +285 -0
- package/dist/voice/providers/OpenAITTS.d.ts +49 -0
- package/dist/voice/providers/OpenAITTS.js +270 -0
- package/dist/voice/stream-handler.d.ts +166 -0
- package/dist/voice/stream-handler.js +513 -0
- package/package.json +5 -2
package/dist/neurolink.d.ts
CHANGED
@@ -764,6 +764,25 @@ export declare class NeuroLink {
     private validateStreamRequestOptions;
     private maybeHandleWorkflowStreamRequest;
     private runStandardStreamRequest;
+    /**
+     * TTS Mode 2 synthesis helper for the stream() pipeline.
+     *
+     * m5 — extracted from runStandardStreamRequest so the surrounding generator
+     * stays under the max-lines-per-function lint budget. Behaviour preserved
+     * exactly:
+     * - When Mode 2 is enabled (`tts.enabled && tts.useAiResponse`) AND the
+     *   model produced non-empty content: synthesises one final audio buffer
+     *   and returns it as an `audioChunk` for the caller to `yield`. Resolves
+     *   `ttsResolver` with the `TTSResult`.
+     * - When Mode 2 is enabled but synthesis fails: logs a warning and resolves
+     *   `ttsResolver` with `undefined`.
+     * - When Mode 2 is requested but skipped (empty content / wrong mode):
+     *   resolves `ttsResolver` with `undefined` early so callers awaiting
+     *   `result.audio` unblock before the surrounding `finally` cleanup
+     *   completes (Issue 7 latency micro-opt — the finally block also resolves
+     *   defensively, so this is a redundant early signal, not a coverage fix).
+     */
+    private synthesizeStreamModeTwo;
     /**
      * Prepare stream options: initialize memory, MCP, retrieval, orchestration,
      * Ollama tool auto-disable, factory processing, and tool detection.
package/dist/neurolink.js
CHANGED
@@ -59,7 +59,6 @@ import { TaskManager } from "./tasks/taskManager.js";
 import { createTaskTools } from "./tasks/tools/taskTools.js";
 import { ATTR } from "./telemetry/attributes.js";
 import { tracers } from "./telemetry/tracers.js";
-// NEW: Generate function imports
 import { getConversationMessages, storeConversationTurn, } from "./utils/conversationMemory.js";
 // Enhanced error handling imports
 import { CircuitBreaker, ERROR_CODES, ErrorFactory, isAbortError, isRetriableError, logStructuredError, NeuroLinkError, withRetry, withTimeout, } from "./utils/errorHandling.js";
@@ -2933,7 +2932,15 @@ Current user's request: ${currentInput}`;
            ? optionsOrPrompt.length
            : options.input?.text?.length || 0);
        generateSpan.setAttribute("neurolink.has_tools", !!(options.tools && Object.keys(options.tools).length > 0));
-
+        // When STT audio is provided, ensure options.input exists (the transcription
+        // will supply the text inside runStandardGenerateRequest) and skip text validation.
+        const hasSttAudio = !!(options.stt?.enabled && options.stt?.audio);
+        if (hasSttAudio && !options.input) {
+            options.input = { text: "" };
+        }
+        if (!hasSttAudio) {
+            this.assertInputText(options.input?.text, "Input text is required and must be a non-empty string");
+        }
        this.enforceSessionBudget(options.maxBudgetUsd);
        this.applyGenerateLifecycleMiddleware(options);
        await this.applyAuthenticatedRequestContext(options);
@@ -2941,11 +2948,27 @@ Current user's request: ${currentInput}`;
    }
    async maybeHandleEarlyGenerateResult(options, generateSpan) {
        if (options.workflow || options.workflowConfig) {
+            if (options.stt?.enabled && options.stt?.audio) {
+                // prepareGenerateRequest synthesizes input.text = "" for audio-only
+                // calls, so without this guard generateWithWorkflow runs with an
+                // empty prompt. Fail fast when there's no text fallback.
+                if (!options.input?.text?.trim()) {
+                    throw new Error("STT audio is not supported with workflow mode without input.text");
+                }
+                logger.warn("[NeuroLink] STT audio preprocessing is not supported with workflow mode; audio will be ignored");
+            }
            return this.generateWithWorkflow(options);
        }
        if (options.output?.mode !== "ppt") {
            return null;
        }
+        if (options.stt?.enabled && options.stt?.audio) {
+            // Same fail-fast as the workflow branch — see comment above.
+            if (!options.input?.text?.trim()) {
+                throw new Error("STT audio is not supported with PPT mode without input.text");
+            }
+            logger.warn("[NeuroLink] STT audio preprocessing is not supported with PPT mode; audio will be ignored");
+        }
        const pptResult = await this.generateWithPPT(options);
        generateSpan.setAttribute("neurolink.output_length", pptResult.content?.length ?? 0);
        if (pptResult.analytics) {
@@ -2976,16 +2999,72 @@ Current user's request: ${currentInput}`;
        }
        await this.prepareGenerateAugmentations(options);
        const textOptions = await this.buildGenerateTextOptions(options, originalPrompt, factoryResult);
+        // STT preprocessing: transcribe audio input before LLM generation
+        let sttTranscription;
+        if (options.stt?.enabled && options.stt.audio) {
+            try {
+                // Always call — registerAllProviders() is idempotent via internal
+                // `registered` + `registrationPromise` deduplication. The previous
+                // isRegistered() guard short-circuited even when STT handler
+                // registration failed silently after AI providers were registered.
+                await ProviderRegistry.registerAllProviders();
+                const { STTProcessor } = await import("./utils/sttProcessor.js");
+                const sttProvider = options.stt.provider ?? "whisper";
+                sttTranscription = await STTProcessor.transcribe(options.stt.audio, sttProvider, options.stt);
+                // Inject transcription into the LLM prompt
+                if (sttTranscription.text) {
+                    const existingText = textOptions.prompt || textOptions.input?.text || "";
+                    if (!existingText) {
+                        // No user text — use transcription directly as the prompt
+                        textOptions.prompt = sttTranscription.text;
+                        if (textOptions.input) {
+                            textOptions.input.text = sttTranscription.text;
+                        }
+                    }
+                    else {
+                        // User provided text — prepend transcription as context
+                        const combined = `[Transcribed audio]: ${sttTranscription.text}\n\n${existingText}`;
+                        if (textOptions.prompt) {
+                            textOptions.prompt = combined;
+                        }
+                        if (textOptions.input?.text) {
+                            textOptions.input.text = combined;
+                        }
+                    }
+                }
+            }
+            catch (sttError) {
+                const existingText = textOptions.prompt || textOptions.input?.text || "";
+                if (!existingText) {
+                    // Audio-only request — no text to fall back to, fail fast
+                    throw sttError;
+                }
+                logger.warn(`[NeuroLink] STT transcription failed, falling back to text: ${sttError instanceof Error ? sttError.message : String(sttError)}`);
+            }
+        }
        const textResult = await this.generateTextInternal(textOptions);
-
+        // For STT-only calls, originalPrompt was captured before transcription.
+        // Use the transcribed text as the effective prompt for telemetry, memory,
+        // and trace attribution so traces don't show empty prompts.
+        const effectiveOriginalPrompt = sttTranscription?.text
+            ? originalPrompt
+                ? `[Transcribed audio]: ${sttTranscription.text}\n\n${originalPrompt}`
+                : sttTranscription.text
+            : originalPrompt;
+        // Attach STT transcription to result
+        const generateResult = this.finalizeGenerateRequestResult({
            generateSpan,
            options,
            textOptions,
            textResult,
            factoryResult,
-            originalPrompt,
+            originalPrompt: effectiveOriginalPrompt,
            startTime,
        });
+        if (sttTranscription) {
+            generateResult.transcription = sttTranscription;
+        }
+        return generateResult;
    }
    async maybeApplyGenerateOrchestration(options) {
        if (!this.enableOrchestration || options.provider || options.model) {
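Taken together, the hunks above give generate() an audio input path. A minimal caller-side sketch, assuming stt.audio accepts a Node Buffer and that result.transcription and result.content keep the shapes referenced in this diff:

```ts
import { NeuroLink } from "@juspay/neurolink";
import { readFile } from "node:fs/promises";

const neurolink = new NeuroLink();
const audio = await readFile("./question.wav");

// Audio-only call: prepareGenerateRequest synthesizes input.text = "" and the
// whisper transcription (the default provider per this diff) becomes the prompt.
const result = await neurolink.generate({
  stt: { enabled: true, audio, provider: "whisper" },
});

console.log(result.transcription?.text); // what the audio said
console.log(result.content);             // the model's answer

// Mixed call: user text plus audio. The transcription is prepended as
// "[Transcribed audio]: ..." context ahead of the typed prompt.
await neurolink.generate({
  input: { text: "Answer the question in the recording in one sentence." },
  stt: { enabled: true, audio },
});
```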
@@ -3080,6 +3159,7 @@ Current user's request: ${currentInput}`;
            input: options.input,
            region: options.region,
            tts: options.tts,
+            stt: options.stt,
            fileRegistry: this.fileRegistry,
            timeout: options.timeout,
            abortSignal: options.abortSignal,
@@ -3124,7 +3204,13 @@ Current user's request: ${currentInput}`;
            toolsUsed: textResult.toolsUsed,
            timestamp: Date.now(),
            result: textResult,
-            prompt
+            // Use the effective prompt (which already incorporates STT-transcribed
+            // text for audio-only calls) so observers see the real prompt instead
+            // of an empty string. Falls back through the same chain as before for
+            // text-only calls.
+            prompt: originalPrompt ||
+                options.input?.text ||
+                options.prompt,
            temperature: textOptions.temperature,
            maxTokens: textOptions.maxTokens,
            // A2 fix: Signal that Pipeline A (AI SDK → @langfuse/otel) already
@@ -3167,6 +3253,7 @@ Current user's request: ${currentInput}`;
                }
                : undefined,
            audio: textResult.audio,
+            transcription: textResult.transcription,
            video: textResult.video,
            ppt: textResult.ppt,
            ...(textResult.retries && { retries: textResult.retries }),
@@ -5088,7 +5175,38 @@ Current user's request: ${currentInput}`;
        const startTime = Date.now();
        const hrTimeStart = process.hrtime.bigint();
        const streamId = `neurolink-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
-
+        // STT preprocessing for stream(): transcribe audio buffer (not realtime frames)
+        // and inject into the prompt before validation/execution. Mirrors generate().
+        const sttOptions = options.stt;
+        const sttAudio = sttOptions?.audio;
+        const hasStreamSttAudio = !!(sttOptions?.enabled && sttAudio);
+        let streamSttTranscription;
+        if (hasStreamSttAudio && sttOptions && sttAudio) {
+            if (!options.input) {
+                options.input = { text: "" };
+            }
+            try {
+                // registerAllProviders() is idempotent; always call.
+                await ProviderRegistry.registerAllProviders();
+                const { STTProcessor } = await import("./utils/sttProcessor.js");
+                const sttProvider = sttOptions.provider ?? "whisper";
+                streamSttTranscription = await STTProcessor.transcribe(sttAudio, sttProvider, sttOptions);
+                if (streamSttTranscription.text) {
+                    const existingText = options.input.text || "";
+                    options.input.text = existingText
+                        ? `[Transcribed audio]: ${streamSttTranscription.text}\n\n${existingText}`
+                        : streamSttTranscription.text;
+                }
+            }
+            catch (sttError) {
+                const existingText = options.input.text || "";
+                if (!existingText) {
+                    throw sttError;
+                }
+                logger.warn(`[NeuroLink] Stream STT transcription failed, falling back to text: ${sttError instanceof Error ? sttError.message : String(sttError)}`);
+            }
+        }
+        const originalPrompt = options.input?.text ?? "";
        options.fileRegistry = this.fileRegistry;
        await this.validateStreamRequestOptions(options, startTime);
        const workflowResult = await this.maybeHandleWorkflowStreamRequest({
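stream() gains the same pre-recorded-audio path (distinct from the realtime input.audio.frames path). An illustrative sketch under the same assumptions as the generate() example above:

```ts
import { NeuroLink } from "@juspay/neurolink";
import { readFile } from "node:fs/promises";

const neurolink = new NeuroLink();
const audio = await readFile("./question.wav");

const streamed = await neurolink.stream({
  // A pre-recorded buffer in stt.audio now satisfies input validation on its
  // own (see the validateStreamRequestOptions hunk below); the transcription
  // is injected into input.text before validation and execution.
  stt: { enabled: true, audio },
});

for await (const chunk of streamed.stream) {
  // ... consume text chunks as usual ...
}
console.log(streamed.transcription?.text);
```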
@@ -5100,7 +5218,21 @@ Current user's request: ${currentInput}`;
            if (workflowResult) {
                return workflowResult;
            }
-
+            // TTS Mode 2 deferred: stream() emits text first, then synthesizes the
+            // accumulated response into a single audio chunk at end-of-stream and
+            // resolves `streamResult.audio` with the same TTSResult. The resolver is
+            // plumbed explicitly through the params bag (M11: previously a
+            // `_streamTtsResolve` cast on the caller's options object — fragile if
+            // the same options object was reused across concurrent stream() calls).
+            const ttsOptions = options.tts;
+            const wantsStreamTtsMode2 = !!(ttsOptions?.enabled && ttsOptions?.useAiResponse);
+            let resolveStreamTtsAudio;
+            const streamTtsAudioPromise = wantsStreamTtsMode2
+                ? new Promise((resolve) => {
+                    resolveStreamTtsAudio = resolve;
+                })
+                : undefined;
+            const streamResult = await this.setLangfuseContextFromOptions(options, () => this.runStandardStreamRequest({
                options,
                streamSpan,
                spanStartTime,
@@ -5108,7 +5240,15 @@ Current user's request: ${currentInput}`;
                hrTimeStart,
                streamId,
                originalPrompt,
+                ttsResolver: resolveStreamTtsAudio,
            }));
+            if (streamSttTranscription) {
+                streamResult.transcription = streamSttTranscription;
+            }
+            if (streamTtsAudioPromise) {
+                streamResult.audio = streamTtsAudioPromise;
+            }
+            return streamResult;
        }
        catch (error) {
            streamSpan.setStatus({
@@ -5159,7 +5299,7 @@ Current user's request: ${currentInput}`;
        return result;
    }
    async runStandardStreamRequest(params) {
-        const { options, streamSpan, spanStartTime, startTime, hrTimeStart, streamId, originalPrompt, } = params;
+        const { options, streamSpan, spanStartTime, startTime, hrTimeStart, streamId, originalPrompt, ttsResolver, } = params;
        logger.debug("[NeuroLink] Running standard stream request", {
            streamId,
            provider: options.provider,
@@ -5244,6 +5384,7 @@ Current user's request: ${currentInput}`;
                    typeof chunk === "object" &&
                    "type" in chunk &&
                    (chunk.type === "audio" ||
+                        chunk.type === "tts_audio" ||
                        chunk.type === "image");
                if (!isNoOutputSentinel && (hasTextContent || hasMediaPayload)) {
                    realOutputChunks++;
@@ -5278,6 +5419,22 @@ Current user's request: ${currentInput}`;
                        accumulatedContent += content;
                    });
                }
+                // TTS Mode 2 for stream(): synthesize the accumulated response
+                // and yield ONE final audio chunk so callers iterating the stream
+                // get the audio inline; also resolve `streamResult.audio` so the
+                // ergonomic `await result.audio` pattern works post-iteration.
+                // m5: synthesis logic lives in a dedicated helper to keep this
+                // generator under the max-lines-per-function lint budget.
+                const ttsModeResult = await self.synthesizeStreamModeTwo({
+                    ttsOptions: enhancedOptions.tts,
+                    providerName,
+                    fallbackProvider: enhancedOptions.provider,
+                    accumulatedContent,
+                    ttsResolver,
+                });
+                if (ttsModeResult.audioChunk) {
+                    yield ttsModeResult.audioChunk;
+                }
                resolvedUsage = streamUsage;
                if (!resolvedUsage && streamAnalytics) {
                    try {
@@ -5343,6 +5500,14 @@ Current user's request: ${currentInput}`;
                throw error;
            }
            finally {
+                // Belt-and-braces: if TTS Mode 2 was requested but synthesis never
+                // ran (stream errored before reaching the TTS block, or Mode 2 path
+                // was skipped), resolve the audio promise to undefined so callers
+                // awaiting `streamResult.audio` never hang. Uses the explicit
+                // `ttsResolver` param (M11), not a side-channel cast.
+                // m4: a duplicate resolution is a silent no-op — Promise resolvers
+                // never throw, so no try/catch needed here.
+                ttsResolver?.(undefined);
                logger.debug("[NeuroLink.stream] Stream finished, performing cleanup", {
                    provider: providerName,
                    model: enhancedOptions.model,
@@ -5489,6 +5654,67 @@ Current user's request: ${currentInput}`;
            return this.handleStreamError(error, options, startTime, streamId, undefined, undefined);
        }
    }
+    /**
+     * TTS Mode 2 synthesis helper for the stream() pipeline.
+     *
+     * m5 — extracted from runStandardStreamRequest so the surrounding generator
+     * stays under the max-lines-per-function lint budget. Behaviour preserved
+     * exactly:
+     * - When Mode 2 is enabled (`tts.enabled && tts.useAiResponse`) AND the
+     *   model produced non-empty content: synthesises one final audio buffer
+     *   and returns it as an `audioChunk` for the caller to `yield`. Resolves
+     *   `ttsResolver` with the `TTSResult`.
+     * - When Mode 2 is enabled but synthesis fails: logs a warning and resolves
+     *   `ttsResolver` with `undefined`.
+     * - When Mode 2 is requested but skipped (empty content / wrong mode):
+     *   resolves `ttsResolver` with `undefined` early so callers awaiting
+     *   `result.audio` unblock before the surrounding `finally` cleanup
+     *   completes (Issue 7 latency micro-opt — the finally block also resolves
+     *   defensively, so this is a redundant early signal, not a coverage fix).
+     */
+    async synthesizeStreamModeTwo(params) {
+        const { ttsOptions, providerName, fallbackProvider, accumulatedContent, ttsResolver, } = params;
+        if (!ttsOptions?.enabled ||
+            !ttsOptions.useAiResponse ||
+            accumulatedContent.trim().length === 0) {
+            ttsResolver?.(undefined);
+            return {};
+        }
+        try {
+            const { TTSProcessor } = await import("./utils/ttsProcessor.js");
+            // ttsOptions.provider takes precedence; otherwise fall back to the
+            // chat provider ID ONLY when it happens to be a registered TTS handler
+            // (e.g. "google-ai" works for both LLM and TTS). For LLM-only IDs like
+            // "anthropic", we'd otherwise complete generation and then fail synth —
+            // surface that mismatch up front instead.
+            const candidate = ttsOptions.provider ?? fallbackProvider ?? providerName;
+            const ttsProvider = candidate && TTSProcessor.supports(candidate) ? candidate : undefined;
+            if (!ttsProvider) {
+                throw new Error(`No TTS provider resolved for stream Mode 2 (set tts.provider explicitly — chat provider "${candidate ?? "<unset>"}" is not a registered TTS handler)`);
+            }
+            const ttsResult = await TTSProcessor.synthesize(accumulatedContent, ttsProvider, ttsOptions);
+            ttsResolver?.(ttsResult);
+            return {
+                audioChunk: {
+                    type: "tts_audio",
+                    audio: {
+                        data: ttsResult.buffer,
+                        format: ttsResult.format,
+                        index: 0,
+                        isFinal: true,
+                        cumulativeSize: ttsResult.size,
+                        voice: ttsResult.voice,
+                        sampleRate: ttsResult.sampleRate,
+                    },
+                },
+            };
+        }
+        catch (ttsError) {
+            logger.warn(`[NeuroLink.stream] Stream TTS Mode 2 synthesis failed: ${ttsError instanceof Error ? ttsError.message : String(ttsError)}`);
+            ttsResolver?.(undefined);
+            return {};
+        }
+    }
    /**
     * Prepare stream options: initialize memory, MCP, retrieval, orchestration,
     * Ollama tool auto-disable, factory processing, and tool detection.
@@ -5519,8 +5745,15 @@ Current user's request: ${currentInput}`;
            orchestratedModel: orchestratedOptions.model,
            prompt: options.input.text?.substring(0, 100),
        });
-        // Use orchestrated options
-
+        // Use orchestrated options — rebind the local `options` to a fresh
+        // merged object instead of mutating the caller-supplied one
+        // (NEW2: avoids cross-call contamination when callers reuse options).
+        // Issue 6: extract to an explicit local so the rebind intent is
+        // obvious to future readers, and the lint suppression is scoped
+        // narrowly to the one statement that actually rebinds the param.
+        const mergedOptions = { ...options, ...orchestratedOptions };
+        // eslint-disable-next-line no-param-reassign -- see NEW2/Issue 6 above
+        options = mergedOptions;
        // Re-resolve model alias in case orchestration returned an alias
        if (orchestratedOptions.model) {
            options.model = resolveModel(options.model, this.modelAliasConfig);
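The NEW2 rebind above exists to stop orchestration's provider/model pick from leaking into a caller-owned options object that may be reused for the next call. A small self-contained illustration of that contamination, not NeuroLink code:

```ts
type Opts = { input: { text: string }; model?: string };

// Mutating style: the orchestrator's pick escapes into the caller's object,
// so a reused options literal carries call #1's model into call #2.
function orchestrateInPlace(options: Opts): void {
  options.model = "model-picked-for-call-1";
}

// Rebinding style (what the diff does): merge into a fresh local object and
// leave the caller's object untouched.
function orchestrateRebound(options: Opts): Opts {
  return { ...options, model: "model-picked-for-call-1" };
}

const reusedA: Opts = { input: { text: "hello" } };
orchestrateInPlace(reusedA);
console.log(reusedA.model); // "model-picked-for-call-1" (leaks into any later call reusing reusedA)

const reusedB: Opts = { input: { text: "hello" } };
const local = orchestrateRebound(reusedB);
console.log(local.model, reusedB.model); // "model-picked-for-call-1" undefined (no leak)
```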
@@ -5787,6 +6020,7 @@ Current user's request: ${currentInput}`;
                    typeof fallbackChunk === "object" &&
                    "type" in fallbackChunk &&
                    (fallbackChunk.type === "audio" ||
+                        fallbackChunk.type === "tts_audio" ||
                        fallbackChunk.type === "image");
                if (!isFallbackNoOutputSentinel &&
                    (fallbackHasTextContent || fallbackHasMediaPayload)) {
@@ -5909,8 +6143,10 @@ Current user's request: ${currentInput}`;
        const hasAudio = !!(options?.input?.audio &&
            options.input.audio.frames &&
            typeof options.input.audio.frames[Symbol.asyncIterator] === "function");
-
-
+        // STT pre-recorded audio buffer counts as input — transcription will fill text.
+        const hasSttAudio = !!(options?.stt?.enabled && options?.stt?.audio);
+        if (!hasText && !hasAudio && !hasSttAudio) {
+            throw new Error("Stream options must include either input.text, input.audio, or stt.audio");
        }
    }
    /**
@@ -7199,6 +7435,7 @@ Current user's request: ${currentInput}`;
            inputSize: inputStr.length,
            truncatedInput: inputStr.length > 2048 ? inputStr.substring(0, 2048) : inputStr,
            options,
+            hitlState: { triggered: false },
        };
    }
    async executeToolWithSpan(toolName, params, options, executionContext, toolSpan) {
@@ -7305,7 +7542,7 @@ Current user's request: ${currentInput}`;
            circuitBreakerState: prepared.circuitBreaker.getState(),
        });
        const result = await prepared.circuitBreaker.execute(async () => {
-            return withRetry(async () => withTimeout(this.executeToolInternal(toolName, params, prepared.finalOptions), prepared.finalOptions.timeout, ErrorFactory.toolTimeout(toolName, prepared.finalOptions.timeout)), {
+            return withRetry(async () => withTimeout(this.executeToolInternal(toolName, params, prepared.finalOptions, executionContext.hitlState), prepared.finalOptions.timeout, ErrorFactory.toolTimeout(toolName, prepared.finalOptions.timeout)), {
                maxAttempts: prepared.finalOptions.maxRetries + 1,
                delayMs: prepared.finalOptions.retryDelayMs,
                isRetriable: isRetriableError,
@@ -7526,7 +7763,7 @@ Current user's request: ${currentInput}`;
     * - Annotations: skip cache for destructive tools, retry safe tools on failure
     * - Middleware: apply global middleware chain before execution
     */
-    async executeToolInternal(toolName, params, options) {
+    async executeToolInternal(toolName, params, options, HITLState) {
        const functionTag = "NeuroLink.executeToolInternal";
        // === MCP ENHANCEMENT: Infer annotations for cache/retry decisions ===
        const toolAnnotations = this.getToolAnnotationsForExecution(toolName);
@@ -7645,6 +7882,7 @@ Current user's request: ${currentInput}`;
        const context = {
            ...storedContext,
            ...passedAuthContext,
+            hitlState: HITLState,
        };
        logger.debug(`[Using merged context for unified registry tool:`, {
            toolName,
@@ -233,6 +233,7 @@ export class LaminarExporter extends BaseExporter {
    [SpanType.PPT_GENERATION]: "custom",
    [SpanType.WORKFLOW]: "workflow",
    [SpanType.TTS]: "custom",
+    [SpanType.STT]: "custom",
    [SpanType.SERVER_REQUEST]: "custom",
    [SpanType.CUSTOM]: "custom",
 };
@@ -216,6 +216,7 @@ export class PostHogExporter extends BaseExporter {
    [SpanType.PPT_GENERATION]: "ai_ppt_generation",
    [SpanType.WORKFLOW]: "ai_workflow",
    [SpanType.TTS]: "ai_tts_synthesis",
+    [SpanType.STT]: "ai_stt_transcription",
    [SpanType.SERVER_REQUEST]: "ai_server_request",
    [SpanType.CUSTOM]: "ai_custom_span",
 };
@@ -0,0 +1,14 @@
+/**
+ * Constant-time bearer-token comparison.
+ *
+ * Bug 2 mitigation: a normal `===` compare on bearer tokens leaks the token
+ * length and the position of the first mismatching byte through timing
+ * differences, which is reachable when the voice server is bound publicly
+ * (`VOICE_SERVER_ALLOW_PUBLIC=1`).
+ *
+ * Returns `false` for any comparison whose lengths differ — this avoids the
+ * `RangeError` that `crypto.timingSafeEqual` throws on mismatched buffers
+ * while still preserving the constant-time property for equal-length inputs
+ * (which is the only case an attacker can probe).
+ */
+export declare function timingSafeEqualString(provided: string, expected: string): boolean;
@@ -0,0 +1,22 @@
+import crypto from "crypto";
+/**
+ * Constant-time bearer-token comparison.
+ *
+ * Bug 2 mitigation: a normal `===` compare on bearer tokens leaks the token
+ * length and the position of the first mismatching byte through timing
+ * differences, which is reachable when the voice server is bound publicly
+ * (`VOICE_SERVER_ALLOW_PUBLIC=1`).
+ *
+ * Returns `false` for any comparison whose lengths differ — this avoids the
+ * `RangeError` that `crypto.timingSafeEqual` throws on mismatched buffers
+ * while still preserving the constant-time property for equal-length inputs
+ * (which is the only case an attacker can probe).
+ */
+export function timingSafeEqualString(provided, expected) {
+    const providedBuf = Buffer.from(provided, "utf8");
+    const expectedBuf = Buffer.from(expected, "utf8");
+    if (providedBuf.length !== expectedBuf.length) {
+        return false;
+    }
+    return crypto.timingSafeEqual(providedBuf, expectedBuf);
+}
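Behaviour of the new helper at its two boundaries, restated as a usage sketch (values are illustrative; the relative import matches its location under dist/server/voice/):

```ts
import { timingSafeEqualString } from "./tokenCompare.js";

// Equal-length inputs go through crypto.timingSafeEqual, so comparison time
// does not depend on where the first mismatching byte sits.
timingSafeEqualString("secret-token-1234", "secret-token-1234"); // true
timingSafeEqualString("secret-token-1234", "secret-token-9999"); // false

// Length mismatch returns false up front instead of surfacing the RangeError
// that crypto.timingSafeEqual throws for buffers of different byte lengths.
timingSafeEqualString("short", "a-much-longer-expected-token"); // false
```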
@@ -4,6 +4,7 @@ import http from "http";
 import path from "path";
 import { fileURLToPath } from "url";
 import { setupWebSocket } from "./voiceWebSocketHandler.js";
+import { timingSafeEqualString } from "./tokenCompare.js";
 import { NeuroLink } from "../../neurolink.js";
 import { logger } from "../../utils/logger.js";
 import { withTimeout } from "../../utils/async/withTimeout.js";
@@ -30,6 +31,50 @@ function resolvePublicPath() {
 }
 export async function startVoiceServer(port = 3000) {
    const app = express();
+    // NEW11: refuse to bind to non-loopback interfaces unless the operator
+    // has explicitly opted in. The voice server has minimal hardening and
+    // exposing it publicly without a token leaks Soniox / Cartesia / LLM
+    // credit usage to anyone who can reach the listener.
+    const allowPublic = process.env.VOICE_SERVER_ALLOW_PUBLIC === "1";
+    const host = allowPublic
+        ? (process.env.VOICE_SERVER_HOST ?? "0.0.0.0")
+        : "127.0.0.1";
+    // NEW11: optional shared-secret bearer token for both HTTP and WebSocket
+    // upgrade. When VOICE_SERVER_AUTH_TOKEN is set, every HTTP request must
+    // carry `Authorization: Bearer <token>`. The WS upgrade additionally
+    // accepts `?token=<token>` because browser WebSocket constructors cannot
+    // set custom headers — see voiceWebSocketHandler.verifyClient. HTTP routes
+    // intentionally reject `?token=` (would leak via Referer + access logs).
+    const authToken = process.env.VOICE_SERVER_AUTH_TOKEN;
+    /* ---------- BODY LIMITS + AUTH ---------- */
+    // NEW11: cap JSON / urlencoded body to 100kb. Express's default is 100kb
+    // for json() but only when explicitly registered; without this any future
+    // body parser would default to whatever its own limit is.
+    app.use(express.json({ limit: "100kb" }));
+    app.use(express.urlencoded({ limit: "100kb", extended: false }));
+    // NEW11: minimal HTTP auth middleware. Skips when no token is configured
+    // (back-compat — local-only dev keeps working). Skips for /health so
+    // load-balancers can probe without credentials.
+    if (authToken) {
+        app.use((req, res, next) => {
+            if (req.path === "/health") {
+                return next();
+            }
+            const header = req.header("authorization");
+            // Bug 3 fix: HTTP routes only accept the bearer header. The `?token=`
+            // fallback exists only on the WS upgrade where the browser API cannot
+            // attach headers — using it on regular HTTP would leak credentials via
+            // Referer headers, browser history, server access logs, and proxies.
+            const provided = header?.startsWith("Bearer ")
+                ? header.slice(7)
+                : undefined;
+            if (!provided || !timingSafeEqualString(provided, authToken)) {
+                res.status(401).json({ error: "Unauthorized" });
+                return;
+            }
+            next();
+        });
+    }
    /* ---------- STATIC FILES ---------- */
    const publicPath = resolvePublicPath();
    logger.info("[SERVER] Serving static from:", publicPath);
@@ -41,15 +86,29 @@ export async function startVoiceServer(port = 3000) {
    app.get("/health", (_, res) => {
        res.json({ status: "ok" });
    });
+    /* ---------- ERROR HANDLER ---------- */
+    // NEW11: global Express error handler so synchronous and async errors are
+    // caught instead of crashing the process or leaking stack traces.
+    app.use((err, _req, res, _next) => {
+        logger.error(`[SERVER] Unhandled error: ${err instanceof Error ? err.message : String(err)}`);
+        if (!res.headersSent) {
+            res.status(500).json({ error: "Internal server error" });
+        }
+    });
    const server = http.createServer(app);
    /* ---------- WS ---------- */
-
+    // NEW11: pass the auth token + allow-public flag through to the WS handler
+    // so it can verify clients on upgrade and apply maxPayload caps.
+    setupWebSocket(server, { authToken, maxPayload: 1_048_576 });
    /* ---------- START ---------- */
    await new Promise((resolve, reject) => {
        server.once("error", reject);
-        server.listen(port, () => {
+        server.listen(port, host, () => {
            server.removeListener("error", reject);
-
+            const exposure = allowPublic
+                ? `bound publicly on ${host}:${port} (VOICE_SERVER_ALLOW_PUBLIC=1)`
+                : `bound to loopback ${host}:${port} (set VOICE_SERVER_ALLOW_PUBLIC=1 to expose externally)`;
+            logger.info(`[SERVER] Voice server running — ${exposure}${authToken ? " (auth required)" : " (no auth — token via VOICE_SERVER_AUTH_TOKEN recommended)"}`);
            resolve();
        });
    });
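A possible operator-side setup for the hardened server above. The environment variable names, the /health exemption, and the bearer-header requirement come from this diff; the deep import specifier for startVoiceServer is an assumption, since its published entry point is not shown here:

```ts
// Assumed deep import; adjust to however your build resolves the package's dist files.
import { startVoiceServer } from "@juspay/neurolink/dist/server/voice/voiceServerApp.js";

process.env.VOICE_SERVER_AUTH_TOKEN = "a-long-random-shared-secret";
process.env.VOICE_SERVER_ALLOW_PUBLIC = "1"; // omit to stay bound to 127.0.0.1
process.env.VOICE_SERVER_HOST = "0.0.0.0";

await startVoiceServer(3000);

// /health stays unauthenticated so load balancers can probe it.
await fetch("http://127.0.0.1:3000/health");

// Every other HTTP route requires the bearer header; ?token= is rejected on
// HTTP on purpose (it would leak via Referer headers and access logs).
await fetch("http://127.0.0.1:3000/", {
  headers: { authorization: `Bearer ${process.env.VOICE_SERVER_AUTH_TOKEN}` },
});
```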
@@ -1,7 +1,24 @@
 import type { Server as HttpServer } from "http";
 /**
- *
- *
+ * Voice-server-mode environment configuration.
+ *
+ * @deprecated NEW12 — this used to mutate `process.env.NEUROLINK_DISABLE_MCP_TOOLS`
+ * which is process-wide. That broke any embedder that called this function in
+ * a process which ALSO used NeuroLink for non-voice work. The disable-tools
+ * intent is now passed explicitly via `disableTools: true` on every NeuroLink
+ * `generate()` / `stream()` call inside this server (see line ~167). Calling
+ * this function is now a no-op kept for backwards compatibility.
 */
 export declare function configureVoiceServerEnvironment(): void;
-
+/**
+ * Returns a copy of an outbound Soniox payload with the API key redacted.
+ *
+ * Use this whenever debug logging the auth frame — never JSON.stringify the
+ * raw object. (C3 mitigation: prevents the Soniox API key from leaking into
+ * any aggregated log sink even if a future debug statement serialises the
+ * outbound payload.)
+ */
+export declare function redactSonioxAuth<T extends {
+    api_key?: string;
+}>(payload: T): T;
+export declare function setupWebSocket(server: HttpServer, options?: import("../../types/index.js").ServerVoiceWebSocketOptions): void;
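The comments above describe the client side of the token handshake: browsers cannot attach an Authorization header to a WebSocket upgrade, so the token travels once as a query parameter on the upgrade request only. A browser-side sketch (the path is illustrative; use whatever path setupWebSocket serves in your deployment):

```ts
// Browser context. Use the same value as VOICE_SERVER_AUTH_TOKEN on the server.
const token = "a-long-random-shared-secret";
const ws = new WebSocket(`ws://localhost:3000/?token=${encodeURIComponent(token)}`);

ws.addEventListener("open", () => {
  // ... start streaming microphone audio frames to the voice server ...
});
```

On the server side, any debug logging of the outbound Soniox auth frame should go through redactSonioxAuth(payload) rather than serialising the raw object, per the C3 note above.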