@juspay/neurolink 9.61.1 → 9.62.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/README.md +23 -17
  3. package/dist/adapters/tts/googleTTSHandler.js +1 -1
  4. package/dist/browser/neurolink.min.js +382 -364
  5. package/dist/cli/commands/serve.js +9 -0
  6. package/dist/cli/commands/voiceServer.d.ts +7 -0
  7. package/dist/cli/commands/voiceServer.js +9 -1
  8. package/dist/cli/factories/commandFactory.js +136 -11
  9. package/dist/cli/loop/optionsSchema.d.ts +1 -1
  10. package/dist/cli/utils/audioFileUtils.d.ts +3 -3
  11. package/dist/cli/utils/audioFileUtils.js +5 -1
  12. package/dist/core/baseProvider.js +29 -6
  13. package/dist/factories/providerRegistry.d.ts +14 -0
  14. package/dist/factories/providerRegistry.js +141 -2
  15. package/dist/lib/adapters/tts/googleTTSHandler.js +1 -1
  16. package/dist/lib/core/baseProvider.js +29 -6
  17. package/dist/lib/factories/providerRegistry.d.ts +14 -0
  18. package/dist/lib/factories/providerRegistry.js +141 -2
  19. package/dist/lib/mcp/toolRegistry.js +7 -1
  20. package/dist/lib/neurolink.d.ts +19 -0
  21. package/dist/lib/neurolink.js +252 -14
  22. package/dist/lib/observability/exporters/laminarExporter.js +1 -0
  23. package/dist/lib/observability/exporters/posthogExporter.js +1 -0
  24. package/dist/lib/observability/utils/spanSerializer.js +1 -0
  25. package/dist/lib/server/voice/tokenCompare.d.ts +14 -0
  26. package/dist/lib/server/voice/tokenCompare.js +23 -0
  27. package/dist/lib/server/voice/voiceServerApp.js +62 -3
  28. package/dist/lib/server/voice/voiceWebSocketHandler.d.ts +20 -3
  29. package/dist/lib/server/voice/voiceWebSocketHandler.js +555 -435
  30. package/dist/lib/types/generate.d.ts +47 -0
  31. package/dist/lib/types/hitl.d.ts +3 -0
  32. package/dist/lib/types/index.d.ts +1 -1
  33. package/dist/lib/types/index.js +1 -1
  34. package/dist/lib/types/realtime.d.ts +243 -0
  35. package/dist/lib/types/realtime.js +70 -0
  36. package/dist/lib/types/server.d.ts +68 -0
  37. package/dist/lib/types/span.d.ts +2 -0
  38. package/dist/lib/types/span.js +2 -0
  39. package/dist/lib/types/stream.d.ts +36 -14
  40. package/dist/lib/types/stt.d.ts +585 -0
  41. package/dist/lib/types/stt.js +90 -0
  42. package/dist/lib/types/tools.d.ts +2 -0
  43. package/dist/lib/types/tts.d.ts +23 -11
  44. package/dist/lib/types/tts.js +7 -0
  45. package/dist/lib/types/voice.d.ts +272 -0
  46. package/dist/lib/types/voice.js +137 -0
  47. package/dist/lib/utils/audioFormatDetector.d.ts +15 -0
  48. package/dist/lib/utils/audioFormatDetector.js +34 -0
  49. package/dist/lib/utils/errorHandling.js +4 -0
  50. package/dist/lib/utils/sttProcessor.d.ts +115 -0
  51. package/dist/lib/utils/sttProcessor.js +295 -0
  52. package/dist/lib/voice/RealtimeVoiceAPI.d.ts +183 -0
  53. package/dist/lib/voice/RealtimeVoiceAPI.js +439 -0
  54. package/dist/lib/voice/audio-utils.d.ts +135 -0
  55. package/dist/lib/voice/audio-utils.js +435 -0
  56. package/dist/lib/voice/errors.d.ts +123 -0
  57. package/dist/lib/voice/errors.js +386 -0
  58. package/dist/lib/voice/index.d.ts +26 -0
  59. package/dist/lib/voice/index.js +55 -0
  60. package/dist/lib/voice/providers/AzureSTT.d.ts +47 -0
  61. package/dist/lib/voice/providers/AzureSTT.js +345 -0
  62. package/dist/lib/voice/providers/AzureTTS.d.ts +59 -0
  63. package/dist/lib/voice/providers/AzureTTS.js +349 -0
  64. package/dist/lib/voice/providers/DeepgramSTT.d.ts +40 -0
  65. package/dist/lib/voice/providers/DeepgramSTT.js +550 -0
  66. package/dist/lib/voice/providers/ElevenLabsTTS.d.ts +53 -0
  67. package/dist/lib/voice/providers/ElevenLabsTTS.js +311 -0
  68. package/dist/lib/voice/providers/GeminiLive.d.ts +52 -0
  69. package/dist/lib/voice/providers/GeminiLive.js +372 -0
  70. package/dist/lib/voice/providers/GoogleSTT.d.ts +60 -0
  71. package/dist/lib/voice/providers/GoogleSTT.js +454 -0
  72. package/dist/lib/voice/providers/OpenAIRealtime.d.ts +47 -0
  73. package/dist/lib/voice/providers/OpenAIRealtime.js +412 -0
  74. package/dist/lib/voice/providers/OpenAISTT.d.ts +41 -0
  75. package/dist/lib/voice/providers/OpenAISTT.js +286 -0
  76. package/dist/lib/voice/providers/OpenAITTS.d.ts +49 -0
  77. package/dist/lib/voice/providers/OpenAITTS.js +271 -0
  78. package/dist/lib/voice/stream-handler.d.ts +166 -0
  79. package/dist/lib/voice/stream-handler.js +514 -0
  80. package/dist/mcp/toolRegistry.js +7 -1
  81. package/dist/neurolink.d.ts +19 -0
  82. package/dist/neurolink.js +252 -14
  83. package/dist/observability/exporters/laminarExporter.js +1 -0
  84. package/dist/observability/exporters/posthogExporter.js +1 -0
  85. package/dist/observability/utils/spanSerializer.js +1 -0
  86. package/dist/server/voice/tokenCompare.d.ts +14 -0
  87. package/dist/server/voice/tokenCompare.js +22 -0
  88. package/dist/server/voice/voiceServerApp.js +62 -3
  89. package/dist/server/voice/voiceWebSocketHandler.d.ts +20 -3
  90. package/dist/server/voice/voiceWebSocketHandler.js +555 -435
  91. package/dist/types/generate.d.ts +47 -0
  92. package/dist/types/hitl.d.ts +3 -0
  93. package/dist/types/index.d.ts +1 -1
  94. package/dist/types/index.js +1 -1
  95. package/dist/types/realtime.d.ts +243 -0
  96. package/dist/types/realtime.js +69 -0
  97. package/dist/types/server.d.ts +68 -0
  98. package/dist/types/span.d.ts +2 -0
  99. package/dist/types/span.js +2 -0
  100. package/dist/types/stream.d.ts +36 -14
  101. package/dist/types/stt.d.ts +585 -0
  102. package/dist/types/stt.js +89 -0
  103. package/dist/types/tools.d.ts +2 -0
  104. package/dist/types/tts.d.ts +23 -11
  105. package/dist/types/tts.js +7 -0
  106. package/dist/types/voice.d.ts +272 -0
  107. package/dist/types/voice.js +136 -0
  108. package/dist/utils/audioFormatDetector.d.ts +15 -0
  109. package/dist/utils/audioFormatDetector.js +33 -0
  110. package/dist/utils/errorHandling.js +4 -0
  111. package/dist/utils/sttProcessor.d.ts +115 -0
  112. package/dist/utils/sttProcessor.js +294 -0
  113. package/dist/voice/RealtimeVoiceAPI.d.ts +183 -0
  114. package/dist/voice/RealtimeVoiceAPI.js +438 -0
  115. package/dist/voice/audio-utils.d.ts +135 -0
  116. package/dist/voice/audio-utils.js +434 -0
  117. package/dist/voice/errors.d.ts +123 -0
  118. package/dist/voice/errors.js +385 -0
  119. package/dist/voice/index.d.ts +26 -0
  120. package/dist/voice/index.js +54 -0
  121. package/dist/voice/providers/AzureSTT.d.ts +47 -0
  122. package/dist/voice/providers/AzureSTT.js +344 -0
  123. package/dist/voice/providers/AzureTTS.d.ts +59 -0
  124. package/dist/voice/providers/AzureTTS.js +348 -0
  125. package/dist/voice/providers/DeepgramSTT.d.ts +40 -0
  126. package/dist/voice/providers/DeepgramSTT.js +549 -0
  127. package/dist/voice/providers/ElevenLabsTTS.d.ts +53 -0
  128. package/dist/voice/providers/ElevenLabsTTS.js +310 -0
  129. package/dist/voice/providers/GeminiLive.d.ts +52 -0
  130. package/dist/voice/providers/GeminiLive.js +371 -0
  131. package/dist/voice/providers/GoogleSTT.d.ts +60 -0
  132. package/dist/voice/providers/GoogleSTT.js +453 -0
  133. package/dist/voice/providers/OpenAIRealtime.d.ts +47 -0
  134. package/dist/voice/providers/OpenAIRealtime.js +411 -0
  135. package/dist/voice/providers/OpenAISTT.d.ts +41 -0
  136. package/dist/voice/providers/OpenAISTT.js +285 -0
  137. package/dist/voice/providers/OpenAITTS.d.ts +49 -0
  138. package/dist/voice/providers/OpenAITTS.js +270 -0
  139. package/dist/voice/stream-handler.d.ts +166 -0
  140. package/dist/voice/stream-handler.js +513 -0
  141. package/package.json +5 -2
package/CHANGELOG.md CHANGED
@@ -1,3 +1,15 @@
1
+ ## [9.62.0](https://github.com/juspay/neurolink/compare/v9.61.2...v9.62.0) (2026-05-07)
2
+
3
+ ### Features
4
+
5
+ - **(voice):** add multi-provider TTS, STT, and realtime voice integration ([4b26485](https://github.com/juspay/neurolink/commit/4b26485b6408eed29b2c608afb98a131fcb4a5ba))
6
+
7
+ ## [9.61.2](https://github.com/juspay/neurolink/compare/v9.61.1...v9.61.2) (2026-05-07)
8
+
9
+ ### Bug Fixes
10
+
11
+ - **(hitl):** prevent duplicate confirmation prompts on tool retry ([14e4890](https://github.com/juspay/neurolink/commit/14e4890d8613ae264f8c8bbd064f882ad4cc22f5))
12
+
1
13
  ## [9.61.1](https://github.com/juspay/neurolink/compare/v9.61.0...v9.61.1) (2026-05-04)
2
14
 
3
15
  ### Bug Fixes
package/README.md CHANGED
@@ -40,23 +40,25 @@ Extracted from production systems at Juspay and battle-tested at enterprise scal
40
40
 
41
41
  ## What's New (Q1 2026)
42
42
 
43
- | Feature | Version | Description | Guide |
44
- | ----------------------------------- | ------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------- |
45
- | **Gemini 3 Multi-turn Tool Fix** | v9.49.0 | Fixed multi-step agentic tool calling on Vertex AI Gemini 3 models. Correct `thoughtSignature` replay, `stepIndex` parallel-call grouping, `executionId` session isolation, 5-min timeout, silent-timeout surfacing. | [Vertex AI Guide](docs/getting-started/providers/google-vertex.md) |
46
- | **AutoResearch** | v9.17.0 | Autonomous AI experiment engine: proposes code changes, runs experiments, evaluates metrics, keeps improvements unattended for hours. | [AutoResearch Guide](docs/features/autoresearch.md) |
47
- | **MCP Enhancements** | v9.16.0 | Advanced MCP features: tool routing, result caching, request batching, annotations, elicitation, custom server base, multi-server management | [MCP Enhancements Guide](docs/features/mcp-enhancements.md) |
48
- | **Memory** | v9.12.0 | Per-user condensed memory that persists across conversations. LLM-powered condensation with S3, Redis, or SQLite backends. | [Memory Guide](docs/features/memory.md) |
49
- | **Context Window Management** | v9.2.0 | 4-stage compaction pipeline with auto-detection, budget gate at 80% usage, per-provider token estimation | [Context Compaction Guide](docs/features/context-compaction.md) |
50
- | **Tool Execution Control** | v9.3.0 | `prepareStep` and `toolChoice` support for per-step tool enforcement in multi-step agentic loops. API-level control over tool calls. | [API Reference](docs/api/type-aliases/GenerateOptions.md#preparestep) |
51
- | **File Processor System** | v9.1.0 | 17+ file type processors with ProcessorRegistry, security sanitization, SVG text injection | [File Processors Guide](docs/features/file-processors.md) |
52
- | **RAG with generate()/stream()** | v9.2.0 | Pass `rag: { files }` to generate/stream for automatic document chunking, embedding, and AI-powered search. 10 chunking strategies, hybrid search, reranking. | [RAG Guide](docs/features/rag.md) |
53
- | **External TracerProvider Support** | v8.43.0 | Integrate NeuroLink with existing OpenTelemetry instrumentation. Prevents duplicate registration conflicts. | [Observability Guide](docs/features/observability.md) |
54
- | **Server Adapters** | v8.43.0 | Multi-framework HTTP server with Hono, Express, Fastify, Koa support. Full CLI for server management with foreground/background modes. | [Server Adapters Guide](docs/guides/server-adapters/index.md) |
55
- | **Title Generation Events** | v8.38.0 | Emit `conversation:titleGenerated` event when conversation title is generated. Supports custom title prompts via `NEUROLINK_TITLE_PROMPT`. | [Conversation Memory Guide](docs/conversation-memory.md) |
56
- | **Video Generation with Veo** | v8.32.0 | Video generation using Veo 3.1 (`veo-3.1`). Realistic video generation with many parameter options | [Video Generation Guide](docs/features/video-generation.md) |
57
- | **Image Generation with Gemini** | v8.31.0 | Native image generation using Gemini 2.0 Flash Experimental (`imagen-3.0-generate-002`). High-quality image synthesis directly from Google AI. | [Image Generation Guide](docs/image-generation-streaming.md) |
58
- | **HTTP/Streamable HTTP Transport** | v8.29.0 | Connect to remote MCP servers via HTTP with authentication headers, automatic retry with exponential backoff, and configurable rate limiting. | [HTTP Transport Guide](docs/mcp-http-transport.md) |
59
-
43
+ | Feature | Version | Description | Guide |
44
+ | ----------------------------------- | ---------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------ |
45
+ | **Multi-Provider Voice (TTS/STT)** | unreleased | 4 TTS providers (OpenAI TTS, ElevenLabs, Google TTS, Azure TTS) + 4 STT providers (Whisper, Deepgram, Azure STT, Google STT) + 2 realtime APIs (OpenAI Realtime, Gemini Live). CLI `--tts` / `--stt` flags. | [TTS Guide](docs/features/tts.md) \| [STT Guide](docs/features/audio-input.md) |
46
+ | **Gemini 3 Multi-turn Tool Fix** | v9.49.0 | Fixed multi-step agentic tool calling on Vertex AI Gemini 3 models. Correct `thoughtSignature` replay, `stepIndex` parallel-call grouping, `executionId` session isolation, 5-min timeout, silent-timeout surfacing. | [Vertex AI Guide](docs/getting-started/providers/google-vertex.md) |
47
+ | **AutoResearch** | v9.17.0 | Autonomous AI experiment engine: proposes code changes, runs experiments, evaluates metrics, keeps improvements unattended for hours. | [AutoResearch Guide](docs/features/autoresearch.md) |
48
+ | **MCP Enhancements** | v9.16.0 | Advanced MCP features: tool routing, result caching, request batching, annotations, elicitation, custom server base, multi-server management | [MCP Enhancements Guide](docs/features/mcp-enhancements.md) |
49
+ | **Memory** | v9.12.0 | Per-user condensed memory that persists across conversations. LLM-powered condensation with S3, Redis, or SQLite backends. | [Memory Guide](docs/features/memory.md) |
50
+ | **Context Window Management** | v9.2.0 | 4-stage compaction pipeline with auto-detection, budget gate at 80% usage, per-provider token estimation | [Context Compaction Guide](docs/features/context-compaction.md) |
51
+ | **Tool Execution Control** | v9.3.0 | `prepareStep` and `toolChoice` support for per-step tool enforcement in multi-step agentic loops. API-level control over tool calls. | [API Reference](docs/api/type-aliases/GenerateOptions.md#preparestep) |
52
+ | **File Processor System** | v9.1.0 | 17+ file type processors with ProcessorRegistry, security sanitization, SVG text injection | [File Processors Guide](docs/features/file-processors.md) |
53
+ | **RAG with generate()/stream()** | v9.2.0 | Pass `rag: { files }` to generate/stream for automatic document chunking, embedding, and AI-powered search. 10 chunking strategies, hybrid search, reranking. | [RAG Guide](docs/features/rag.md) |
54
+ | **External TracerProvider Support** | v8.43.0 | Integrate NeuroLink with existing OpenTelemetry instrumentation. Prevents duplicate registration conflicts. | [Observability Guide](docs/features/observability.md) |
55
+ | **Server Adapters** | v8.43.0 | Multi-framework HTTP server with Hono, Express, Fastify, Koa support. Full CLI for server management with foreground/background modes. | [Server Adapters Guide](docs/guides/server-adapters/index.md) |
56
+ | **Title Generation Events** | v8.38.0 | Emit `conversation:titleGenerated` event when conversation title is generated. Supports custom title prompts via `NEUROLINK_TITLE_PROMPT`. | [Conversation Memory Guide](docs/conversation-memory.md) |
57
+ | **Video Generation with Veo** | v8.32.0 | Video generation using Veo 3.1 (`veo-3.1`). Realistic video generation with many parameter options | [Video Generation Guide](docs/features/video-generation.md) |
58
+ | **Image Generation with Gemini** | v8.31.0 | Native image generation using Gemini 2.0 Flash Experimental (`imagen-3.0-generate-002`). High-quality image synthesis directly from Google AI. | [Image Generation Guide](docs/image-generation-streaming.md) |
59
+ | **HTTP/Streamable HTTP Transport** | v8.29.0 | Connect to remote MCP servers via HTTP with authentication headers, automatic retry with exponential backoff, and configurable rate limiting. | [HTTP Transport Guide](docs/mcp-http-transport.md) |
60
+
61
+ - **Multi-Provider Voice (TTS/STT)** – Full voice pipeline with 4 TTS providers (OpenAI TTS, ElevenLabs, Google TTS, Azure TTS), 4 STT providers (Whisper, Deepgram, Azure STT, Google STT), and 2 realtime APIs (OpenAI Realtime, Gemini Live). Pass `--tts` / `--stt` flags in the CLI or configure via `tts` / `stt` in the SDK. → [TTS Guide](docs/features/tts.md) | [STT Guide](docs/features/audio-input.md) | [Realtime Guide](docs/features/real-time-services.md)
60
62
  - **AutoResearch** – Autonomous AI experiment engine inspired by Karpathy's autoresearch. Phase-gated tool access, git-backed safety, deterministic metric evaluation, and TaskManager integration for continuous unattended research. 12 research tools, 10 typed events, 9 CLI subcommands. → [AutoResearch Guide](docs/features/autoresearch.md)
61
63
  - **Memory** – Per-user condensed memory that persists across all conversations. Automatically retrieves and stores memory on each `generate()`/`stream()` call. Supports S3, Redis, and SQLite storage with LLM-powered condensation. → [Memory Guide](docs/features/memory.md)
62
64
  - **External TracerProvider Support** – Integrate NeuroLink with applications that already have OpenTelemetry instrumentation. Supports auto-detection and manual configuration. → [Observability Guide](docs/features/observability.md)
@@ -435,6 +437,10 @@ NeuroLink is a comprehensive AI development platform. Every feature below is pro
435
437
  | **NVIDIA NIM** | Llama 3.3 70B, 400+ catalog models | ❌ | ✅ Full | ✅ Production | [Setup Guide](docs/getting-started/provider-setup.md#nvidia-nim) |
436
438
  | **LM Studio** | Any model loaded in LM Studio (local) | ✅ Free (Local) | ✅ Full | ✅ Production | [Setup Guide](docs/getting-started/provider-setup.md#lm-studio) |
437
439
  | **llama.cpp** | Any GGUF model served by llama-server (local) | ✅ Free (Local) | ✅ Full | ✅ Production | [Setup Guide](docs/getting-started/provider-setup.md#llamacpp) |
440
+ | **OpenAI TTS** | TTS-1, TTS-1-HD, GPT-4o Audio | ❌ | N/A | ✅ Production | [Setup Guide](docs/getting-started/provider-setup.md#openai-tts) |
441
+ | **ElevenLabs** | Multilingual v2, Turbo v2.5, Flash v2.5 | ✅ Free Tier | N/A | ✅ Production | [Setup Guide](docs/getting-started/provider-setup.md#elevenlabs) |
442
+ | **Deepgram** | Nova-3, Nova-2, Enhanced, Base (STT) | ✅ Free Tier | N/A | ✅ Production | [Setup Guide](docs/getting-started/provider-setup.md#deepgram) |
443
+ | **Azure Speech** | Azure Cognitive Services TTS + STT | ❌ | N/A | ✅ Production | [Setup Guide](docs/getting-started/provider-setup.md#azure-speech) |
438
444
 
439
445
  **[📖 Provider Comparison Guide](docs/reference/provider-comparison.md)** - Detailed feature matrix and selection criteria
440
446
  **[🔬 Provider Feature Compatibility](docs/reference/provider-feature-compatibility.md)** - Test-based compatibility reference for all 19 features across 13 providers
@@ -107,7 +107,7 @@ export class GoogleTTSHandler {
107
107
  const languageCodes = voice.languageCodes;
108
108
  const primaryLanguageCode = languageCodes[0];
109
109
  const voiceType = this.detectVoiceType(voiceName);
110
- // Map Google's ssmlGender → internal Gender
110
+ // Map Google's ssmlGender → internal TTSGender
111
111
  const gender = voice.ssmlGender === "MALE"
112
112
  ? "male"
113
113
  : voice.ssmlGender === "FEMALE"