@bytexbyte/nxtlinq-ai-agent-web-development 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/hooks/useNxtlinqAgent.d.ts +2 -0
- package/dist/hooks/useNxtlinqAgent.d.ts.map +1 -1
- package/dist/hooks/useNxtlinqAgent.js +2 -0
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -1
- package/dist/legacy/core/lib/textToSpeech.d.ts +10 -0
- package/dist/legacy/core/lib/textToSpeech.d.ts.map +1 -1
- package/dist/legacy/core/lib/textToSpeech.js +77 -0
- package/dist/legacy/core/lib/useVoiceMode.d.ts +4 -2
- package/dist/legacy/core/lib/useVoiceMode.d.ts.map +1 -1
- package/dist/legacy/core/lib/useVoiceMode.js +8 -2
- package/dist/legacy/index.d.ts +1 -1
- package/dist/legacy/index.d.ts.map +1 -1
- package/dist/legacy/index.js +1 -1
- package/package.json +2 -2
- package/src/hooks/useNxtlinqAgent.ts +9 -0
- package/src/index.ts +1 -0
- package/src/legacy/core/lib/textToSpeech.ts +92 -0
- package/src/legacy/core/lib/useVoiceMode.ts +13 -1
- package/src/legacy/index.ts +1 -1
|
@@ -12,6 +12,8 @@ export type UseNxtlinqAgentResult = NxtlinqAgentSnapshot & {
|
|
|
12
12
|
setMessages: (messages: Message[]) => void;
|
|
13
13
|
postTextTts: (text: string) => Promise<PostTextTtsResult>;
|
|
14
14
|
buildTextTtsPlaybackUri: (result: PostTextTtsResult) => string;
|
|
15
|
+
/** Stream PCM16 TTS using browser fetch streaming. Web-only. */
|
|
16
|
+
streamTextTts: (text: string, onChunk: (pcm16: Int16Array) => void, signal?: AbortSignal) => Promise<void>;
|
|
15
17
|
};
|
|
16
18
|
/** Subscribe to {@link NxtlinqAgent} state for custom Web chat UIs. */
|
|
17
19
|
export declare function useNxtlinqAgent(): UseNxtlinqAgentResult;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"useNxtlinqAgent.d.ts","sourceRoot":"","sources":["../../src/hooks/useNxtlinqAgent.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,OAAO,EACP,oBAAoB,EACpB,iBAAiB,EACjB,kBAAkB,EACnB,MAAM,8CAA8C,CAAC;AAEtD,OAAO,EAAE,sBAAsB,EAAE,MAAM,gCAAgC,CAAC;AAExE,MAAM,MAAM,qBAAqB,GAAG,oBAAoB,GAAG;IACzD,KAAK,EAAE,UAAU,CAAC,OAAO,sBAAsB,CAAC,CAAC;IACjD,WAAW,EAAE,CACX,IAAI,EAAE,MAAM,EACZ,OAAO,CAAC,EAAE,IAAI,CAAC,kBAAkB,EAAE,MAAM,CAAC,KACvC,OAAO,CAAC,IAAI,CAAC,CAAC;IACnB,WAAW,EAAE,CAAC,OAAO,CAAC,EAAE;QAAE,IAAI,CAAC,EAAE,MAAM,CAAA;KAAE,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IAC5D,oBAAoB,EAAE,CAAC,OAAO,CAAC,EAAE;QAAE,IAAI,CAAC,EAAE,MAAM,CAAA;KAAE,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IACrE,WAAW,EAAE,CAAC,QAAQ,EAAE,OAAO,EAAE,KAAK,IAAI,CAAC;IAC3C,WAAW,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,iBAAiB,CAAC,CAAC;IAC1D,uBAAuB,EAAE,CAAC,MAAM,EAAE,iBAAiB,KAAK,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"useNxtlinqAgent.d.ts","sourceRoot":"","sources":["../../src/hooks/useNxtlinqAgent.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,OAAO,EACP,oBAAoB,EACpB,iBAAiB,EACjB,kBAAkB,EACnB,MAAM,8CAA8C,CAAC;AAEtD,OAAO,EAAE,sBAAsB,EAAE,MAAM,gCAAgC,CAAC;AAExE,MAAM,MAAM,qBAAqB,GAAG,oBAAoB,GAAG;IACzD,KAAK,EAAE,UAAU,CAAC,OAAO,sBAAsB,CAAC,CAAC;IACjD,WAAW,EAAE,CACX,IAAI,EAAE,MAAM,EACZ,OAAO,CAAC,EAAE,IAAI,CAAC,kBAAkB,EAAE,MAAM,CAAC,KACvC,OAAO,CAAC,IAAI,CAAC,CAAC;IACnB,WAAW,EAAE,CAAC,OAAO,CAAC,EAAE;QAAE,IAAI,CAAC,EAAE,MAAM,CAAA;KAAE,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IAC5D,oBAAoB,EAAE,CAAC,OAAO,CAAC,EAAE;QAAE,IAAI,CAAC,EAAE,MAAM,CAAA;KAAE,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IACrE,WAAW,EAAE,CAAC,QAAQ,EAAE,OAAO,EAAE,KAAK,IAAI,CAAC;IAC3C,WAAW,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,iBAAiB,CAAC,CAAC;IAC1D,uBAAuB,EAAE,CAAC,MAAM,EAAE,iBAAiB,KAAK,MAAM,CAAC;IAC/D,gEAAgE;IAChE,aAAa,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC,KAAK,EAAE,UAAU,KAAK,IAAI,EAAE,MAAM,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;CAC5G,CAAC;AAEF,uEAAuE;AACvE,wBAAgB,eAAe,IAAI,qBAAqB,CAwDvD"}
|
|
@@ -10,6 +10,7 @@ export function useNxtlinqAgent() {
|
|
|
10
10
|
const setMessages = useCallback((messages) => agent.setMessages(messages), [agent]);
|
|
11
11
|
const postTextTts = useCallback((text) => agent.postTextTts(text), [agent]);
|
|
12
12
|
const buildTextTtsPlaybackUri = useCallback((result) => agent.buildTextTtsPlaybackUri(result), [agent]);
|
|
13
|
+
const streamTextTts = useCallback((text, onChunk, signal) => agent.streamTextTts(text, onChunk, signal), [agent]);
|
|
13
14
|
return {
|
|
14
15
|
agent,
|
|
15
16
|
...snapshot,
|
|
@@ -19,5 +20,6 @@ export function useNxtlinqAgent() {
|
|
|
19
20
|
setMessages,
|
|
20
21
|
postTextTts,
|
|
21
22
|
buildTextTtsPlaybackUri,
|
|
23
|
+
streamTextTts,
|
|
22
24
|
};
|
|
23
25
|
}
|
package/dist/index.d.ts
CHANGED
|
@@ -7,6 +7,6 @@ export { WEB_AGENT_DEFAULTS, applyWebAgentDefaults } from './webAgentDefaults';
|
|
|
7
7
|
export { fileToAttachment } from './utils/fileToAttachment';
|
|
8
8
|
export type { Message, Attachment, AgentResponse, SendMessageOptions, NxtlinqAgentSnapshot, ToolUse, StartVoiceSessionOptions, VoiceSession, VoiceStatus, VoiceTranscriptEvent, VoiceDoneEvent, VoiceUserInputOptions, VoiceGreetingOptions, } from '@bytexbyte/nxtlinq-ai-agent-core-development';
|
|
9
9
|
export { NxtlinqAgent, setApiHosts, createBrowserWebRTCPort, createBrowserStoragePort, createDefaultHttpPort, VoiceNotSupportedError, mapServerHistoryToMessages, appendServerHistoryIntoMessages, STORAGE_KEYS, } from '@bytexbyte/nxtlinq-ai-agent-core-development';
|
|
10
|
-
export { createNxtlinqApi, getAiAgentApiHost, startVoiceSession, createVoiceApi, useDraggable, useLocalStorage, useSessionStorage, useResizable, synthesizeSpeechToBuffer, useSpeechToTextFromMic, useVoiceMode, metakeepClient, getEthers, sleep, walletTextUtils, connectWallet, disconnectWallet, validateToken, createAITMetadata, generateAITId, prepareAITCreation, createNotification, getNotificationIcon, containsUrls, convertUrlsToLinks, convertUrlsToHtml, } from './legacy';
|
|
10
|
+
export { createNxtlinqApi, getAiAgentApiHost, startVoiceSession, createVoiceApi, useDraggable, useLocalStorage, useSessionStorage, useResizable, synthesizeSpeechToBuffer, streamSpeechToAudioContext, useSpeechToTextFromMic, useVoiceMode, metakeepClient, getEthers, sleep, walletTextUtils, connectWallet, disconnectWallet, validateToken, createAITMetadata, generateAITId, prepareAITCreation, createNotification, getNotificationIcon, containsUrls, convertUrlsToLinks, convertUrlsToHtml, } from './legacy';
|
|
11
11
|
export type { ResizeCorner } from './legacy';
|
|
12
12
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,qBAAqB,EACrB,KAAK,4BAA4B,GAClC,MAAM,sBAAsB,CAAC;AAE9B,OAAO,EACL,oBAAoB,EACpB,sBAAsB,EACtB,KAAK,yBAAyB,EAC9B,KAAK,WAAW,GACjB,MAAM,+BAA+B,CAAC;AAEvC,OAAO,EAAE,eAAe,EAAE,KAAK,qBAAqB,EAAE,MAAM,yBAAyB,CAAC;AACtF,OAAO,EACL,eAAe,EACf,KAAK,qBAAqB,EAC1B,KAAK,sBAAsB,GAC5B,MAAM,yBAAyB,CAAC;AAEjC,OAAO,EACL,sBAAsB,EACtB,KAAK,6BAA6B,GACnC,MAAM,gCAAgC,CAAC;AACxC,OAAO,EAAE,kBAAkB,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAC;AAC/E,OAAO,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AAE5D,YAAY,EACV,OAAO,EACP,UAAU,EACV,aAAa,EACb,kBAAkB,EAClB,oBAAoB,EACpB,OAAO,EACP,wBAAwB,EACxB,YAAY,EACZ,WAAW,EACX,oBAAoB,EACpB,cAAc,EACd,qBAAqB,EACrB,oBAAoB,GACrB,MAAM,8CAA8C,CAAC;AAEtD,OAAO,EACL,YAAY,EACZ,WAAW,EACX,uBAAuB,EACvB,wBAAwB,EACxB,qBAAqB,EACrB,sBAAsB,EACtB,0BAA0B,EAC1B,+BAA+B,EAC/B,YAAY,GACb,MAAM,8CAA8C,CAAC;AAEtD,OAAO,EACL,gBAAgB,EAChB,iBAAiB,EACjB,iBAAiB,EACjB,cAAc,EACd,YAAY,EACZ,eAAe,EACf,iBAAiB,EACjB,YAAY,EACZ,wBAAwB,EACxB,sBAAsB,EACtB,YAAY,EACZ,cAAc,EACd,SAAS,EACT,KAAK,EACL,eAAe,EACf,aAAa,EACb,gBAAgB,EAChB,aAAa,EACb,iBAAiB,EACjB,aAAa,EACb,kBAAkB,EAClB,kBAAkB,EAClB,mBAAmB,EACnB,YAAY,EACZ,kBAAkB,EAClB,iBAAiB,GAClB,MAAM,UAAU,CAAC;AAElB,YAAY,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,qBAAqB,EACrB,KAAK,4BAA4B,GAClC,MAAM,sBAAsB,CAAC;AAE9B,OAAO,EACL,oBAAoB,EACpB,sBAAsB,EACtB,KAAK,yBAAyB,EAC9B,KAAK,WAAW,GACjB,MAAM,+BAA+B,CAAC;AAEvC,OAAO,EAAE,eAAe,EAAE,KAAK,qBAAqB,EAAE,MAAM,yBAAyB,CAAC;AACtF,OAAO,EACL,eAAe,EACf,KAAK,qBAAqB,EAC1B,KAAK,sBAAsB,GAC5B,MAAM,yBAAyB,CAAC;AAEjC,OAAO,EACL,sBAAsB,EACtB,KAAK,6BAA6B,GACnC,MAAM,gCAAgC,CAAC;AACxC,OAAO,EAAE,kBAAkB,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAC;AAC/E,OAAO,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AAE5D,YAAY,EACV,OAAO,EACP,UAAU,EACV,aAAa,EACb,kBAAkB,EAClB,oBAAoB,EACpB,OAAO,EACP,wBAAwB,EACxB,YAAY,EACZ,WAAW,EACX,oBAAoB,EACpB,cAAc,EACd,qBAAqB,EACrB,oBAAoB,GACrB,MAAM,8CAA8C,CAAC;AAEtD,OAAO,EACL,YAAY,EACZ,WAAW,EACX,uBAAuB,EACvB,wBAAwB,EACxB,qBAAqB,EACrB,sBAAsB,EACtB,0BAA0B,EAC1B,+BAA+B,EAC/B,YAAY,GACb,MAAM,8CAA8C,CAAC;AAEtD,OAAO,EACL,gBAAgB,EAChB,iBAAiB,EACjB,iBAAiB,EACjB,cAAc,EACd,YAAY,EACZ,eAAe,EACf,iBAAiB,EACjB,YAAY,EACZ,wBAAwB,EACxB,0BAA0B,EAC1B,sBAAsB,EACtB,YAAY,EACZ,cAAc,EACd,SAAS,EACT,KAAK,EACL,eAAe,EACf,aAAa,EACb,gBAAgB,EAChB,aAAa,EACb,iBAAiB,EACjB,aAAa,EACb,kBAAkB,EAClB,kBAAkB,EAClB,mBAAmB,EACnB,YAAY,EACZ,kBAAkB,EAClB,iBAAiB,GAClB,MAAM,UAAU,CAAC;AAElB,YAAY,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -6,4 +6,4 @@ export { createWebPlatformPorts, } from './ports/createWebPlatformPorts';
|
|
|
6
6
|
export { WEB_AGENT_DEFAULTS, applyWebAgentDefaults } from './webAgentDefaults';
|
|
7
7
|
export { fileToAttachment } from './utils/fileToAttachment';
|
|
8
8
|
export { NxtlinqAgent, setApiHosts, createBrowserWebRTCPort, createBrowserStoragePort, createDefaultHttpPort, VoiceNotSupportedError, mapServerHistoryToMessages, appendServerHistoryIntoMessages, STORAGE_KEYS, } from '@bytexbyte/nxtlinq-ai-agent-core-development';
|
|
9
|
-
export { createNxtlinqApi, getAiAgentApiHost, startVoiceSession, createVoiceApi, useDraggable, useLocalStorage, useSessionStorage, useResizable, synthesizeSpeechToBuffer, useSpeechToTextFromMic, useVoiceMode, metakeepClient, getEthers, sleep, walletTextUtils, connectWallet, disconnectWallet, validateToken, createAITMetadata, generateAITId, prepareAITCreation, createNotification, getNotificationIcon, containsUrls, convertUrlsToLinks, convertUrlsToHtml, } from './legacy';
|
|
9
|
+
export { createNxtlinqApi, getAiAgentApiHost, startVoiceSession, createVoiceApi, useDraggable, useLocalStorage, useSessionStorage, useResizable, synthesizeSpeechToBuffer, streamSpeechToAudioContext, useSpeechToTextFromMic, useVoiceMode, metakeepClient, getEthers, sleep, walletTextUtils, connectWallet, disconnectWallet, validateToken, createAITMetadata, generateAITId, prepareAITCreation, createNotification, getNotificationIcon, containsUrls, convertUrlsToLinks, convertUrlsToHtml, } from './legacy';
|
|
@@ -11,4 +11,14 @@ export declare function synthesizeSpeechToBuffer(params: {
|
|
|
11
11
|
buffer: ArrayBuffer;
|
|
12
12
|
mimeType: string;
|
|
13
13
|
} | undefined>;
|
|
14
|
+
export declare function streamSpeechToAudioContext(params: {
|
|
15
|
+
text: string;
|
|
16
|
+
apiKey: string;
|
|
17
|
+
apiSecret: string;
|
|
18
|
+
audioCtx: AudioContext;
|
|
19
|
+
onSourceScheduled?: (source: AudioBufferSourceNode) => void;
|
|
20
|
+
onFirstChunk?: () => void;
|
|
21
|
+
onEnded?: () => void;
|
|
22
|
+
signal?: AbortSignal;
|
|
23
|
+
}): Promise<void>;
|
|
14
24
|
//# sourceMappingURL=textToSpeech.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"textToSpeech.d.ts","sourceRoot":"","sources":["../../../../src/legacy/core/lib/textToSpeech.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"textToSpeech.d.ts","sourceRoot":"","sources":["../../../../src/legacy/core/lib/textToSpeech.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,sBAAsB,EAAE,MAAM,8CAA8C,CAAC;AA8E3F;;GAEG;AACH,wBAAsB,wBAAwB,CAAC,MAAM,EAAE;IACrD,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,sBAAsB,CAAC;CACnC,GAAG,OAAO,CAAC;IAAE,MAAM,EAAE,WAAW,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAA;CAAE,GAAG,SAAS,CAAC,CAsCjE;AAmBD,wBAAsB,0BAA0B,CAAC,MAAM,EAAE;IACvD,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,YAAY,CAAC;IACvB,iBAAiB,CAAC,EAAE,CAAC,MAAM,EAAE,qBAAqB,KAAK,IAAI,CAAC;IAC5D,YAAY,CAAC,EAAE,MAAM,IAAI,CAAC;IAC1B,OAAO,CAAC,EAAE,MAAM,IAAI,CAAC;IACrB,MAAM,CAAC,EAAE,WAAW,CAAC;CACtB,GAAG,OAAO,CAAC,IAAI,CAAC,CA+DhB"}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { SpeechConfig, SpeechSynthesizer } from 'microsoft-cognitiveservices-speech-sdk';
|
|
2
|
+
import { streamTextTts } from '@bytexbyte/nxtlinq-ai-agent-core-development';
|
|
2
3
|
import { getAiAgentApiHost } from '../../api/nxtlinq-api';
|
|
3
4
|
const DEFAULT_AZURE_VOICE = 'en-US-JennyNeural';
|
|
4
5
|
async function fetchAzureTokenFromAgent(agentHost, apiKey, apiSecret) {
|
|
@@ -80,3 +81,79 @@ export async function synthesizeSpeechToBuffer(params) {
|
|
|
80
81
|
return undefined;
|
|
81
82
|
return { buffer, mimeType: 'audio/wav' };
|
|
82
83
|
}
|
|
84
|
+
/**
|
|
85
|
+
* Stream OpenAI TTS audio progressively into a Web AudioContext.
|
|
86
|
+
*
|
|
87
|
+
* Receives raw PCM16 (24 kHz mono) chunks from the server and schedules
|
|
88
|
+
* them back-to-back on the AudioContext so playback starts as soon as the
|
|
89
|
+
* first chunk arrives, before the full audio is downloaded.
|
|
90
|
+
*
|
|
91
|
+
* @param onSourceScheduled Called for every AudioBufferSourceNode that is
|
|
92
|
+
* scheduled — collect these to stop playback early (e.g. user clicks stop).
|
|
93
|
+
* @param onFirstChunk Called when the first audio chunk is ready to play.
|
|
94
|
+
* @param onEnded Called when all scheduled buffers have finished playing.
|
|
95
|
+
*/
|
|
96
|
+
// Number of chunks to pre-buffer before starting playback. Each chunk is
|
|
97
|
+
// ~8192 bytes = ~170ms of 24kHz PCM16 audio. Buffering 3 chunks gives ~510ms
|
|
98
|
+
// of headroom to absorb network jitter and prevent gaps at the start.
|
|
99
|
+
const PREBUFFER_CHUNKS = 3;
|
|
100
|
+
export async function streamSpeechToAudioContext(params) {
|
|
101
|
+
const { text, apiKey, apiSecret, audioCtx, onSourceScheduled, onFirstChunk, onEnded, signal } = params;
|
|
102
|
+
if (audioCtx.state === 'suspended')
|
|
103
|
+
await audioCtx.resume();
|
|
104
|
+
let nextPlayTime = 0;
|
|
105
|
+
let pendingCount = 0;
|
|
106
|
+
let streamDone = false;
|
|
107
|
+
// Pre-buffer: collect initial chunks before scheduling any playback so the
|
|
108
|
+
// AudioContext has enough backlog to play without gaps from network jitter.
|
|
109
|
+
const prebuffer = [];
|
|
110
|
+
let prebuffering = true;
|
|
111
|
+
const checkEnded = () => {
|
|
112
|
+
if (streamDone && pendingCount === 0)
|
|
113
|
+
onEnded?.();
|
|
114
|
+
};
|
|
115
|
+
const scheduleBuffer = (audioBuffer) => {
|
|
116
|
+
const source = audioCtx.createBufferSource();
|
|
117
|
+
source.buffer = audioBuffer;
|
|
118
|
+
source.connect(audioCtx.destination);
|
|
119
|
+
pendingCount++;
|
|
120
|
+
source.start(nextPlayTime);
|
|
121
|
+
nextPlayTime += audioBuffer.duration;
|
|
122
|
+
onSourceScheduled?.(source);
|
|
123
|
+
source.onended = () => {
|
|
124
|
+
pendingCount--;
|
|
125
|
+
checkEnded();
|
|
126
|
+
};
|
|
127
|
+
};
|
|
128
|
+
const flushPrebuffer = () => {
|
|
129
|
+
nextPlayTime = audioCtx.currentTime + 0.05;
|
|
130
|
+
onFirstChunk?.();
|
|
131
|
+
for (const buf of prebuffer)
|
|
132
|
+
scheduleBuffer(buf);
|
|
133
|
+
prebuffer.length = 0;
|
|
134
|
+
prebuffering = false;
|
|
135
|
+
};
|
|
136
|
+
await streamTextTts({ apiKey, apiSecret, text }, (pcm16) => {
|
|
137
|
+
if (signal?.aborted)
|
|
138
|
+
return;
|
|
139
|
+
const float32 = new Float32Array(pcm16.length);
|
|
140
|
+
for (let i = 0; i < pcm16.length; i++)
|
|
141
|
+
float32[i] = pcm16[i] / 32768;
|
|
142
|
+
const audioBuffer = audioCtx.createBuffer(1, float32.length, 24000);
|
|
143
|
+
audioBuffer.copyToChannel(float32, 0);
|
|
144
|
+
if (prebuffering) {
|
|
145
|
+
prebuffer.push(audioBuffer);
|
|
146
|
+
if (prebuffer.length >= PREBUFFER_CHUNKS)
|
|
147
|
+
flushPrebuffer();
|
|
148
|
+
}
|
|
149
|
+
else {
|
|
150
|
+
scheduleBuffer(audioBuffer);
|
|
151
|
+
}
|
|
152
|
+
}, signal);
|
|
153
|
+
// Stream finished — flush any remaining prebuffered chunks (short text that
|
|
154
|
+
// never reached PREBUFFER_CHUNKS) and mark done.
|
|
155
|
+
if (prebuffering && prebuffer.length > 0)
|
|
156
|
+
flushPrebuffer();
|
|
157
|
+
streamDone = true;
|
|
158
|
+
checkEnded();
|
|
159
|
+
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import * as React from 'react';
|
|
2
2
|
import type { AITApi, Message, VoiceTransport } from '@bytexbyte/nxtlinq-ai-agent-core-development';
|
|
3
|
-
import { type VoiceStatus } from '../../api/voice';
|
|
3
|
+
import { type VoiceStatus, type VoiceToolCallEvent, type VoiceToolResultEvent } from '../../api/voice';
|
|
4
4
|
export type UseVoiceModeOptions = {
|
|
5
5
|
apiKey: string;
|
|
6
6
|
apiSecret: string;
|
|
@@ -14,11 +14,13 @@ export type UseVoiceModeOptions = {
|
|
|
14
14
|
getMessages: () => Message[];
|
|
15
15
|
setMessages: React.Dispatch<React.SetStateAction<Message[]>>;
|
|
16
16
|
onError?: (error: Error) => void;
|
|
17
|
+
onToolCall?: (event: VoiceToolCallEvent) => void;
|
|
18
|
+
onToolResult?: (event: VoiceToolResultEvent) => void;
|
|
17
19
|
stopRecording: () => void;
|
|
18
20
|
stopTextToSpeech: () => void;
|
|
19
21
|
voiceTransport?: VoiceTransport;
|
|
20
22
|
};
|
|
21
|
-
export declare function useVoiceMode({ apiKey, apiSecret, pseudoId, externalId, aitId, walletAddress, aitToken, metadata, nxtlinqApi, getMessages, setMessages, onError, stopRecording, stopTextToSpeech, voiceTransport, }: UseVoiceModeOptions): {
|
|
23
|
+
export declare function useVoiceMode({ apiKey, apiSecret, pseudoId, externalId, aitId, walletAddress, aitToken, metadata, nxtlinqApi, getMessages, setMessages, onError, onToolCall, onToolResult, stopRecording, stopTextToSpeech, voiceTransport, }: UseVoiceModeOptions): {
|
|
22
24
|
isVoiceMode: boolean;
|
|
23
25
|
voiceStatus: VoiceStatus;
|
|
24
26
|
isVoiceConnecting: boolean;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"useVoiceMode.d.ts","sourceRoot":"","sources":["../../../../src/legacy/core/lib/useVoiceMode.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,KAAK,MAAM,OAAO,CAAC;AAE/B,OAAO,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,cAAc,EAAE,MAAM,8CAA8C,CAAC;AACpG,OAAO,EAKL,KAAK,WAAW,
|
|
1
|
+
{"version":3,"file":"useVoiceMode.d.ts","sourceRoot":"","sources":["../../../../src/legacy/core/lib/useVoiceMode.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,KAAK,MAAM,OAAO,CAAC;AAE/B,OAAO,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,cAAc,EAAE,MAAM,8CAA8C,CAAC;AACpG,OAAO,EAKL,KAAK,WAAW,EAChB,KAAK,kBAAkB,EACvB,KAAK,oBAAoB,EAE1B,MAAM,iBAAiB,CAAC;AAgBzB,MAAM,MAAM,mBAAmB,GAAG;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACnC,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,OAAO,EAAE,CAAC;IAC7B,WAAW,EAAE,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,cAAc,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;IAC7D,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,KAAK,KAAK,IAAI,CAAC;IACjC,UAAU,CAAC,EAAE,CAAC,KAAK,EAAE,kBAAkB,KAAK,IAAI,CAAC;IACjD,YAAY,CAAC,EAAE,CAAC,KAAK,EAAE,oBAAoB,KAAK,IAAI,CAAC;IACrD,aAAa,EAAE,MAAM,IAAI,CAAC;IAC1B,gBAAgB,EAAE,MAAM,IAAI,CAAC;IAC7B,cAAc,CAAC,EAAE,cAAc,CAAC;CACjC,CAAC;AAEF,wBAAgB,YAAY,CAAC,EAC3B,MAAM,EACN,SAAS,EACT,QAAQ,EACR,UAAU,EACV,KAAK,EACL,aAAa,EACb,QAAQ,EACR,QAAQ,EACR,UAAU,EACV,WAAW,EACX,WAAW,EACX,OAAO,EACP,UAAU,EACV,YAAY,EACZ,aAAa,EACb,gBAAgB,EAChB,cAA8B,GAC/B,EAAE,mBAAmB;;;;;;;;;;EA0XrB"}
|
|
@@ -12,7 +12,7 @@ const USER_TRANSCRIPT_BLOCKED = new Set([
|
|
|
12
12
|
const ASSISTANT_MIC_HOLD_STATUSES = new Set([
|
|
13
13
|
'transcribing', 'thinking', 'generating', 'speaking',
|
|
14
14
|
]);
|
|
15
|
-
export function useVoiceMode({ apiKey, apiSecret, pseudoId, externalId, aitId, walletAddress, aitToken, metadata, nxtlinqApi, getMessages, setMessages, onError, stopRecording, stopTextToSpeech, voiceTransport = 'ws-realtime', }) {
|
|
15
|
+
export function useVoiceMode({ apiKey, apiSecret, pseudoId, externalId, aitId, walletAddress, aitToken, metadata, nxtlinqApi, getMessages, setMessages, onError, onToolCall, onToolResult, stopRecording, stopTextToSpeech, voiceTransport = 'ws-realtime', }) {
|
|
16
16
|
const [isVoiceMode, setIsVoiceMode] = React.useState(false);
|
|
17
17
|
const [voiceStatus, setVoiceStatus] = React.useState('idle');
|
|
18
18
|
const [isVoiceConnecting, setIsVoiceConnecting] = React.useState(false);
|
|
@@ -170,6 +170,8 @@ export function useVoiceMode({ apiKey, apiSecret, pseudoId, externalId, aitId, w
|
|
|
170
170
|
onStatus: (_status) => { },
|
|
171
171
|
onTranscript: (_event) => { },
|
|
172
172
|
onDone: (_event) => { },
|
|
173
|
+
onToolCall: (_event) => { },
|
|
174
|
+
onToolResult: (_event) => { },
|
|
173
175
|
});
|
|
174
176
|
const stopVoiceSession = React.useCallback(async (reason = 'unknown') => {
|
|
175
177
|
const session = sessionRef.current;
|
|
@@ -289,6 +291,8 @@ export function useVoiceMode({ apiKey, apiSecret, pseudoId, externalId, aitId, w
|
|
|
289
291
|
onStatus: (status) => voiceHandlersRef.current.onStatus(status),
|
|
290
292
|
onTranscript: (event) => voiceHandlersRef.current.onTranscript(event),
|
|
291
293
|
onDone: (event) => voiceHandlersRef.current.onDone(event),
|
|
294
|
+
onToolCall: (event) => voiceHandlersRef.current.onToolCall(event),
|
|
295
|
+
onToolResult: (event) => voiceHandlersRef.current.onToolResult(event),
|
|
292
296
|
onError: (err) => {
|
|
293
297
|
if (isConnectCancelled())
|
|
294
298
|
return;
|
|
@@ -358,8 +362,10 @@ export function useVoiceMode({ apiKey, apiSecret, pseudoId, externalId, aitId, w
|
|
|
358
362
|
handleTranscriptUi(event);
|
|
359
363
|
},
|
|
360
364
|
onDone: handleDone,
|
|
365
|
+
onToolCall: (event) => onToolCall?.(event),
|
|
366
|
+
onToolResult: (event) => onToolResult?.(event),
|
|
361
367
|
};
|
|
362
|
-
}, [handleStatus, handleTranscriptTurn, handleTranscriptUi, handleDone]);
|
|
368
|
+
}, [handleStatus, handleTranscriptTurn, handleTranscriptUi, handleDone, onToolCall, onToolResult]);
|
|
363
369
|
React.useEffect(() => () => { void stopVoiceSessionRef.current('unmount'); }, []);
|
|
364
370
|
// 追踪当前的 isVoiceMode 状态,用于 visibilitychange 事件
|
|
365
371
|
const isVoiceModeRef = React.useRef(isVoiceMode);
|
package/dist/legacy/index.d.ts
CHANGED
|
@@ -6,7 +6,7 @@ export { default as useLocalStorage } from './core/lib/useLocalStorage';
|
|
|
6
6
|
export { default as useSessionStorage } from './core/lib/useSessionStorage';
|
|
7
7
|
export { useResizable } from './core/lib/useResizable';
|
|
8
8
|
export type { ResizeCorner } from './core/lib/useResizable';
|
|
9
|
-
export { synthesizeSpeechToBuffer } from './core/lib/textToSpeech';
|
|
9
|
+
export { synthesizeSpeechToBuffer, streamSpeechToAudioContext } from './core/lib/textToSpeech';
|
|
10
10
|
export { useSpeechToTextFromMic } from './core/lib/useSpeechToTextFromMic';
|
|
11
11
|
export { useVoiceMode } from './core/lib/useVoiceMode';
|
|
12
12
|
export { default as metakeepClient } from './core/metakeepClient';
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/legacy/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,WAAW,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AACrF,OAAO,EAAE,iBAAiB,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAChE,mBAAmB,aAAa,CAAC;AAEjC,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AACvD,OAAO,EAAE,OAAO,IAAI,eAAe,EAAE,MAAM,4BAA4B,CAAC;AACxE,OAAO,EAAE,OAAO,IAAI,iBAAiB,EAAE,MAAM,8BAA8B,CAAC;AAC5E,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AACvD,YAAY,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAC5D,OAAO,EAAE,wBAAwB,EAAE,MAAM,yBAAyB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/legacy/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,WAAW,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AACrF,OAAO,EAAE,iBAAiB,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAChE,mBAAmB,aAAa,CAAC;AAEjC,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AACvD,OAAO,EAAE,OAAO,IAAI,eAAe,EAAE,MAAM,4BAA4B,CAAC;AACxE,OAAO,EAAE,OAAO,IAAI,iBAAiB,EAAE,MAAM,8BAA8B,CAAC;AAC5E,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AACvD,YAAY,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAC5D,OAAO,EAAE,wBAAwB,EAAE,0BAA0B,EAAE,MAAM,yBAAyB,CAAC;AAC/F,OAAO,EAAE,sBAAsB,EAAE,MAAM,mCAAmC,CAAC;AAC3E,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAEvD,OAAO,EAAE,OAAO,IAAI,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAClE,OAAO,EAAE,SAAS,EAAE,KAAK,EAAE,MAAM,cAAc,CAAC;AAChD,OAAO,KAAK,eAAe,MAAM,8BAA8B,CAAC;AAChE,OAAO,EACL,aAAa,EACb,gBAAgB,EAChB,aAAa,GACd,MAAM,0BAA0B,CAAC;AAClC,OAAO,EACL,iBAAiB,EACjB,aAAa,EACb,kBAAkB,GACnB,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EACL,kBAAkB,EAClB,mBAAmB,GACpB,MAAM,gCAAgC,CAAC;AACxC,OAAO,EACL,YAAY,EACZ,kBAAkB,EAClB,iBAAiB,GAClB,MAAM,uBAAuB,CAAC"}
|
package/dist/legacy/index.js
CHANGED
|
@@ -4,7 +4,7 @@ export { useDraggable } from './core/lib/useDraggable';
|
|
|
4
4
|
export { default as useLocalStorage } from './core/lib/useLocalStorage';
|
|
5
5
|
export { default as useSessionStorage } from './core/lib/useSessionStorage';
|
|
6
6
|
export { useResizable } from './core/lib/useResizable';
|
|
7
|
-
export { synthesizeSpeechToBuffer } from './core/lib/textToSpeech';
|
|
7
|
+
export { synthesizeSpeechToBuffer, streamSpeechToAudioContext } from './core/lib/textToSpeech';
|
|
8
8
|
export { useSpeechToTextFromMic } from './core/lib/useSpeechToTextFromMic';
|
|
9
9
|
export { useVoiceMode } from './core/lib/useVoiceMode';
|
|
10
10
|
export { default as metakeepClient } from './core/metakeepClient';
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bytexbyte/nxtlinq-ai-agent-web-development",
|
|
3
|
-
"version": "0.1.7",
|
|
3
|
+
"version": "0.1.9",
|
|
4
4
|
"description": "React Web headless SDK for nxtlinq AI Agent — hooks and browser ports",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -37,7 +37,7 @@
|
|
|
37
37
|
"react-dom": ">=18.0.0"
|
|
38
38
|
},
|
|
39
39
|
"dependencies": {
|
|
40
|
-
"@bytexbyte/nxtlinq-ai-agent-core-development": "0.4.
|
|
40
|
+
"@bytexbyte/nxtlinq-ai-agent-core-development": "0.4.4",
|
|
41
41
|
"ethers": "^6.16.0",
|
|
42
42
|
"fast-json-stable-stringify": "^2.1.0",
|
|
43
43
|
"metakeep": "^2.2.8",
|
|
@@ -18,6 +18,8 @@ export type UseNxtlinqAgentResult = NxtlinqAgentSnapshot & {
|
|
|
18
18
|
setMessages: (messages: Message[]) => void;
|
|
19
19
|
postTextTts: (text: string) => Promise<PostTextTtsResult>;
|
|
20
20
|
buildTextTtsPlaybackUri: (result: PostTextTtsResult) => string;
|
|
21
|
+
/** Stream PCM16 TTS using browser fetch streaming. Web-only. */
|
|
22
|
+
streamTextTts: (text: string, onChunk: (pcm16: Int16Array) => void, signal?: AbortSignal) => Promise<void>;
|
|
21
23
|
};
|
|
22
24
|
|
|
23
25
|
/** Subscribe to {@link NxtlinqAgent} state for custom Web chat UIs. */
|
|
@@ -60,6 +62,12 @@ export function useNxtlinqAgent(): UseNxtlinqAgentResult {
|
|
|
60
62
|
[agent],
|
|
61
63
|
);
|
|
62
64
|
|
|
65
|
+
const streamTextTts = useCallback(
|
|
66
|
+
(text: string, onChunk: (pcm16: Int16Array) => void, signal?: AbortSignal) =>
|
|
67
|
+
agent.streamTextTts(text, onChunk, signal),
|
|
68
|
+
[agent],
|
|
69
|
+
);
|
|
70
|
+
|
|
63
71
|
return {
|
|
64
72
|
agent,
|
|
65
73
|
...snapshot,
|
|
@@ -69,5 +77,6 @@ export function useNxtlinqAgent(): UseNxtlinqAgentResult {
|
|
|
69
77
|
setMessages,
|
|
70
78
|
postTextTts,
|
|
71
79
|
buildTextTtsPlaybackUri,
|
|
80
|
+
streamTextTts,
|
|
72
81
|
};
|
|
73
82
|
}
|
package/src/index.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { SpeechConfig, SpeechSynthesisResult, SpeechSynthesizer } from 'microsoft-cognitiveservices-speech-sdk';
|
|
2
|
+
import { streamTextTts } from '@bytexbyte/nxtlinq-ai-agent-core-development';
|
|
2
3
|
import { getAiAgentApiHost } from '../../api/nxtlinq-api';
|
|
3
4
|
import type { ClientTtsVoiceSettings } from '@bytexbyte/nxtlinq-ai-agent-core-development';
|
|
4
5
|
|
|
@@ -125,3 +126,94 @@ export async function synthesizeSpeechToBuffer(params: {
|
|
|
125
126
|
if (!buffer) return undefined;
|
|
126
127
|
return { buffer, mimeType: 'audio/wav' };
|
|
127
128
|
}
|
|
129
|
+
|
|
130
|
+
/**
|
|
131
|
+
* Stream OpenAI TTS audio progressively into a Web AudioContext.
|
|
132
|
+
*
|
|
133
|
+
* Receives raw PCM16 (24 kHz mono) chunks from the server and schedules
|
|
134
|
+
* them back-to-back on the AudioContext so playback starts as soon as the
|
|
135
|
+
* first chunk arrives, before the full audio is downloaded.
|
|
136
|
+
*
|
|
137
|
+
* @param onSourceScheduled Called for every AudioBufferSourceNode that is
|
|
138
|
+
* scheduled — collect these to stop playback early (e.g. user clicks stop).
|
|
139
|
+
* @param onFirstChunk Called when the first audio chunk is ready to play.
|
|
140
|
+
* @param onEnded Called when all scheduled buffers have finished playing.
|
|
141
|
+
*/
|
|
142
|
+
// Number of chunks to pre-buffer before starting playback. Each chunk is
|
|
143
|
+
// ~8192 bytes = ~170ms of 24kHz PCM16 audio. Buffering 3 chunks gives ~510ms
|
|
144
|
+
// of headroom to absorb network jitter and prevent gaps at the start.
|
|
145
|
+
const PREBUFFER_CHUNKS = 3;
|
|
146
|
+
|
|
147
|
+
export async function streamSpeechToAudioContext(params: {
|
|
148
|
+
text: string;
|
|
149
|
+
apiKey: string;
|
|
150
|
+
apiSecret: string;
|
|
151
|
+
audioCtx: AudioContext;
|
|
152
|
+
onSourceScheduled?: (source: AudioBufferSourceNode) => void;
|
|
153
|
+
onFirstChunk?: () => void;
|
|
154
|
+
onEnded?: () => void;
|
|
155
|
+
signal?: AbortSignal;
|
|
156
|
+
}): Promise<void> {
|
|
157
|
+
const { text, apiKey, apiSecret, audioCtx, onSourceScheduled, onFirstChunk, onEnded, signal } =
|
|
158
|
+
params;
|
|
159
|
+
|
|
160
|
+
if (audioCtx.state === 'suspended') await audioCtx.resume();
|
|
161
|
+
|
|
162
|
+
let nextPlayTime = 0;
|
|
163
|
+
let pendingCount = 0;
|
|
164
|
+
let streamDone = false;
|
|
165
|
+
// Pre-buffer: collect initial chunks before scheduling any playback so the
|
|
166
|
+
// AudioContext has enough backlog to play without gaps from network jitter.
|
|
167
|
+
const prebuffer: AudioBuffer[] = [];
|
|
168
|
+
let prebuffering = true;
|
|
169
|
+
|
|
170
|
+
const checkEnded = () => {
|
|
171
|
+
if (streamDone && pendingCount === 0) onEnded?.();
|
|
172
|
+
};
|
|
173
|
+
|
|
174
|
+
const scheduleBuffer = (audioBuffer: AudioBuffer) => {
|
|
175
|
+
const source = audioCtx.createBufferSource();
|
|
176
|
+
source.buffer = audioBuffer;
|
|
177
|
+
source.connect(audioCtx.destination);
|
|
178
|
+
pendingCount++;
|
|
179
|
+
source.start(nextPlayTime);
|
|
180
|
+
nextPlayTime += audioBuffer.duration;
|
|
181
|
+
onSourceScheduled?.(source);
|
|
182
|
+
source.onended = () => {
|
|
183
|
+
pendingCount--;
|
|
184
|
+
checkEnded();
|
|
185
|
+
};
|
|
186
|
+
};
|
|
187
|
+
|
|
188
|
+
const flushPrebuffer = () => {
|
|
189
|
+
nextPlayTime = audioCtx.currentTime + 0.05;
|
|
190
|
+
onFirstChunk?.();
|
|
191
|
+
for (const buf of prebuffer) scheduleBuffer(buf);
|
|
192
|
+
prebuffer.length = 0;
|
|
193
|
+
prebuffering = false;
|
|
194
|
+
};
|
|
195
|
+
|
|
196
|
+
await streamTextTts({ apiKey, apiSecret, text }, (pcm16: Int16Array) => {
|
|
197
|
+
if (signal?.aborted) return;
|
|
198
|
+
|
|
199
|
+
const float32 = new Float32Array(pcm16.length);
|
|
200
|
+
for (let i = 0; i < pcm16.length; i++) float32[i] = pcm16[i] / 32768;
|
|
201
|
+
|
|
202
|
+
const audioBuffer = audioCtx.createBuffer(1, float32.length, 24000);
|
|
203
|
+
audioBuffer.copyToChannel(float32, 0);
|
|
204
|
+
|
|
205
|
+
if (prebuffering) {
|
|
206
|
+
prebuffer.push(audioBuffer);
|
|
207
|
+
if (prebuffer.length >= PREBUFFER_CHUNKS) flushPrebuffer();
|
|
208
|
+
} else {
|
|
209
|
+
scheduleBuffer(audioBuffer);
|
|
210
|
+
}
|
|
211
|
+
}, signal);
|
|
212
|
+
|
|
213
|
+
// Stream finished — flush any remaining prebuffered chunks (short text that
|
|
214
|
+
// never reached PREBUFFER_CHUNKS) and mark done.
|
|
215
|
+
if (prebuffering && prebuffer.length > 0) flushPrebuffer();
|
|
216
|
+
|
|
217
|
+
streamDone = true;
|
|
218
|
+
checkEnded();
|
|
219
|
+
}
|
|
@@ -7,6 +7,8 @@ import {
|
|
|
7
7
|
type VoiceDoneEvent,
|
|
8
8
|
type VoiceSession,
|
|
9
9
|
type VoiceStatus,
|
|
10
|
+
type VoiceToolCallEvent,
|
|
11
|
+
type VoiceToolResultEvent,
|
|
10
12
|
type VoiceTranscriptEvent,
|
|
11
13
|
} from '../../api/voice';
|
|
12
14
|
import { appendServerHistoryIntoMessages } from './messageHistory';
|
|
@@ -37,6 +39,8 @@ export type UseVoiceModeOptions = {
|
|
|
37
39
|
getMessages: () => Message[];
|
|
38
40
|
setMessages: React.Dispatch<React.SetStateAction<Message[]>>;
|
|
39
41
|
onError?: (error: Error) => void;
|
|
42
|
+
onToolCall?: (event: VoiceToolCallEvent) => void;
|
|
43
|
+
onToolResult?: (event: VoiceToolResultEvent) => void;
|
|
40
44
|
stopRecording: () => void;
|
|
41
45
|
stopTextToSpeech: () => void;
|
|
42
46
|
voiceTransport?: VoiceTransport;
|
|
@@ -55,6 +59,8 @@ export function useVoiceMode({
|
|
|
55
59
|
getMessages,
|
|
56
60
|
setMessages,
|
|
57
61
|
onError,
|
|
62
|
+
onToolCall,
|
|
63
|
+
onToolResult,
|
|
58
64
|
stopRecording,
|
|
59
65
|
stopTextToSpeech,
|
|
60
66
|
voiceTransport = 'ws-realtime',
|
|
@@ -222,6 +228,8 @@ export function useVoiceMode({
|
|
|
222
228
|
onStatus: (_status: VoiceStatus) => {},
|
|
223
229
|
onTranscript: (_event: VoiceTranscriptEvent) => {},
|
|
224
230
|
onDone: (_event: VoiceDoneEvent) => {},
|
|
231
|
+
onToolCall: (_event: VoiceToolCallEvent) => {},
|
|
232
|
+
onToolResult: (_event: VoiceToolResultEvent) => {},
|
|
225
233
|
});
|
|
226
234
|
|
|
227
235
|
const stopVoiceSession = React.useCallback(async (reason = 'unknown') => {
|
|
@@ -333,6 +341,8 @@ export function useVoiceMode({
|
|
|
333
341
|
onStatus: (status) => voiceHandlersRef.current.onStatus(status),
|
|
334
342
|
onTranscript: (event) => voiceHandlersRef.current.onTranscript(event),
|
|
335
343
|
onDone: (event) => voiceHandlersRef.current.onDone(event),
|
|
344
|
+
onToolCall: (event) => voiceHandlersRef.current.onToolCall(event),
|
|
345
|
+
onToolResult: (event) => voiceHandlersRef.current.onToolResult(event),
|
|
336
346
|
onError: (err) => {
|
|
337
347
|
if (isConnectCancelled()) return;
|
|
338
348
|
onError?.(err);
|
|
@@ -396,8 +406,10 @@ export function useVoiceMode({
|
|
|
396
406
|
handleTranscriptUi(event);
|
|
397
407
|
},
|
|
398
408
|
onDone: handleDone,
|
|
409
|
+
onToolCall: (event) => onToolCall?.(event),
|
|
410
|
+
onToolResult: (event) => onToolResult?.(event),
|
|
399
411
|
};
|
|
400
|
-
}, [handleStatus, handleTranscriptTurn, handleTranscriptUi, handleDone]);
|
|
412
|
+
}, [handleStatus, handleTranscriptTurn, handleTranscriptUi, handleDone, onToolCall, onToolResult]);
|
|
401
413
|
|
|
402
414
|
React.useEffect(() => () => { void stopVoiceSessionRef.current('unmount'); }, []);
|
|
403
415
|
|
package/src/legacy/index.ts
CHANGED
|
@@ -7,7 +7,7 @@ export { default as useLocalStorage } from './core/lib/useLocalStorage';
|
|
|
7
7
|
export { default as useSessionStorage } from './core/lib/useSessionStorage';
|
|
8
8
|
export { useResizable } from './core/lib/useResizable';
|
|
9
9
|
export type { ResizeCorner } from './core/lib/useResizable';
|
|
10
|
-
export { synthesizeSpeechToBuffer } from './core/lib/textToSpeech';
|
|
10
|
+
export { synthesizeSpeechToBuffer, streamSpeechToAudioContext } from './core/lib/textToSpeech';
|
|
11
11
|
export { useSpeechToTextFromMic } from './core/lib/useSpeechToTextFromMic';
|
|
12
12
|
export { useVoiceMode } from './core/lib/useVoiceMode';
|
|
13
13
|
|