@bytexbyte/nxtlinq-ai-agent-core-development 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/dist/agent/ChatOrchestrator.d.ts +48 -0
  2. package/dist/agent/ChatOrchestrator.d.ts.map +1 -0
  3. package/dist/agent/ChatOrchestrator.js +311 -0
  4. package/dist/agent/NxtlinqAgent.d.ts +65 -0
  5. package/dist/agent/NxtlinqAgent.d.ts.map +1 -0
  6. package/dist/agent/NxtlinqAgent.js +256 -0
  7. package/dist/agent/errors.d.ts +4 -0
  8. package/dist/agent/errors.d.ts.map +1 -0
  9. package/dist/agent/errors.js +6 -0
  10. package/dist/agent/extractReplyText.d.ts +3 -0
  11. package/dist/agent/extractReplyText.d.ts.map +1 -0
  12. package/dist/agent/extractReplyText.js +16 -0
  13. package/dist/api/hosts.d.ts +4 -0
  14. package/dist/api/hosts.d.ts.map +1 -0
  15. package/dist/api/hosts.js +18 -0
  16. package/dist/api/nxtlinq-api.d.ts +9 -0
  17. package/dist/api/nxtlinq-api.d.ts.map +1 -0
  18. package/dist/api/nxtlinq-api.js +499 -0
  19. package/dist/api/parse-sse.d.ts +9 -0
  20. package/dist/api/parse-sse.d.ts.map +1 -0
  21. package/dist/api/parse-sse.js +97 -0
  22. package/dist/api/tts.d.ts +19 -0
  23. package/dist/api/tts.d.ts.map +1 -0
  24. package/dist/api/tts.js +46 -0
  25. package/dist/constants/storageKeys.d.ts +6 -0
  26. package/dist/constants/storageKeys.d.ts.map +1 -0
  27. package/dist/constants/storageKeys.js +5 -0
  28. package/dist/history/messageHistory.d.ts +18 -0
  29. package/dist/history/messageHistory.d.ts.map +1 -0
  30. package/dist/history/messageHistory.js +48 -0
  31. package/dist/index.d.ts +25 -0
  32. package/dist/index.d.ts.map +1 -0
  33. package/dist/index.js +26 -0
  34. package/dist/ports/HttpPort.d.ts +6 -0
  35. package/dist/ports/HttpPort.d.ts.map +1 -0
  36. package/dist/ports/HttpPort.js +3 -0
  37. package/dist/ports/PlatformPorts.d.ts +12 -0
  38. package/dist/ports/PlatformPorts.d.ts.map +1 -0
  39. package/dist/ports/PlatformPorts.js +1 -0
  40. package/dist/ports/StoragePort.d.ts +10 -0
  41. package/dist/ports/StoragePort.d.ts.map +1 -0
  42. package/dist/ports/StoragePort.js +33 -0
  43. package/dist/ports/WebRTCPort.d.ts +68 -0
  44. package/dist/ports/WebRTCPort.d.ts.map +1 -0
  45. package/dist/ports/WebRTCPort.js +10 -0
  46. package/dist/ports/createBrowserWebRTCPort.d.ts +7 -0
  47. package/dist/ports/createBrowserWebRTCPort.d.ts.map +1 -0
  48. package/dist/ports/createBrowserWebRTCPort.js +140 -0
  49. package/dist/ports/index.d.ts +5 -0
  50. package/dist/ports/index.d.ts.map +1 -0
  51. package/dist/ports/index.js +4 -0
  52. package/dist/types/agent-config.d.ts +40 -0
  53. package/dist/types/agent-config.d.ts.map +1 -0
  54. package/dist/types/agent-config.js +1 -0
  55. package/dist/types/ait-api.d.ts +393 -0
  56. package/dist/types/ait-api.d.ts.map +1 -0
  57. package/dist/types/ait-api.js +1 -0
  58. package/dist/voice/app-channel-dispatcher.d.ts +14 -0
  59. package/dist/voice/app-channel-dispatcher.d.ts.map +1 -0
  60. package/dist/voice/app-channel-dispatcher.js +171 -0
  61. package/dist/voice/create-voice-session.d.ts +8 -0
  62. package/dist/voice/create-voice-session.d.ts.map +1 -0
  63. package/dist/voice/create-voice-session.js +37 -0
  64. package/dist/voice/output-audio-level.d.ts +26 -0
  65. package/dist/voice/output-audio-level.d.ts.map +1 -0
  66. package/dist/voice/output-audio-level.js +132 -0
  67. package/dist/voice/remote-audio-gain.d.ts +10 -0
  68. package/dist/voice/remote-audio-gain.d.ts.map +1 -0
  69. package/dist/voice/remote-audio-gain.js +19 -0
  70. package/dist/voice/start-voice-session.d.ts +13 -0
  71. package/dist/voice/start-voice-session.d.ts.map +1 -0
  72. package/dist/voice/start-voice-session.js +303 -0
  73. package/dist/voice/transcript.d.ts +10 -0
  74. package/dist/voice/transcript.d.ts.map +1 -0
  75. package/dist/voice/transcript.js +50 -0
  76. package/dist/voice/trigger-voice-greeting.d.ts +14 -0
  77. package/dist/voice/trigger-voice-greeting.d.ts.map +1 -0
  78. package/dist/voice/trigger-voice-greeting.js +28 -0
  79. package/dist/voice/types.d.ts +138 -0
  80. package/dist/voice/types.d.ts.map +1 -0
  81. package/dist/voice/types.js +1 -0
  82. package/dist/voice/voice-user-input.d.ts +19 -0
  83. package/dist/voice/voice-user-input.d.ts.map +1 -0
  84. package/dist/voice/voice-user-input.js +10 -0
  85. package/package.json +41 -0
  86. package/src/agent/ChatOrchestrator.ts +380 -0
  87. package/src/agent/NxtlinqAgent.ts +325 -0
  88. package/src/agent/errors.ts +6 -0
  89. package/src/agent/extractReplyText.ts +22 -0
  90. package/src/api/hosts.ts +20 -0
  91. package/src/api/nxtlinq-api.ts +656 -0
  92. package/src/api/parse-sse.ts +104 -0
  93. package/src/api/tts.ts +69 -0
  94. package/src/constants/storageKeys.ts +5 -0
  95. package/src/history/messageHistory.ts +65 -0
  96. package/src/index.ts +70 -0
  97. package/src/ports/HttpPort.ts +12 -0
  98. package/src/ports/PlatformPorts.ts +12 -0
  99. package/src/ports/StoragePort.ts +37 -0
  100. package/src/ports/WebRTCPort.ts +54 -0
  101. package/src/ports/createBrowserWebRTCPort.ts +163 -0
  102. package/src/ports/index.ts +4 -0
  103. package/src/types/agent-config.ts +51 -0
  104. package/src/types/ait-api.ts +303 -0
  105. package/src/voice/app-channel-dispatcher.ts +201 -0
  106. package/src/voice/create-voice-session.ts +53 -0
  107. package/src/voice/output-audio-level.ts +153 -0
  108. package/src/voice/remote-audio-gain.ts +31 -0
  109. package/src/voice/start-voice-session.ts +369 -0
  110. package/src/voice/transcript.ts +44 -0
  111. package/src/voice/trigger-voice-greeting.ts +47 -0
  112. package/src/voice/types.ts +154 -0
  113. package/src/voice/voice-user-input.ts +32 -0
@@ -0,0 +1,37 @@
1
+ /**
2
+ * Step 1 of nxtlinq voice: mint session + TURN credentials (no WebRTC here).
3
+ */
4
+ export async function createVoiceSession(baseUrl, params, fetchFn = globalThis.fetch) {
5
+ const url = `${baseUrl.replace(/\/+$/, '')}/api/voice/session`;
6
+ const sessionRes = await fetchFn(url, {
7
+ method: 'POST',
8
+ headers: { 'Content-Type': 'application/json' },
9
+ body: JSON.stringify({
10
+ apiKey: params.apiKey,
11
+ apiSecret: params.apiSecret,
12
+ pseudoId: params.pseudoId,
13
+ externalId: params.externalId,
14
+ aitId: params.aitId,
15
+ walletAddress: params.walletAddress,
16
+ aitToken: params.aitToken,
17
+ voiceMode: params.voiceMode,
18
+ metadata: params.metadata,
19
+ }),
20
+ });
21
+ if (!sessionRes.ok) {
22
+ const errBody = await sessionRes.text().catch(() => '');
23
+ throw new Error(`Voice session creation failed: ${sessionRes.status} ${errBody}`);
24
+ }
25
+ const sessionData = (await sessionRes.json());
26
+ if (!sessionData.voiceSessionId ||
27
+ !sessionData.signalingUrl ||
28
+ !Array.isArray(sessionData.iceServers)) {
29
+ throw new Error('Voice session response missing required fields.');
30
+ }
31
+ return sessionData;
32
+ }
33
/**
 * Adds a `mode` query parameter to the signaling URL.
 *
 * @param signalingUrl URL to extend; `&` is used when it already contains a `?`,
 *   otherwise `?`.
 * @param mode Requested voice mode; anything other than 'realtime' becomes 'cascade'.
 * @returns The URL with `mode=realtime` or `mode=cascade` appended.
 */
export function appendVoiceModeToSignalingUrl(signalingUrl, mode) {
    let modeValue = 'cascade';
    if (mode === 'realtime') {
        modeValue = 'realtime';
    }
    const alreadyHasQuery = signalingUrl.includes('?');
    const joiner = alreadyHasQuery ? '&' : '?';
    return `${signalingUrl}${joiner}mode=${modeValue}`;
}
@@ -0,0 +1,26 @@
1
+ /**
2
+ * Remote assistant audio RMS (0–1) for waveform UIs.
3
+ * Browser: AnalyserNode on WebRTC remote stream.
4
+ * React Native: RTCStatsReport audioLevel via PeerConnection.getStats().
5
+ */
6
+ export type OutputAudioLevelMeter = { // Handle returned by the meter factories declared in this file.
7
+ getLevel(): number; // Most recent level, clamped to 0–1; reports 0 once dispose() has been called.
8
+ dispose(): void; // Stops sampling and resets the level to 0.
9
+ };
10
+ type PeerConnectionWithStats = { // Minimal structural view of a peer connection: only getStats() is required.
11
+ getStats(): Promise<unknown>; // Resolves to the stats report; the stats meter reads `type` and `audioLevel` from its entries.
12
+ };
13
+ /**
14
+ * Poll inbound-rtp / media-source / track audioLevel from native RTCPeerConnection on a fixed interval and expose a smoothed 0–1 level; polls are skipped while the getter returns null.
15
+ */
16
+ export declare function createStatsOutputAudioLevelMeter(getPeerConnection: () => PeerConnectionWithStats | null): OutputAudioLevelMeter;
17
+ /**
18
+ * Browser AnalyserNode tap on a remote MediaStream. Returns null when no global AudioContext constructor is available.
19
+ */
20
+ export declare function createAnalyserOutputAudioLevelMeter(stream: MediaStream): OutputAudioLevelMeter | null;
21
+ export declare function createOutputAudioLevelMeter(options: { // Uses the analyser meter when remoteStream is set and one can be created; otherwise the stats meter.
22
+ remoteStream: MediaStream | null; // Remote stream to tap, or null to go straight to the stats meter.
23
+ getPeerConnection?: () => PeerConnectionWithStats | null; // Optional; when omitted the stats meter is given a getter that always returns null.
24
+ }): OutputAudioLevelMeter;
25
+ export {};
26
+ //# sourceMappingURL=output-audio-level.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"output-audio-level.d.ts","sourceRoot":"","sources":["../../src/voice/output-audio-level.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,MAAM,MAAM,qBAAqB,GAAG;IAClC,QAAQ,IAAI,MAAM,CAAC;IACnB,OAAO,IAAI,IAAI,CAAC;CACjB,CAAC;AAkBF,KAAK,uBAAuB,GAAG;IAC7B,QAAQ,IAAI,OAAO,CAAC,OAAO,CAAC,CAAC;CAC9B,CAAC;AAEF;;GAEG;AACH,wBAAgB,gCAAgC,CAC9C,iBAAiB,EAAE,MAAM,uBAAuB,GAAG,IAAI,GACtD,qBAAqB,CAkDvB;AAED;;GAEG;AACH,wBAAgB,mCAAmC,CACjD,MAAM,EAAE,WAAW,GAClB,qBAAqB,GAAG,IAAI,CA6C9B;AAED,wBAAgB,2BAA2B,CAAC,OAAO,EAAE;IACnD,YAAY,EAAE,WAAW,GAAG,IAAI,CAAC;IACjC,iBAAiB,CAAC,EAAE,MAAM,uBAAuB,GAAG,IAAI,CAAC;CAC1D,GAAG,qBAAqB,CASxB"}
@@ -0,0 +1,132 @@
1
+ /**
2
+ * Remote assistant audio RMS (0–1) for waveform UIs.
3
+ * Browser: AnalyserNode on WebRTC remote stream.
4
+ * React Native: RTCStatsReport audioLevel via PeerConnection.getStats().
5
+ */
6
+ const FFT_SIZE = 256; // fftSize assigned to the AnalyserNode in the browser meter
7
+ const STATS_POLL_MS = 200; // delay, in ms, between getStats() polls in the stats meter
8
/**
 * Turns a sum of squared samples into a display level.
 *
 * @param sumSquares Sum of the squared sample values.
 * @param count Number of samples summed; a non-positive count yields 0.
 * @returns The RMS multiplied by 4 and clamped to the 0–1 range.
 */
function normalizeRms(sumSquares, count) {
    if (count <= 0) {
        return 0;
    }
    const meanSquare = sumSquares / count;
    // Fixed 4x boost applied before clamping.
    const boosted = Math.sqrt(meanSquare) * 4;
    const floored = Math.max(0, boosted);
    return Math.min(1, floored);
}
14
+ function normalizeStatsLevel(raw) {
15
+ if (!Number.isFinite(raw) || raw <= 0)
16
+ return 0;
17
+ // WebRTC stats often use 0–32767 (RFC); some browsers use 0–1.
18
+ const normalized = raw > 1 ? raw / 32767 : raw;
19
+ return Math.min(1, Math.max(0, normalized));
20
+ }
21
+ /**
22
+ * Poll inbound-rtp / media-source audioLevel from native RTCPeerConnection.
23
+ */
24
+ export function createStatsOutputAudioLevelMeter(getPeerConnection) {
25
+ let level = 0;
26
+ let smoothed = 0;
27
+ let disposed = false;
28
+ let timer = null;
29
+ const poll = async () => {
30
+ const pc = getPeerConnection();
31
+ if (!pc || disposed)
32
+ return;
33
+ try {
34
+ const report = (await pc.getStats());
35
+ let best = 0;
36
+ const visit = (entry) => {
37
+ const t = entry.type;
38
+ if (t === 'inbound-rtp' || t === 'media-source' || t === 'track') {
39
+ const audioLevel = entry.audioLevel;
40
+ if (typeof audioLevel === 'number' && audioLevel > 0) {
41
+ best = Math.max(best, normalizeStatsLevel(audioLevel));
42
+ }
43
+ }
44
+ };
45
+ if (report instanceof Map) {
46
+ report.forEach((entry) => visit(entry));
47
+ }
48
+ else if (report && typeof report === 'object') {
49
+ for (const entry of Object.values(report)) {
50
+ visit(entry);
51
+ }
52
+ }
53
+ smoothed = smoothed * 0.75 + best * 0.25;
54
+ level = smoothed;
55
+ }
56
+ catch {
57
+ /* ignore transient stats errors */
58
+ }
59
+ };
60
+ timer = setInterval(() => {
61
+ void poll();
62
+ }, STATS_POLL_MS);
63
+ void poll();
64
+ return {
65
+ getLevel: () => (disposed ? 0 : level),
66
+ dispose: () => {
67
+ disposed = true;
68
+ if (timer)
69
+ clearInterval(timer);
70
+ timer = null;
71
+ level = 0;
72
+ smoothed = 0;
73
+ },
74
+ };
75
+ }
76
+ /**
77
+ * Browser AnalyserNode tap on a remote MediaStream.
78
+ */
79
+ export function createAnalyserOutputAudioLevelMeter(stream) {
80
+ const AudioCtx = typeof globalThis !== 'undefined'
81
+ ? globalThis.AudioContext
82
+ : undefined;
83
+ if (!AudioCtx)
84
+ return null;
85
+ let level = 0;
86
+ let disposed = false;
87
+ let rafId = 0;
88
+ const ctx = new AudioCtx();
89
+ const analyser = ctx.createAnalyser();
90
+ analyser.fftSize = FFT_SIZE;
91
+ const source = ctx.createMediaStreamSource(stream);
92
+ source.connect(analyser);
93
+ const data = new Uint8Array(analyser.frequencyBinCount);
94
+ const tick = () => {
95
+ if (disposed)
96
+ return;
97
+ analyser.getByteTimeDomainData(data);
98
+ let sum = 0;
99
+ for (let i = 0; i < data.length; i++) {
100
+ const v = (data[i] - 128) / 128;
101
+ sum += v * v;
102
+ }
103
+ level = normalizeRms(sum, data.length);
104
+ rafId = requestAnimationFrame(tick);
105
+ };
106
+ rafId = requestAnimationFrame(tick);
107
+ return {
108
+ getLevel: () => (disposed ? 0 : level),
109
+ dispose: () => {
110
+ disposed = true;
111
+ cancelAnimationFrame(rafId);
112
+ level = 0;
113
+ try {
114
+ source.disconnect();
115
+ analyser.disconnect();
116
+ void ctx.close();
117
+ }
118
+ catch {
119
+ /* noop */
120
+ }
121
+ },
122
+ };
123
+ }
124
+ export function createOutputAudioLevelMeter(options) {
125
+ const stream = options.remoteStream;
126
+ if (stream) {
127
+ const analyserMeter = createAnalyserOutputAudioLevelMeter(stream);
128
+ if (analyserMeter)
129
+ return analyserMeter;
130
+ }
131
+ return createStatsOutputAudioLevelMeter(options.getPeerConnection ?? (() => null));
132
+ }
@@ -0,0 +1,10 @@
1
+ import type { MediaStreamLike } from '../ports/WebRTCPort';
2
+ /** Default gain passed to a track's `_setVolume` hook (react-native-webrtc); the gain helpers clamp it to 0–10. A volume of 1.0 is quiet compared with media-player TTS, hence the high default. */
3
+ export declare const DEFAULT_REMOTE_AUDIO_GAIN = 10;
4
+ export type RemoteAudioTrackLike = { // Structural subset of a track that the gain helpers rely on.
5
+ kind?: string; // The helpers act only when this is 'audio' or absent.
6
+ _setVolume?: (volume: number) => void; // Volume setter; tracks without it are left untouched.
7
+ };
8
+ export declare function applyRemoteAudioPlaybackGain(stream: MediaStreamLike | null | undefined, gain?: number): void; // Applies gain (clamped to 0–10) to every audio track of the stream that exposes _setVolume; no-op without a stream.
9
+ export declare function applyRemoteAudioPlaybackGainToTrack(track: RemoteAudioTrackLike | null | undefined, gain?: number): void; // Single-track variant: applies gain (clamped to 0–10) when the track is audio (or has no kind) and exposes _setVolume.
10
+ //# sourceMappingURL=remote-audio-gain.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"remote-audio-gain.d.ts","sourceRoot":"","sources":["../../src/voice/remote-audio-gain.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAE3D,wFAAwF;AACxF,eAAO,MAAM,yBAAyB,KAAK,CAAC;AAE5C,MAAM,MAAM,oBAAoB,GAAG;IACjC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,IAAI,CAAC;CACvC,CAAC;AAEF,wBAAgB,4BAA4B,CAC1C,MAAM,EAAE,eAAe,GAAG,IAAI,GAAG,SAAS,EAC1C,IAAI,GAAE,MAAkC,GACvC,IAAI,CAQN;AAED,wBAAgB,mCAAmC,CACjD,KAAK,EAAE,oBAAoB,GAAG,IAAI,GAAG,SAAS,EAC9C,IAAI,GAAE,MAAkC,GACvC,IAAI,CAIN"}
@@ -0,0 +1,19 @@
1
+ /** Default gain passed to a track's `_setVolume` hook (react-native-webrtc); the helpers below clamp gain to 0–10. A volume of 1.0 is quiet compared with media-player TTS, hence the high default. */
2
+ export const DEFAULT_REMOTE_AUDIO_GAIN = 10;
3
+ export function applyRemoteAudioPlaybackGain(stream, gain = DEFAULT_REMOTE_AUDIO_GAIN) {
4
+ if (!stream?.getAudioTracks)
5
+ return;
6
+ const clamped = Math.max(0, Math.min(10, gain));
7
+ for (const track of stream.getAudioTracks()) {
8
+ if ((track.kind === 'audio' || track.kind == null) && track._setVolume) {
9
+ track._setVolume(clamped);
10
+ }
11
+ }
12
+ }
13
+ export function applyRemoteAudioPlaybackGainToTrack(track, gain = DEFAULT_REMOTE_AUDIO_GAIN) {
14
+ if (!track || (track.kind !== 'audio' && track.kind != null))
15
+ return;
16
+ if (!track._setVolume)
17
+ return;
18
+ track._setVolume(Math.max(0, Math.min(10, gain)));
19
+ }
@@ -0,0 +1,13 @@
1
+ import type { HttpFetch } from '../ports/HttpPort';
2
+ import type { WebRTCPort } from '../ports/WebRTCPort';
3
+ import type { StartVoiceSessionOptions, VoiceSession } from './types';
4
+ export type StartVoiceSessionDeps = { // Platform dependencies injected into startVoiceSessionWithPort.
5
+ baseUrl: string; // API host used when the call options do not carry their own baseUrl.
6
+ webrtc: WebRTCPort; // Supplies getUserMedia, createPeerConnection and waitForIceGathering.
7
+ fetchFn?: HttpFetch; // HTTP client for the session and signaling requests; defaults to globalThis.fetch.
8
+ };
9
+ /**
10
+ * Platform-agnostic WebRTC voice session (browser or React Native via {@link WebRTCPort}). Mints the session over HTTP, captures the microphone, posts the SDP offer to the signaling URL and applies the answer, then resolves with the session handle.
11
+ */
12
+ export declare function startVoiceSessionWithPort(deps: StartVoiceSessionDeps, options: StartVoiceSessionOptions): Promise<VoiceSession>;
13
+ //# sourceMappingURL=start-voice-session.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"start-voice-session.d.ts","sourceRoot":"","sources":["../../src/voice/start-voice-session.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AACnD,OAAO,KAAK,EAAuC,UAAU,EAAE,MAAM,qBAAqB,CAAC;AAU3F,OAAO,KAAK,EAAE,wBAAwB,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AActE,MAAM,MAAM,qBAAqB,GAAG;IAClC,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,UAAU,CAAC;IACnB,OAAO,CAAC,EAAE,SAAS,CAAC;CACrB,CAAC;AA4BF;;GAEG;AACH,wBAAsB,yBAAyB,CAC7C,IAAI,EAAE,qBAAqB,EAC3B,OAAO,EAAE,wBAAwB,GAChC,OAAO,CAAC,YAAY,CAAC,CAiTvB"}
@@ -0,0 +1,303 @@
1
+ import { createAppChannelDispatcher } from './app-channel-dispatcher';
2
+ import { appendVoiceModeToSignalingUrl, createVoiceSession, } from './create-voice-session';
3
+ import { createOutputAudioLevelMeter, } from './output-audio-level';
4
+ import { applyRemoteAudioPlaybackGain, applyRemoteAudioPlaybackGainToTrack, DEFAULT_REMOTE_AUDIO_GAIN, } from './remote-audio-gain';
5
+ import { buildUserInputPayload } from './voice-user-input';
6
+ const ICE_GATHERING_TIMEOUT_MS = 5000;
7
+ async function endVoiceSession(baseUrl, fetchFn, params) {
8
+ try {
9
+ await fetchFn(`${baseUrl.replace(/\/+$/, '')}/api/voice/session`, {
10
+ method: 'DELETE',
11
+ headers: { 'Content-Type': 'application/json' },
12
+ body: JSON.stringify({
13
+ apiKey: params.apiKey,
14
+ apiSecret: params.apiSecret,
15
+ voiceSessionId: params.voiceSessionId,
16
+ endReason: params.endReason,
17
+ }),
18
+ });
19
+ }
20
+ catch {
21
+ /* best-effort */
22
+ }
23
+ }
24
+ /**
25
+ * Platform-agnostic WebRTC voice session (browser or React Native via {@link WebRTCPort}).
26
+ */
27
+ export async function startVoiceSessionWithPort(deps, options) {
28
+ const fetchFn = deps.fetchFn ?? globalThis.fetch;
29
+ const baseUrl = (options.baseUrl ?? deps.baseUrl ?? '').replace(/\/+$/, '');
30
+ if (!baseUrl) {
31
+ throw new Error('startVoiceSession: API host is empty. Call setApiHosts() or pass baseUrl.');
32
+ }
33
+ const sessionData = await createVoiceSession(baseUrl, {
34
+ apiKey: options.apiKey,
35
+ apiSecret: options.apiSecret,
36
+ pseudoId: options.pseudoId,
37
+ externalId: options.externalId,
38
+ aitId: options.aitId,
39
+ walletAddress: options.walletAddress,
40
+ aitToken: options.aitToken,
41
+ voiceMode: options.voiceMode,
42
+ metadata: options.metadata,
43
+ }, fetchFn);
44
+ const voiceSessionId = sessionData.voiceSessionId;
45
+ const resolvedMode = sessionData.mode === 'realtime' || options.voiceMode === 'realtime'
46
+ ? 'realtime'
47
+ : 'cascade';
48
+ const signalingUrl = appendVoiceModeToSignalingUrl(sessionData.signalingUrl, resolvedMode);
49
+ const remoteAudioGain = options.remoteAudioGain ?? DEFAULT_REMOTE_AUDIO_GAIN;
50
+ const micStream = await deps.webrtc.getUserMedia({ audio: true });
51
+ let micMuted = options.startWithMicMuted === true;
52
+ let channelOpened = false;
53
+ const setMicTracksEnabled = (enabled) => {
54
+ for (const track of micStream.getAudioTracks()) {
55
+ track.enabled = enabled;
56
+ }
57
+ };
58
+ if (micMuted) {
59
+ setMicTracksEnabled(false);
60
+ }
61
+ const pc = deps.webrtc.createPeerConnection({
62
+ iceServers: sessionData.iceServers,
63
+ });
64
+ let errorFired = false;
65
+ const reportError = (err) => {
66
+ if (errorFired)
67
+ return;
68
+ errorFired = true;
69
+ options.onError?.(err);
70
+ };
71
+ let closeNotified = false;
72
+ const notifyClose = (reason) => {
73
+ if (closeNotified)
74
+ return;
75
+ closeNotified = true;
76
+ options.onClose?.(reason);
77
+ };
78
+ let activeAppChannel = pc.createDataChannel('app');
79
+ const dispatcher = createAppChannelDispatcher(options, reportError);
80
+ dispatcher.state.micMuted = micMuted;
81
+ const handleAppChannelOpen = () => {
82
+ if (channelOpened)
83
+ return;
84
+ channelOpened = true;
85
+ if (micMuted) {
86
+ try {
87
+ activeAppChannel.send(JSON.stringify({ type: 'mute', muted: true }));
88
+ }
89
+ catch {
90
+ /* noop */
91
+ }
92
+ }
93
+ options.onOpen?.();
94
+ };
95
+ const wireAppChannel = (channel) => {
96
+ activeAppChannel = channel;
97
+ channel.onopen = handleAppChannelOpen;
98
+ channel.onclose = () => {
99
+ notifyClose('datachannel_closed');
100
+ };
101
+ channel.onmessage = (event) => {
102
+ let payload;
103
+ try {
104
+ payload = JSON.parse(event.data);
105
+ }
106
+ catch {
107
+ return;
108
+ }
109
+ const ts = typeof payload.ts === 'number' ? payload.ts : Date.now();
110
+ dispatcher.handleMessage(payload, ts);
111
+ };
112
+ if (channel.readyState === 'open') {
113
+ handleAppChannelOpen();
114
+ }
115
+ };
116
+ wireAppChannel(activeAppChannel);
117
+ pc.ondatachannel = (event) => {
118
+ if (event.channel) {
119
+ wireAppChannel(event.channel);
120
+ }
121
+ };
122
+ pc.onconnectionstatechange = () => {
123
+ if (pc.connectionState === 'connected' && activeAppChannel.readyState === 'open') {
124
+ handleAppChannelOpen();
125
+ }
126
+ else if (pc.connectionState === 'failed') {
127
+ reportError(new Error('PeerConnection failed'));
128
+ }
129
+ else if (pc.connectionState === 'closed') {
130
+ notifyClose('peerconnection_closed');
131
+ }
132
+ };
133
+ pc.oniceconnectionstatechange = () => {
134
+ if (pc.iceConnectionState === 'failed') {
135
+ reportError(new Error('ICE connection failed (TURN/firewall?)'));
136
+ }
137
+ };
138
+ const remoteTracks = [];
139
+ let remoteStream = null;
140
+ let remoteMediaStream = null;
141
+ let outputMeter = null;
142
+ const attachOutputMeter = () => {
143
+ outputMeter?.dispose();
144
+ outputMeter = createOutputAudioLevelMeter({
145
+ remoteStream: remoteMediaStream,
146
+ getPeerConnection: () => {
147
+ const native = pc.__nativePc;
148
+ return native ?? null;
149
+ },
150
+ });
151
+ };
152
+ pc.ontrack = (event) => {
153
+ applyRemoteAudioPlaybackGainToTrack(event.track, remoteAudioGain);
154
+ const fromEvent = event.streams[0];
155
+ if (fromEvent) {
156
+ remoteStream = fromEvent;
157
+ remoteMediaStream = fromEvent;
158
+ applyRemoteAudioPlaybackGain(remoteStream, remoteAudioGain);
159
+ attachOutputMeter();
160
+ return;
161
+ }
162
+ remoteTracks.push(event.track);
163
+ remoteStream = {
164
+ getAudioTracks: () => remoteTracks.filter((t) => t.kind === 'audio'),
165
+ };
166
+ remoteMediaStream = null;
167
+ applyRemoteAudioPlaybackGain(remoteStream, remoteAudioGain);
168
+ attachOutputMeter();
169
+ if (activeAppChannel.readyState === 'open') {
170
+ handleAppChannelOpen();
171
+ }
172
+ };
173
+ for (const track of micStream.getAudioTracks()) {
174
+ pc.addTrack(track, micStream);
175
+ }
176
+ const applyMicMuted = (muted) => {
177
+ micMuted = muted;
178
+ dispatcher.state.micMuted = muted;
179
+ setMicTracksEnabled(!muted);
180
+ if (channelOpened && activeAppChannel.readyState === 'open') {
181
+ try {
182
+ activeAppChannel.send(JSON.stringify({ type: 'mute', muted }));
183
+ }
184
+ catch {
185
+ /* noop */
186
+ }
187
+ }
188
+ };
189
+ const offer = await pc.createOffer();
190
+ await pc.setLocalDescription(offer);
191
+ await deps.webrtc.waitForIceGathering(pc, ICE_GATHERING_TIMEOUT_MS);
192
+ const local = pc.getLocalDescription();
193
+ if (!local?.sdp) {
194
+ throw new Error('Local SDP description missing after ICE gathering.');
195
+ }
196
+ const answerRes = await fetchFn(signalingUrl, {
197
+ method: 'POST',
198
+ headers: { 'Content-Type': 'application/json' },
199
+ body: JSON.stringify({
200
+ voiceSessionId,
201
+ sdp: local.sdp,
202
+ type: local.type ?? 'offer',
203
+ }),
204
+ });
205
+ if (!answerRes.ok) {
206
+ const errBody = await answerRes.text().catch(() => '');
207
+ throw new Error(`Signaling POST failed: ${answerRes.status} ${errBody}`);
208
+ }
209
+ const answer = (await answerRes.json());
210
+ if (!answer.sdp || (answer.type !== 'answer' && answer.type !== 'pranswer')) {
211
+ throw new Error(`Signaling returned malformed answer: ${JSON.stringify(answer).slice(0, 200)}`);
212
+ }
213
+ await pc.setRemoteDescription({ sdp: answer.sdp, type: answer.type });
214
+ const channelWatch = setInterval(() => {
215
+ if (activeAppChannel.readyState === 'open') {
216
+ clearInterval(channelWatch);
217
+ handleAppChannelOpen();
218
+ }
219
+ }, 50);
220
+ setTimeout(() => clearInterval(channelWatch), 15000);
221
+ let stopped = false;
222
+ const sendAppMessage = (payload) => {
223
+ if (!channelOpened || activeAppChannel.readyState !== 'open') {
224
+ throw new Error('Voice app channel is not open');
225
+ }
226
+ activeAppChannel.send(JSON.stringify(payload));
227
+ };
228
+ const sendVoiceUserInput = (input) => {
229
+ const interruptFirst = input.interruptFirst !== false;
230
+ if (interruptFirst) {
231
+ try {
232
+ sendAppMessage({ type: 'interrupt' });
233
+ }
234
+ catch {
235
+ /* channel may not be ready yet */
236
+ }
237
+ }
238
+ sendAppMessage(buildUserInputPayload(input));
239
+ };
240
+ const stop = async (reason = 'client_stop') => {
241
+ if (stopped)
242
+ return;
243
+ stopped = true;
244
+ outputMeter?.dispose();
245
+ outputMeter = null;
246
+ await endVoiceSession(baseUrl, fetchFn, {
247
+ apiKey: options.apiKey,
248
+ apiSecret: options.apiSecret,
249
+ voiceSessionId,
250
+ endReason: reason,
251
+ });
252
+ try {
253
+ activeAppChannel.close();
254
+ }
255
+ catch {
256
+ /* noop */
257
+ }
258
+ for (const track of micStream.getAudioTracks()) {
259
+ try {
260
+ track.stop();
261
+ }
262
+ catch {
263
+ /* noop */
264
+ }
265
+ }
266
+ if (remoteStream?.getAudioTracks) {
267
+ for (const track of remoteStream.getAudioTracks()) {
268
+ try {
269
+ track.stop();
270
+ }
271
+ catch {
272
+ /* noop */
273
+ }
274
+ }
275
+ }
276
+ remoteStream = null;
277
+ try {
278
+ pc.close();
279
+ }
280
+ catch {
281
+ /* noop */
282
+ }
283
+ };
284
+ return {
285
+ id: voiceSessionId,
286
+ getRemoteAudioStream: () => remoteStream,
287
+ getOutputAudioLevel: () => outputMeter?.getLevel() ?? 0,
288
+ isAppChannelOpen: () => channelOpened && activeAppChannel.readyState === 'open',
289
+ sendVoiceUserInput,
290
+ muteMic: (muted) => applyMicMuted(muted),
291
+ interrupt: () => {
292
+ if (channelOpened && activeAppChannel.readyState === 'open') {
293
+ try {
294
+ activeAppChannel.send(JSON.stringify({ type: 'interrupt' }));
295
+ }
296
+ catch {
297
+ /* noop */
298
+ }
299
+ }
300
+ },
301
+ stop,
302
+ };
303
+ }
@@ -0,0 +1,10 @@
1
+ /** Loose comparison key: NFKC-normalizes and lowercases the text, then removes every character that is not a Unicode letter or number (so case and punctuation are ignored while CJK and other letters are kept). */
2
+ export declare function normalizeTranscriptKey(text: string): string;
3
+ /**
4
+ * Merge streaming transcript updates. Pipecat RTVI usually sends cumulative
5
+ * snapshots (each event is the full hypothesis so far). Both inputs are trimmed; when one text extends the other (exactly or by normalized key) the longer one is returned, otherwise the new piece is appended unless the current text already ends with it.
6
+ */
7
+ export declare function mergeStreamingTranscript(current: string, incoming: string): string;
8
+ /** @deprecated Use mergeStreamingTranscript; this forwards both arguments to it unchanged. */
9
+ export declare function appendTranscriptSegment(current: string, chunk: string): string;
10
+ //# sourceMappingURL=transcript.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"transcript.d.ts","sourceRoot":"","sources":["../../src/voice/transcript.ts"],"names":[],"mappings":"AAAA,iFAAiF;AACjF,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAK3D;AAYD;;;GAGG;AACH,wBAAgB,wBAAwB,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,MAAM,CAgBlF;AAED,+CAA+C;AAC/C,wBAAgB,uBAAuB,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,MAAM,CAE9E"}
@@ -0,0 +1,50 @@
1
/**
 * Builds a loose comparison key for transcript text.
 *
 * The text is NFKC-normalized and lowercased, then every character that is not a
 * Unicode letter or number is dropped, so case, spacing and punctuation are ignored
 * while CJK and other letters are kept.
 *
 * @param text Transcript text.
 * @returns The comparison key; empty when the text contains no letters or numbers.
 */
export function normalizeTranscriptKey(text) {
    const folded = text.normalize('NFKC').toLowerCase();
    return folded.replace(/[^\p{L}\p{N}]/gu, '');
}
8
/**
 * Concatenates two transcript segments, inserting a single space only where a word
 * boundary would otherwise be lost.
 *
 * A space is inserted when the last character of `base` and the first character of
 * `piece` are both ASCII digits or letters of a space-delimited script (Latin,
 * Cyrillic, Greek). Accented Latin and non-Latin alphabets are covered, not just
 * `[a-z0-9]`. Every other boundary (CJK, punctuation, existing whitespace, an empty
 * side) is concatenated directly, as before.
 *
 * Only single UTF-16 code units at the boundary are inspected, so a combining mark or
 * a surrogate half at the boundary is not treated as a word character.
 *
 * @param base Text accumulated so far.
 * @param piece Segment to append.
 * @returns The joined text.
 */
function joinTranscriptSegments(base, piece) {
    const last = base.slice(-1);
    const first = piece[0] ?? '';
    const isSpacedWordChar = (ch) => /[0-9\p{Script=Latin}\p{Script=Cyrillic}\p{Script=Greek}]/u.test(ch);
    if (isSpacedWordChar(last) && isSpacedWordChar(first))
        return `${base} ${piece}`;
    return `${base}${piece}`;
}
20
+ /**
21
+ * Merge streaming transcript updates. Pipecat RTVI usually sends cumulative
22
+ * snapshots (each event is the full hypothesis so far).
23
+ */
24
+ export function mergeStreamingTranscript(current, incoming) {
25
+ const piece = incoming.trim();
26
+ if (!piece)
27
+ return current.trim();
28
+ const base = current.trim();
29
+ if (!base)
30
+ return piece;
31
+ if (piece === base)
32
+ return base;
33
+ if (piece.startsWith(base))
34
+ return piece;
35
+ if (base.startsWith(piece))
36
+ return base;
37
+ const normBase = normalizeTranscriptKey(base);
38
+ const normPiece = normalizeTranscriptKey(piece);
39
+ if (normPiece && normBase && normPiece.startsWith(normBase))
40
+ return piece;
41
+ if (normPiece && normBase && normBase.startsWith(normPiece))
42
+ return base;
43
+ if (base.endsWith(piece))
44
+ return base;
45
+ return joinTranscriptSegments(base, piece);
46
+ }
47
+ /** @deprecated Use mergeStreamingTranscript; kept as an alias that forwards both arguments to it unchanged. */
48
+ export function appendTranscriptSegment(current, chunk) {
49
+ return mergeStreamingTranscript(current, chunk);
50
+ }
@@ -0,0 +1,14 @@
1
+ import type { Message } from '../types/ait-api';
2
+ import type { VoiceGreetingOptions, VoiceSession } from './types';
3
+ export type TriggerVoiceGreetingParams = { // Arguments for triggerVoiceGreeting.
4
+ session: VoiceSession; // Session that sends the greeting (sendVoiceUserInput); its id is stamped into the local message metadata.
5
+ options: VoiceGreetingOptions; // Greeting content and flags.
6
+ /** Optional: append a local-only user bubble for hero/welcome UI. Not called when options.skipUserMessage is set. */
7
+ appendLocalUserMessage?: (message: Message) => void;
8
+ };
9
+ /**
10
+ * Berify-style auto-greet: inject synthetic user turn (+ optional product image)
11
+ * over the voice `app` data channel after session is active. Throws when neither non-blank text nor attachments are supplied.
12
+ */
13
+ export declare function triggerVoiceGreeting({ session, options, appendLocalUserMessage, }: TriggerVoiceGreetingParams): void;
14
+ //# sourceMappingURL=trigger-voice-greeting.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"trigger-voice-greeting.d.ts","sourceRoot":"","sources":["../../src/voice/trigger-voice-greeting.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAChD,OAAO,KAAK,EAAE,oBAAoB,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AAGlE,MAAM,MAAM,0BAA0B,GAAG;IACvC,OAAO,EAAE,YAAY,CAAC;IACtB,OAAO,EAAE,oBAAoB,CAAC;IAC9B,qEAAqE;IACrE,sBAAsB,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI,CAAC;CACrD,CAAC;AAEF;;;GAGG;AACH,wBAAgB,oBAAoB,CAAC,EACnC,OAAO,EACP,OAAO,EACP,sBAAsB,GACvB,EAAE,0BAA0B,GAAG,IAAI,CA2BnC"}
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Berify-style auto-greet: inject synthetic user turn (+ optional product image)
3
+ * over the voice `app` data channel after session is active.
4
+ */
5
+ export function triggerVoiceGreeting({ session, options, appendLocalUserMessage, }) {
6
+ const text = options.text.trim();
7
+ if (!text && !options.attachments?.length) {
8
+ throw new Error('triggerVoiceGreeting: text or attachments required');
9
+ }
10
+ const clientMessageId = options.clientMessageId ?? `greeting_${Date.now()}`;
11
+ if (!options.skipUserMessage && appendLocalUserMessage) {
12
+ appendLocalUserMessage({
13
+ id: clientMessageId,
14
+ role: 'user',
15
+ content: text,
16
+ timestamp: new Date().toISOString(),
17
+ attachments: options.attachments,
18
+ metadata: { voiceRealtime: true, voiceSessionId: session.id },
19
+ });
20
+ }
21
+ const input = {
22
+ text: text || ' ',
23
+ attachments: options.attachments,
24
+ clientMessageId,
25
+ interruptFirst: options.interruptFirst ?? false,
26
+ };
27
+ session.sendVoiceUserInput(input);
28
+ }