@lokutor/sdk 1.1.15 → 1.1.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,7 +22,9 @@ var Language = /* @__PURE__ */ ((Language2) => {
22
22
  })(Language || {});
23
23
  var AUDIO_CONFIG = {
24
24
  SAMPLE_RATE: 16e3,
25
+ SAMPLE_RATE_INPUT: 16e3,
25
26
  SPEAKER_SAMPLE_RATE: 44100,
27
+ SAMPLE_RATE_OUTPUT: 44100,
26
28
  CHANNELS: 1,
27
29
  CHUNK_DURATION_MS: 20,
28
30
  get CHUNK_SIZE() {
package/dist/index.d.mts CHANGED
@@ -28,7 +28,9 @@ declare enum Language {
28
28
  */
29
29
  declare const AUDIO_CONFIG: {
30
30
  SAMPLE_RATE: number;
31
+ SAMPLE_RATE_INPUT: number;
31
32
  SPEAKER_SAMPLE_RATE: number;
33
+ SAMPLE_RATE_OUTPUT: number;
32
34
  CHANNELS: number;
33
35
  CHUNK_DURATION_MS: number;
34
36
  readonly CHUNK_SIZE: number;
@@ -120,6 +122,20 @@ interface ToolCall {
120
122
  arguments: string;
121
123
  }
122
124
 
125
+ /**
126
+ * Interface for audio hardware management (Browser/Node parity)
127
+ */
128
+ interface AudioManager {
129
+ init(): Promise<void>;
130
+ startMicrophone(onAudioInput: (pcm16Data: Uint8Array) => void): Promise<void>;
131
+ stopMicrophone(): void;
132
+ playAudio(pcm16Data: Uint8Array): void;
133
+ stopPlayback(): void;
134
+ cleanup(): void;
135
+ isMicMuted(): boolean;
136
+ setMuted(muted: boolean): void;
137
+ getAmplitude(): number;
138
+ }
123
139
  /**
124
140
  * Main client for Lokutor Voice Agent SDK
125
141
  *
@@ -145,6 +161,7 @@ declare class VoiceAgentClient {
145
161
  private audioManager;
146
162
  private enableAudio;
147
163
  private currentGeneration;
164
+ private listeners;
148
165
  private isUserDisconnect;
149
166
  private reconnecting;
150
167
  private reconnectAttempts;
@@ -160,8 +177,16 @@ declare class VoiceAgentClient {
160
177
  });
161
178
  /**
162
179
  * Connect to the Lokutor Voice Agent server
180
+ * @param customAudioManager Optional replacement for the default audio hardware handler
181
+ */
182
+ connect(customAudioManager?: AudioManager): Promise<boolean>;
183
+ /**
184
+ * The "Golden Path" - Starts a managed session with hardware handled automatically.
185
+ * This is the recommended way to start a conversation in both Browser and Node.js.
163
186
  */
164
- connect(): Promise<boolean>;
187
+ startManaged(config?: {
188
+ audioManager?: AudioManager;
189
+ }): Promise<this>;
165
190
  /**
166
191
  * Send initial configuration to the server
167
192
  */
@@ -179,7 +204,13 @@ declare class VoiceAgentClient {
179
204
  * Handle incoming text messages (metadata/transcriptions)
180
205
  */
181
206
  private handleTextMessage;
182
- private audioListeners;
207
+ /**
208
+ * Register an event listener (for Python parity)
209
+ */
210
+ on(event: string, callback: Function): this;
211
+ /**
212
+ * Internal emitter for all events
213
+ */
183
214
  private emit;
184
215
  onAudio(callback: (data: Uint8Array) => void): void;
185
216
  onVisemes(callback: (visemes: Viseme[]) => void): void;
@@ -236,6 +267,7 @@ declare class TTSClient {
236
267
  visemes?: boolean;
237
268
  onAudio?: (data: Uint8Array) => void;
238
269
  onVisemes?: (visemes: any[]) => void;
270
+ onTTFB?: (ms: number) => void;
239
271
  onError?: (error: any) => void;
240
272
  }): Promise<void>;
241
273
  }
@@ -443,4 +475,4 @@ declare class BrowserAudioManager {
443
475
  isRecording(): boolean;
444
476
  }
445
477
 
446
- export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
478
+ export { AUDIO_CONFIG, type AnalyserConfig, type AudioManager, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
package/dist/index.d.ts CHANGED
@@ -28,7 +28,9 @@ declare enum Language {
28
28
  */
29
29
  declare const AUDIO_CONFIG: {
30
30
  SAMPLE_RATE: number;
31
+ SAMPLE_RATE_INPUT: number;
31
32
  SPEAKER_SAMPLE_RATE: number;
33
+ SAMPLE_RATE_OUTPUT: number;
32
34
  CHANNELS: number;
33
35
  CHUNK_DURATION_MS: number;
34
36
  readonly CHUNK_SIZE: number;
@@ -120,6 +122,20 @@ interface ToolCall {
120
122
  arguments: string;
121
123
  }
122
124
 
125
+ /**
126
+ * Interface for audio hardware management (Browser/Node parity)
127
+ */
128
+ interface AudioManager {
129
+ init(): Promise<void>;
130
+ startMicrophone(onAudioInput: (pcm16Data: Uint8Array) => void): Promise<void>;
131
+ stopMicrophone(): void;
132
+ playAudio(pcm16Data: Uint8Array): void;
133
+ stopPlayback(): void;
134
+ cleanup(): void;
135
+ isMicMuted(): boolean;
136
+ setMuted(muted: boolean): void;
137
+ getAmplitude(): number;
138
+ }
123
139
  /**
124
140
  * Main client for Lokutor Voice Agent SDK
125
141
  *
@@ -145,6 +161,7 @@ declare class VoiceAgentClient {
145
161
  private audioManager;
146
162
  private enableAudio;
147
163
  private currentGeneration;
164
+ private listeners;
148
165
  private isUserDisconnect;
149
166
  private reconnecting;
150
167
  private reconnectAttempts;
@@ -160,8 +177,16 @@ declare class VoiceAgentClient {
160
177
  });
161
178
  /**
162
179
  * Connect to the Lokutor Voice Agent server
180
+ * @param customAudioManager Optional replacement for the default audio hardware handler
181
+ */
182
+ connect(customAudioManager?: AudioManager): Promise<boolean>;
183
+ /**
184
+ * The "Golden Path" - Starts a managed session with hardware handled automatically.
185
+ * This is the recommended way to start a conversation in both Browser and Node.js.
163
186
  */
164
- connect(): Promise<boolean>;
187
+ startManaged(config?: {
188
+ audioManager?: AudioManager;
189
+ }): Promise<this>;
165
190
  /**
166
191
  * Send initial configuration to the server
167
192
  */
@@ -179,7 +204,13 @@ declare class VoiceAgentClient {
179
204
  * Handle incoming text messages (metadata/transcriptions)
180
205
  */
181
206
  private handleTextMessage;
182
- private audioListeners;
207
+ /**
208
+ * Register an event listener (for Python parity)
209
+ */
210
+ on(event: string, callback: Function): this;
211
+ /**
212
+ * Internal emitter for all events
213
+ */
183
214
  private emit;
184
215
  onAudio(callback: (data: Uint8Array) => void): void;
185
216
  onVisemes(callback: (visemes: Viseme[]) => void): void;
@@ -236,6 +267,7 @@ declare class TTSClient {
236
267
  visemes?: boolean;
237
268
  onAudio?: (data: Uint8Array) => void;
238
269
  onVisemes?: (visemes: any[]) => void;
270
+ onTTFB?: (ms: number) => void;
239
271
  onError?: (error: any) => void;
240
272
  }): Promise<void>;
241
273
  }
@@ -443,4 +475,4 @@ declare class BrowserAudioManager {
443
475
  isRecording(): boolean;
444
476
  }
445
477
 
446
- export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
478
+ export { AUDIO_CONFIG, type AnalyserConfig, type AudioManager, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
package/dist/index.js CHANGED
@@ -1,8 +1,13 @@
1
1
  "use strict";
2
+ var __create = Object.create;
2
3
  var __defProp = Object.defineProperty;
3
4
  var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
5
  var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
5
7
  var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ var __esm = (fn, res) => function __init() {
9
+ return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
10
+ };
6
11
  var __export = (target, all) => {
7
12
  for (var name in all)
8
13
  __defProp(target, name, { get: all[name], enumerable: true });
@@ -15,8 +20,159 @@ var __copyProps = (to, from, except, desc) => {
15
20
  }
16
21
  return to;
17
22
  };
23
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
24
+ // If the importer is in node compatibility mode or this is not an ESM
25
+ // file that has been converted to a CommonJS file using a Babel-
26
+ // compatible transform (i.e. "__esModule" has not been set), then set
27
+ // "default" to the CommonJS "module.exports" for node compatibility.
28
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
29
+ mod
30
+ ));
18
31
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
32
 
33
+ // src/types.ts
34
+ var VoiceStyle, Language, AUDIO_CONFIG, DEFAULT_URLS;
35
+ var init_types = __esm({
36
+ "src/types.ts"() {
37
+ "use strict";
38
+ VoiceStyle = /* @__PURE__ */ ((VoiceStyle2) => {
39
+ VoiceStyle2["F1"] = "F1";
40
+ VoiceStyle2["F2"] = "F2";
41
+ VoiceStyle2["F3"] = "F3";
42
+ VoiceStyle2["F4"] = "F4";
43
+ VoiceStyle2["F5"] = "F5";
44
+ VoiceStyle2["M1"] = "M1";
45
+ VoiceStyle2["M2"] = "M2";
46
+ VoiceStyle2["M3"] = "M3";
47
+ VoiceStyle2["M4"] = "M4";
48
+ VoiceStyle2["M5"] = "M5";
49
+ return VoiceStyle2;
50
+ })(VoiceStyle || {});
51
+ Language = /* @__PURE__ */ ((Language2) => {
52
+ Language2["ENGLISH"] = "en";
53
+ Language2["SPANISH"] = "es";
54
+ Language2["FRENCH"] = "fr";
55
+ Language2["PORTUGUESE"] = "pt";
56
+ Language2["KOREAN"] = "ko";
57
+ return Language2;
58
+ })(Language || {});
59
+ AUDIO_CONFIG = {
60
+ SAMPLE_RATE: 16e3,
61
+ SAMPLE_RATE_INPUT: 16e3,
62
+ SPEAKER_SAMPLE_RATE: 44100,
63
+ SAMPLE_RATE_OUTPUT: 44100,
64
+ CHANNELS: 1,
65
+ CHUNK_DURATION_MS: 20,
66
+ get CHUNK_SIZE() {
67
+ return Math.floor(this.SAMPLE_RATE * this.CHUNK_DURATION_MS / 1e3);
68
+ }
69
+ };
70
+ DEFAULT_URLS = {
71
+ VOICE_AGENT: "wss://api.lokutor.com/ws/agent",
72
+ TTS: "wss://api.lokutor.com/ws/tts"
73
+ };
74
+ }
75
+ });
76
+
77
+ // src/node-audio.ts
78
+ var node_audio_exports = {};
79
+ __export(node_audio_exports, {
80
+ NodeAudioManager: () => NodeAudioManager
81
+ });
82
+ var NodeAudioManager;
83
+ var init_node_audio = __esm({
84
+ "src/node-audio.ts"() {
85
+ "use strict";
86
+ init_types();
87
+ NodeAudioManager = class {
88
+ speaker = null;
89
+ recorder = null;
90
+ recordingStream = null;
91
+ isMuted = false;
92
+ isListening = false;
93
+ constructor() {
94
+ }
95
+ async init() {
96
+ try {
97
+ const Speaker = await import("speaker").catch(() => null);
98
+ if (!Speaker) {
99
+ console.warn('\u26A0\uFE0F Package "speaker" is missing. Hardware output will be disabled.');
100
+ console.warn("\u{1F449} Run: npm install speaker");
101
+ }
102
+ } catch (e) {
103
+ console.error("Error initializing Node audio:", e);
104
+ }
105
+ }
106
+ async startMicrophone(onAudioInput) {
107
+ if (this.isListening) return;
108
+ try {
109
+ const recorder = await import("node-record-lpcm16").catch(() => null);
110
+ if (!recorder) {
111
+ throw new Error('Package "node-record-lpcm16" is missing. Microphone input failed.\n\u{1F449} Run: npm install node-record-lpcm16');
112
+ }
113
+ console.log("\u{1F3A4} Starting microphone (Node.js)...");
114
+ this.recordingStream = recorder.record({
115
+ sampleRate: AUDIO_CONFIG.SAMPLE_RATE,
116
+ threshold: 0,
117
+ verbose: false,
118
+ recordProgram: "sox"
119
+ // default
120
+ });
121
+ this.recordingStream.stream().on("data", (chunk) => {
122
+ if (!this.isMuted && onAudioInput) {
123
+ onAudioInput(new Uint8Array(chunk));
124
+ }
125
+ });
126
+ this.isListening = true;
127
+ } catch (e) {
128
+ console.error("Failed to start microphone:", e.message);
129
+ throw e;
130
+ }
131
+ }
132
+ stopMicrophone() {
133
+ if (this.recordingStream) {
134
+ this.recordingStream.stop();
135
+ this.recordingStream = null;
136
+ }
137
+ this.isListening = false;
138
+ }
139
+ async playAudio(pcm16Data) {
140
+ try {
141
+ if (!this.speaker) {
142
+ const Speaker = (await import("speaker")).default;
143
+ this.speaker = new Speaker({
144
+ channels: AUDIO_CONFIG.CHANNELS,
145
+ bitDepth: 16,
146
+ sampleRate: AUDIO_CONFIG.SPEAKER_SAMPLE_RATE
147
+ });
148
+ }
149
+ this.speaker.write(Buffer.from(pcm16Data));
150
+ } catch (e) {
151
+ }
152
+ }
153
+ stopPlayback() {
154
+ if (this.speaker) {
155
+ this.speaker.end();
156
+ this.speaker = null;
157
+ }
158
+ }
159
+ cleanup() {
160
+ this.stopMicrophone();
161
+ this.stopPlayback();
162
+ }
163
+ isMicMuted() {
164
+ return this.isMuted;
165
+ }
166
+ setMuted(muted) {
167
+ this.isMuted = muted;
168
+ }
169
+ getAmplitude() {
170
+ return 0;
171
+ }
172
+ };
173
+ }
174
+ });
175
+
20
176
  // src/index.ts
21
177
  var index_exports = {};
22
178
  __export(index_exports, {
@@ -41,42 +197,13 @@ __export(index_exports, {
41
197
  simpleTTS: () => simpleTTS
42
198
  });
43
199
  module.exports = __toCommonJS(index_exports);
200
+ init_types();
44
201
 
45
- // src/types.ts
46
- var VoiceStyle = /* @__PURE__ */ ((VoiceStyle2) => {
47
- VoiceStyle2["F1"] = "F1";
48
- VoiceStyle2["F2"] = "F2";
49
- VoiceStyle2["F3"] = "F3";
50
- VoiceStyle2["F4"] = "F4";
51
- VoiceStyle2["F5"] = "F5";
52
- VoiceStyle2["M1"] = "M1";
53
- VoiceStyle2["M2"] = "M2";
54
- VoiceStyle2["M3"] = "M3";
55
- VoiceStyle2["M4"] = "M4";
56
- VoiceStyle2["M5"] = "M5";
57
- return VoiceStyle2;
58
- })(VoiceStyle || {});
59
- var Language = /* @__PURE__ */ ((Language2) => {
60
- Language2["ENGLISH"] = "en";
61
- Language2["SPANISH"] = "es";
62
- Language2["FRENCH"] = "fr";
63
- Language2["PORTUGUESE"] = "pt";
64
- Language2["KOREAN"] = "ko";
65
- return Language2;
66
- })(Language || {});
67
- var AUDIO_CONFIG = {
68
- SAMPLE_RATE: 16e3,
69
- SPEAKER_SAMPLE_RATE: 44100,
70
- CHANNELS: 1,
71
- CHUNK_DURATION_MS: 20,
72
- get CHUNK_SIZE() {
73
- return Math.floor(this.SAMPLE_RATE * this.CHUNK_DURATION_MS / 1e3);
74
- }
75
- };
76
- var DEFAULT_URLS = {
77
- VOICE_AGENT: "wss://api.lokutor.com/ws/agent",
78
- TTS: "wss://api.lokutor.com/ws/tts"
79
- };
202
+ // src/client.ts
203
+ init_types();
204
+
205
+ // src/browser-audio.ts
206
+ init_types();
80
207
 
81
208
  // src/audio-utils.ts
82
209
  function pcm16ToFloat32(int16Data) {
@@ -521,6 +648,7 @@ var VoiceAgentClient = class {
521
648
  audioManager = null;
522
649
  enableAudio = false;
523
650
  currentGeneration = 0;
651
+ listeners = {};
524
652
  // Connection resilience
525
653
  isUserDisconnect = false;
526
654
  reconnecting = false;
@@ -543,14 +671,19 @@ var VoiceAgentClient = class {
543
671
  }
544
672
  /**
545
673
  * Connect to the Lokutor Voice Agent server
674
+ * @param customAudioManager Optional replacement for the default audio hardware handler
546
675
  */
547
- async connect() {
676
+ async connect(customAudioManager) {
548
677
  this.isUserDisconnect = false;
549
- if (this.enableAudio) {
550
- if (!this.audioManager) {
678
+ if (this.enableAudio || customAudioManager) {
679
+ if (customAudioManager) {
680
+ this.audioManager = customAudioManager;
681
+ } else if (!this.audioManager && typeof window !== "undefined") {
551
682
  this.audioManager = new BrowserAudioManager();
552
683
  }
553
- await this.audioManager.init();
684
+ if (this.audioManager) {
685
+ await this.audioManager.init();
686
+ }
554
687
  }
555
688
  return new Promise((resolve, reject) => {
556
689
  try {
@@ -611,6 +744,34 @@ var VoiceAgentClient = class {
611
744
  }
612
745
  });
613
746
  }
747
+ /**
748
+ * The "Golden Path" - Starts a managed session with hardware handled automatically.
749
+ * This is the recommended way to start a conversation in both Browser and Node.js.
750
+ */
751
+ async startManaged(config) {
752
+ this.enableAudio = true;
753
+ if (config?.audioManager) {
754
+ this.audioManager = config.audioManager;
755
+ } else if (!this.audioManager) {
756
+ if (typeof window !== "undefined") {
757
+ this.audioManager = new BrowserAudioManager();
758
+ } else {
759
+ try {
760
+ const { NodeAudioManager: NodeAudioManager2 } = await Promise.resolve().then(() => (init_node_audio(), node_audio_exports));
761
+ this.audioManager = new NodeAudioManager2();
762
+ } catch (e) {
763
+ console.error('\u274C Failed to load NodeAudioManager. Please ensure "speaker" and "node-record-lpcm16" are installed.');
764
+ }
765
+ }
766
+ }
767
+ await this.connect();
768
+ if (this.audioManager && this.isConnected) {
769
+ await this.audioManager.startMicrophone((data) => {
770
+ this.sendAudio(data);
771
+ });
772
+ }
773
+ return this;
774
+ }
614
775
  /**
615
776
  * Send initial configuration to the server
616
777
  */
@@ -712,21 +873,51 @@ var VoiceAgentClient = class {
712
873
  } catch (e) {
713
874
  }
714
875
  }
715
- audioListeners = [];
716
- emit(event, data) {
717
- if (event === "audio") {
718
- if (this.onAudioCallback) this.onAudioCallback(data);
719
- this.audioListeners.forEach((l) => l(data));
720
- } else if (event === "visemes") {
721
- if (this.onVisemesCallback) this.onVisemesCallback(data);
722
- this.visemeListeners.forEach((l) => l(data));
876
+ /**
877
+ * Register an event listener (for Python parity)
878
+ */
879
+ on(event, callback) {
880
+ if (!this.listeners[event]) {
881
+ this.listeners[event] = [];
882
+ }
883
+ this.listeners[event].push(callback);
884
+ return this;
885
+ }
886
+ /**
887
+ * Internal emitter for all events
888
+ */
889
+ emit(event, ...args) {
890
+ const legacyMap = {
891
+ "transcription": "onTranscription",
892
+ "response": "onResponse",
893
+ "audio": "onAudioCallback",
894
+ "visemes": "onVisemesCallback",
895
+ "status": "onStatus",
896
+ "error": "onError"
897
+ };
898
+ const legacyKey = legacyMap[event];
899
+ if (legacyKey && this[legacyKey]) {
900
+ try {
901
+ this[legacyKey](...args);
902
+ } catch (e) {
903
+ console.error(`Error in legacy callback ${legacyKey}:`, e);
904
+ }
905
+ }
906
+ if (this.listeners[event]) {
907
+ this.listeners[event].forEach((cb) => {
908
+ try {
909
+ cb(...args);
910
+ } catch (e) {
911
+ console.error(`Error in listener for ${event}:`, e);
912
+ }
913
+ });
723
914
  }
724
915
  }
725
916
  onAudio(callback) {
726
- this.audioListeners.push(callback);
917
+ this.on("audio", callback);
727
918
  }
728
919
  onVisemes(callback) {
729
- this.visemeListeners.push(callback);
920
+ this.on("visemes", callback);
730
921
  }
731
922
  /**
732
923
  * Disconnect from the server
@@ -805,15 +996,28 @@ var TTSClient = class {
805
996
  */
806
997
  synthesize(options) {
807
998
  return new Promise((resolve, reject) => {
999
+ let activityTimeout;
1000
+ let ws;
1001
+ let startTime;
1002
+ let firstByteReceived = false;
1003
+ const refreshTimeout = () => {
1004
+ if (activityTimeout) clearTimeout(activityTimeout);
1005
+ activityTimeout = setTimeout(() => {
1006
+ console.log("\u23F1\uFE0F TTS synthesis reached inactivity timeout (2s) - resolving");
1007
+ if (ws) ws.close();
1008
+ resolve();
1009
+ }, 2e3);
1010
+ };
808
1011
  try {
809
1012
  let url = DEFAULT_URLS.TTS;
810
1013
  if (this.apiKey) {
811
1014
  const separator = url.includes("?") ? "&" : "?";
812
1015
  url += `${separator}api_key=${this.apiKey}`;
813
1016
  }
814
- const ws = new WebSocket(url);
1017
+ ws = new WebSocket(url);
815
1018
  ws.binaryType = "arraybuffer";
816
1019
  ws.onopen = () => {
1020
+ refreshTimeout();
817
1021
  const req = {
818
1022
  text: options.text,
819
1023
  voice: options.voice || "F1" /* F1 */,
@@ -823,28 +1027,50 @@ var TTSClient = class {
823
1027
  visemes: options.visemes || false
824
1028
  };
825
1029
  ws.send(JSON.stringify(req));
1030
+ startTime = Date.now();
826
1031
  };
827
1032
  ws.onmessage = async (event) => {
1033
+ refreshTimeout();
828
1034
  if (event.data instanceof ArrayBuffer) {
1035
+ if (!firstByteReceived) {
1036
+ const ttfb = Date.now() - startTime;
1037
+ if (options.onTTFB) options.onTTFB(ttfb);
1038
+ firstByteReceived = true;
1039
+ }
829
1040
  if (options.onAudio) options.onAudio(new Uint8Array(event.data));
830
1041
  } else {
1042
+ const text = event.data.toString();
1043
+ if (text === "EOS") {
1044
+ if (activityTimeout) clearTimeout(activityTimeout);
1045
+ ws.close();
1046
+ resolve();
1047
+ return;
1048
+ }
831
1049
  try {
832
- const msg = JSON.parse(event.data.toString());
1050
+ const msg = JSON.parse(text);
833
1051
  if (Array.isArray(msg) && options.onVisemes) {
834
1052
  options.onVisemes(msg);
835
1053
  }
1054
+ if (msg.type === "eos") {
1055
+ if (activityTimeout) clearTimeout(activityTimeout);
1056
+ ws.close();
1057
+ resolve();
1058
+ }
836
1059
  } catch (e) {
837
1060
  }
838
1061
  }
839
1062
  };
840
1063
  ws.onerror = (err) => {
1064
+ if (activityTimeout) clearTimeout(activityTimeout);
841
1065
  if (options.onError) options.onError(err);
842
1066
  reject(err);
843
1067
  };
844
1068
  ws.onclose = () => {
1069
+ if (activityTimeout) clearTimeout(activityTimeout);
845
1070
  resolve();
846
1071
  };
847
1072
  } catch (err) {
1073
+ if (activityTimeout) clearTimeout(activityTimeout);
848
1074
  if (options.onError) options.onError(err);
849
1075
  reject(err);
850
1076
  }
package/dist/index.mjs CHANGED
@@ -3,7 +3,7 @@ import {
3
3
  DEFAULT_URLS,
4
4
  Language,
5
5
  VoiceStyle
6
- } from "./chunk-SNNPJP5R.mjs";
6
+ } from "./chunk-UI24THO7.mjs";
7
7
 
8
8
  // src/audio-utils.ts
9
9
  function pcm16ToFloat32(int16Data) {
@@ -448,6 +448,7 @@ var VoiceAgentClient = class {
448
448
  audioManager = null;
449
449
  enableAudio = false;
450
450
  currentGeneration = 0;
451
+ listeners = {};
451
452
  // Connection resilience
452
453
  isUserDisconnect = false;
453
454
  reconnecting = false;
@@ -470,14 +471,19 @@ var VoiceAgentClient = class {
470
471
  }
471
472
  /**
472
473
  * Connect to the Lokutor Voice Agent server
474
+ * @param customAudioManager Optional replacement for the default audio hardware handler
473
475
  */
474
- async connect() {
476
+ async connect(customAudioManager) {
475
477
  this.isUserDisconnect = false;
476
- if (this.enableAudio) {
477
- if (!this.audioManager) {
478
+ if (this.enableAudio || customAudioManager) {
479
+ if (customAudioManager) {
480
+ this.audioManager = customAudioManager;
481
+ } else if (!this.audioManager && typeof window !== "undefined") {
478
482
  this.audioManager = new BrowserAudioManager();
479
483
  }
480
- await this.audioManager.init();
484
+ if (this.audioManager) {
485
+ await this.audioManager.init();
486
+ }
481
487
  }
482
488
  return new Promise((resolve, reject) => {
483
489
  try {
@@ -538,6 +544,34 @@ var VoiceAgentClient = class {
538
544
  }
539
545
  });
540
546
  }
547
+ /**
548
+ * The "Golden Path" - Starts a managed session with hardware handled automatically.
549
+ * This is the recommended way to start a conversation in both Browser and Node.js.
550
+ */
551
+ async startManaged(config) {
552
+ this.enableAudio = true;
553
+ if (config?.audioManager) {
554
+ this.audioManager = config.audioManager;
555
+ } else if (!this.audioManager) {
556
+ if (typeof window !== "undefined") {
557
+ this.audioManager = new BrowserAudioManager();
558
+ } else {
559
+ try {
560
+ const { NodeAudioManager } = await import("./node-audio-5HOWE6MC.mjs");
561
+ this.audioManager = new NodeAudioManager();
562
+ } catch (e) {
563
+ console.error('\u274C Failed to load NodeAudioManager. Please ensure "speaker" and "node-record-lpcm16" are installed.');
564
+ }
565
+ }
566
+ }
567
+ await this.connect();
568
+ if (this.audioManager && this.isConnected) {
569
+ await this.audioManager.startMicrophone((data) => {
570
+ this.sendAudio(data);
571
+ });
572
+ }
573
+ return this;
574
+ }
541
575
  /**
542
576
  * Send initial configuration to the server
543
577
  */
@@ -639,21 +673,51 @@ var VoiceAgentClient = class {
639
673
  } catch (e) {
640
674
  }
641
675
  }
642
- audioListeners = [];
643
- emit(event, data) {
644
- if (event === "audio") {
645
- if (this.onAudioCallback) this.onAudioCallback(data);
646
- this.audioListeners.forEach((l) => l(data));
647
- } else if (event === "visemes") {
648
- if (this.onVisemesCallback) this.onVisemesCallback(data);
649
- this.visemeListeners.forEach((l) => l(data));
676
+ /**
677
+ * Register an event listener (for Python parity)
678
+ */
679
+ on(event, callback) {
680
+ if (!this.listeners[event]) {
681
+ this.listeners[event] = [];
682
+ }
683
+ this.listeners[event].push(callback);
684
+ return this;
685
+ }
686
+ /**
687
+ * Internal emitter for all events
688
+ */
689
+ emit(event, ...args) {
690
+ const legacyMap = {
691
+ "transcription": "onTranscription",
692
+ "response": "onResponse",
693
+ "audio": "onAudioCallback",
694
+ "visemes": "onVisemesCallback",
695
+ "status": "onStatus",
696
+ "error": "onError"
697
+ };
698
+ const legacyKey = legacyMap[event];
699
+ if (legacyKey && this[legacyKey]) {
700
+ try {
701
+ this[legacyKey](...args);
702
+ } catch (e) {
703
+ console.error(`Error in legacy callback ${legacyKey}:`, e);
704
+ }
705
+ }
706
+ if (this.listeners[event]) {
707
+ this.listeners[event].forEach((cb) => {
708
+ try {
709
+ cb(...args);
710
+ } catch (e) {
711
+ console.error(`Error in listener for ${event}:`, e);
712
+ }
713
+ });
650
714
  }
651
715
  }
652
716
  onAudio(callback) {
653
- this.audioListeners.push(callback);
717
+ this.on("audio", callback);
654
718
  }
655
719
  onVisemes(callback) {
656
- this.visemeListeners.push(callback);
720
+ this.on("visemes", callback);
657
721
  }
658
722
  /**
659
723
  * Disconnect from the server
@@ -732,15 +796,28 @@ var TTSClient = class {
732
796
  */
733
797
  synthesize(options) {
734
798
  return new Promise((resolve, reject) => {
799
+ let activityTimeout;
800
+ let ws;
801
+ let startTime;
802
+ let firstByteReceived = false;
803
+ const refreshTimeout = () => {
804
+ if (activityTimeout) clearTimeout(activityTimeout);
805
+ activityTimeout = setTimeout(() => {
806
+ console.log("\u23F1\uFE0F TTS synthesis reached inactivity timeout (2s) - resolving");
807
+ if (ws) ws.close();
808
+ resolve();
809
+ }, 2e3);
810
+ };
735
811
  try {
736
812
  let url = DEFAULT_URLS.TTS;
737
813
  if (this.apiKey) {
738
814
  const separator = url.includes("?") ? "&" : "?";
739
815
  url += `${separator}api_key=${this.apiKey}`;
740
816
  }
741
- const ws = new WebSocket(url);
817
+ ws = new WebSocket(url);
742
818
  ws.binaryType = "arraybuffer";
743
819
  ws.onopen = () => {
820
+ refreshTimeout();
744
821
  const req = {
745
822
  text: options.text,
746
823
  voice: options.voice || "F1" /* F1 */,
@@ -750,28 +827,50 @@ var TTSClient = class {
750
827
  visemes: options.visemes || false
751
828
  };
752
829
  ws.send(JSON.stringify(req));
830
+ startTime = Date.now();
753
831
  };
754
832
  ws.onmessage = async (event) => {
833
+ refreshTimeout();
755
834
  if (event.data instanceof ArrayBuffer) {
835
+ if (!firstByteReceived) {
836
+ const ttfb = Date.now() - startTime;
837
+ if (options.onTTFB) options.onTTFB(ttfb);
838
+ firstByteReceived = true;
839
+ }
756
840
  if (options.onAudio) options.onAudio(new Uint8Array(event.data));
757
841
  } else {
842
+ const text = event.data.toString();
843
+ if (text === "EOS") {
844
+ if (activityTimeout) clearTimeout(activityTimeout);
845
+ ws.close();
846
+ resolve();
847
+ return;
848
+ }
758
849
  try {
759
- const msg = JSON.parse(event.data.toString());
850
+ const msg = JSON.parse(text);
760
851
  if (Array.isArray(msg) && options.onVisemes) {
761
852
  options.onVisemes(msg);
762
853
  }
854
+ if (msg.type === "eos") {
855
+ if (activityTimeout) clearTimeout(activityTimeout);
856
+ ws.close();
857
+ resolve();
858
+ }
763
859
  } catch (e) {
764
860
  }
765
861
  }
766
862
  };
767
863
  ws.onerror = (err) => {
864
+ if (activityTimeout) clearTimeout(activityTimeout);
768
865
  if (options.onError) options.onError(err);
769
866
  reject(err);
770
867
  };
771
868
  ws.onclose = () => {
869
+ if (activityTimeout) clearTimeout(activityTimeout);
772
870
  resolve();
773
871
  };
774
872
  } catch (err) {
873
+ if (activityTimeout) clearTimeout(activityTimeout);
775
874
  if (options.onError) options.onError(err);
776
875
  reject(err);
777
876
  }
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  AUDIO_CONFIG
3
- } from "./chunk-SNNPJP5R.mjs";
3
+ } from "./chunk-UI24THO7.mjs";
4
4
 
5
5
  // src/node-audio.ts
6
6
  var NodeAudioManager = class {
@@ -16,7 +16,7 @@ var NodeAudioManager = class {
16
16
  const Speaker = await import("speaker").catch(() => null);
17
17
  if (!Speaker) {
18
18
  console.warn('\u26A0\uFE0F Package "speaker" is missing. Hardware output will be disabled.');
19
- console.warn("\u{1F449} Run: npm install speaker");
19
+ console.warn("\u{1F449} Run: npm install speaker");
20
20
  }
21
21
  } catch (e) {
22
22
  console.error("Error initializing Node audio:", e);
@@ -24,23 +24,29 @@ var NodeAudioManager = class {
24
24
  }
25
25
  async startMicrophone(onAudioInput) {
26
26
  if (this.isListening) return;
27
- const recorder = await import("node-record-lpcm16").catch(() => null);
28
- if (!recorder) {
29
- throw new Error('Package "node-record-lpcm16" is missing. Microphone input failed.\n\u{1F449} Run: npm install node-record-lpcm16');
30
- }
31
- this.recorder = recorder;
32
- this.recordingStream = recorder.record({
33
- sampleRate: AUDIO_CONFIG.SAMPLE_RATE,
34
- threshold: 0,
35
- verbose: false,
36
- recordProgram: "sox"
37
- });
38
- this.recordingStream.stream().on("data", (chunk) => {
39
- if (!this.isMuted && onAudioInput) {
40
- onAudioInput(new Uint8Array(chunk));
27
+ try {
28
+ const recorder = await import("node-record-lpcm16").catch(() => null);
29
+ if (!recorder) {
30
+ throw new Error('Package "node-record-lpcm16" is missing. Microphone input failed.\n\u{1F449} Run: npm install node-record-lpcm16');
41
31
  }
42
- });
43
- this.isListening = true;
32
+ console.log("\u{1F3A4} Starting microphone (Node.js)...");
33
+ this.recordingStream = recorder.record({
34
+ sampleRate: AUDIO_CONFIG.SAMPLE_RATE,
35
+ threshold: 0,
36
+ verbose: false,
37
+ recordProgram: "sox"
38
+ // default
39
+ });
40
+ this.recordingStream.stream().on("data", (chunk) => {
41
+ if (!this.isMuted && onAudioInput) {
42
+ onAudioInput(new Uint8Array(chunk));
43
+ }
44
+ });
45
+ this.isListening = true;
46
+ } catch (e) {
47
+ console.error("Failed to start microphone:", e.message);
48
+ throw e;
49
+ }
44
50
  }
45
51
  stopMicrophone() {
46
52
  if (this.recordingStream) {
@@ -60,7 +66,7 @@ var NodeAudioManager = class {
60
66
  });
61
67
  }
62
68
  this.speaker.write(Buffer.from(pcm16Data));
63
- } catch {
69
+ } catch (e) {
64
70
  }
65
71
  }
66
72
  stopPlayback() {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lokutor/sdk",
3
- "version": "1.1.15",
3
+ "version": "1.1.17",
4
4
  "description": "JavaScript/TypeScript SDK for Lokutor Real-time Voice AI",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",
@@ -8,23 +8,9 @@
8
8
  "files": [
9
9
  "dist"
10
10
  ],
11
- "exports": {
12
- ".": {
13
- "import": "./dist/index.mjs",
14
- "require": "./dist/index.js"
15
- },
16
- "./node-audio": {
17
- "import": "./dist/node-audio.mjs",
18
- "require": "./dist/node-audio.js"
19
- }
20
- },
21
- "browser": {
22
- "speaker": false,
23
- "node-record-lpcm16": false
24
- },
25
11
  "scripts": {
26
- "build": "tsup src/index.ts src/node-audio.ts --format cjs,esm --dts --clean",
27
- "dev": "tsup src/index.ts src/node-audio.ts --format cjs,esm --watch --dts --clean",
12
+ "build": "tsup src/index.ts --format cjs,esm --dts",
13
+ "dev": "tsup src/index.ts --format cjs,esm --watch --dts",
28
14
  "test": "vitest run",
29
15
  "test:watch": "vitest",
30
16
  "lint": "eslint src --ext .ts",
@@ -1,25 +0,0 @@
1
- /**
2
- * Node.js-only audio manager.
3
- *
4
- * This module is intentionally separate, and is not exported from the browser default
5
- * entrypoint, so browser bundlers do not include Node-only dependencies.
6
- */
7
- declare class NodeAudioManager {
8
- private speaker;
9
- private recorder;
10
- private recordingStream;
11
- private isMuted;
12
- private isListening;
13
- constructor();
14
- init(): Promise<void>;
15
- startMicrophone(onAudioInput: (pcm16Data: Uint8Array) => void): Promise<void>;
16
- stopMicrophone(): void;
17
- playAudio(pcm16Data: Uint8Array): Promise<void>;
18
- stopPlayback(): void;
19
- cleanup(): void;
20
- isMicMuted(): boolean;
21
- setMuted(muted: boolean): void;
22
- getAmplitude(): number;
23
- }
24
-
25
- export { NodeAudioManager };
@@ -1,25 +0,0 @@
1
- /**
2
- * Node.js-only audio manager.
3
- *
4
- * This module is intentionally separate, and is not exported from the browser default
5
- * entrypoint, so browser bundlers do not include Node-only dependencies.
6
- */
7
- declare class NodeAudioManager {
8
- private speaker;
9
- private recorder;
10
- private recordingStream;
11
- private isMuted;
12
- private isListening;
13
- constructor();
14
- init(): Promise<void>;
15
- startMicrophone(onAudioInput: (pcm16Data: Uint8Array) => void): Promise<void>;
16
- stopMicrophone(): void;
17
- playAudio(pcm16Data: Uint8Array): Promise<void>;
18
- stopPlayback(): void;
19
- cleanup(): void;
20
- isMicMuted(): boolean;
21
- setMuted(muted: boolean): void;
22
- getAmplitude(): number;
23
- }
24
-
25
- export { NodeAudioManager };
@@ -1,132 +0,0 @@
1
- "use strict";
2
- var __create = Object.create;
3
- var __defProp = Object.defineProperty;
4
- var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
- var __getOwnPropNames = Object.getOwnPropertyNames;
6
- var __getProtoOf = Object.getPrototypeOf;
7
- var __hasOwnProp = Object.prototype.hasOwnProperty;
8
- var __export = (target, all) => {
9
- for (var name in all)
10
- __defProp(target, name, { get: all[name], enumerable: true });
11
- };
12
- var __copyProps = (to, from, except, desc) => {
13
- if (from && typeof from === "object" || typeof from === "function") {
14
- for (let key of __getOwnPropNames(from))
15
- if (!__hasOwnProp.call(to, key) && key !== except)
16
- __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
17
- }
18
- return to;
19
- };
20
- var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
- // If the importer is in node compatibility mode or this is not an ESM
22
- // file that has been converted to a CommonJS file using a Babel-
23
- // compatible transform (i.e. "__esModule" has not been set), then set
24
- // "default" to the CommonJS "module.exports" for node compatibility.
25
- isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
- mod
27
- ));
28
- var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
-
30
- // src/node-audio.ts
31
- var node_audio_exports = {};
32
- __export(node_audio_exports, {
33
- NodeAudioManager: () => NodeAudioManager
34
- });
35
- module.exports = __toCommonJS(node_audio_exports);
36
-
37
- // src/types.ts
38
- var AUDIO_CONFIG = {
39
- SAMPLE_RATE: 16e3,
40
- SPEAKER_SAMPLE_RATE: 44100,
41
- CHANNELS: 1,
42
- CHUNK_DURATION_MS: 20,
43
- get CHUNK_SIZE() {
44
- return Math.floor(this.SAMPLE_RATE * this.CHUNK_DURATION_MS / 1e3);
45
- }
46
- };
47
-
48
- // src/node-audio.ts
49
- var NodeAudioManager = class {
50
- speaker = null;
51
- recorder = null;
52
- recordingStream = null;
53
- isMuted = false;
54
- isListening = false;
55
- constructor() {
56
- }
57
- async init() {
58
- try {
59
- const Speaker = await import("speaker").catch(() => null);
60
- if (!Speaker) {
61
- console.warn('\u26A0\uFE0F Package "speaker" is missing. Hardware output will be disabled.');
62
- console.warn("\u{1F449} Run: npm install speaker");
63
- }
64
- } catch (e) {
65
- console.error("Error initializing Node audio:", e);
66
- }
67
- }
68
- async startMicrophone(onAudioInput) {
69
- if (this.isListening) return;
70
- const recorder = await import("node-record-lpcm16").catch(() => null);
71
- if (!recorder) {
72
- throw new Error('Package "node-record-lpcm16" is missing. Microphone input failed.\n\u{1F449} Run: npm install node-record-lpcm16');
73
- }
74
- this.recorder = recorder;
75
- this.recordingStream = recorder.record({
76
- sampleRate: AUDIO_CONFIG.SAMPLE_RATE,
77
- threshold: 0,
78
- verbose: false,
79
- recordProgram: "sox"
80
- });
81
- this.recordingStream.stream().on("data", (chunk) => {
82
- if (!this.isMuted && onAudioInput) {
83
- onAudioInput(new Uint8Array(chunk));
84
- }
85
- });
86
- this.isListening = true;
87
- }
88
- stopMicrophone() {
89
- if (this.recordingStream) {
90
- this.recordingStream.stop();
91
- this.recordingStream = null;
92
- }
93
- this.isListening = false;
94
- }
95
- async playAudio(pcm16Data) {
96
- try {
97
- if (!this.speaker) {
98
- const Speaker = (await import("speaker")).default;
99
- this.speaker = new Speaker({
100
- channels: AUDIO_CONFIG.CHANNELS,
101
- bitDepth: 16,
102
- sampleRate: AUDIO_CONFIG.SPEAKER_SAMPLE_RATE
103
- });
104
- }
105
- this.speaker.write(Buffer.from(pcm16Data));
106
- } catch {
107
- }
108
- }
109
- stopPlayback() {
110
- if (this.speaker) {
111
- this.speaker.end();
112
- this.speaker = null;
113
- }
114
- }
115
- cleanup() {
116
- this.stopMicrophone();
117
- this.stopPlayback();
118
- }
119
- isMicMuted() {
120
- return this.isMuted;
121
- }
122
- setMuted(muted) {
123
- this.isMuted = muted;
124
- }
125
- getAmplitude() {
126
- return 0;
127
- }
128
- };
129
- // Annotate the CommonJS export names for ESM import in node:
130
- 0 && (module.exports = {
131
- NodeAudioManager
132
- });