@lokutor/sdk 1.1.13 → 1.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,9 +22,7 @@ var Language = /* @__PURE__ */ ((Language2) => {
22
22
  })(Language || {});
23
23
  var AUDIO_CONFIG = {
24
24
  SAMPLE_RATE: 16e3,
25
- SAMPLE_RATE_INPUT: 16e3,
26
25
  SPEAKER_SAMPLE_RATE: 44100,
27
- SAMPLE_RATE_OUTPUT: 44100,
28
26
  CHANNELS: 1,
29
27
  CHUNK_DURATION_MS: 20,
30
28
  get CHUNK_SIZE() {
package/dist/index.d.mts CHANGED
@@ -28,9 +28,7 @@ declare enum Language {
28
28
  */
29
29
  declare const AUDIO_CONFIG: {
30
30
  SAMPLE_RATE: number;
31
- SAMPLE_RATE_INPUT: number;
32
31
  SPEAKER_SAMPLE_RATE: number;
33
- SAMPLE_RATE_OUTPUT: number;
34
32
  CHANNELS: number;
35
33
  CHUNK_DURATION_MS: number;
36
34
  readonly CHUNK_SIZE: number;
@@ -122,20 +120,6 @@ interface ToolCall {
122
120
  arguments: string;
123
121
  }
124
122
 
125
- /**
126
- * Interface for audio hardware management (Browser/Node parity)
127
- */
128
- interface AudioManager {
129
- init(): Promise<void>;
130
- startMicrophone(onAudioInput: (pcm16Data: Uint8Array) => void): Promise<void>;
131
- stopMicrophone(): void;
132
- playAudio(pcm16Data: Uint8Array): void;
133
- stopPlayback(): void;
134
- cleanup(): void;
135
- isMicMuted(): boolean;
136
- setMuted(muted: boolean): void;
137
- getAmplitude(): number;
138
- }
139
123
  /**
140
124
  * Main client for Lokutor Voice Agent SDK
141
125
  *
@@ -161,7 +145,6 @@ declare class VoiceAgentClient {
161
145
  private audioManager;
162
146
  private enableAudio;
163
147
  private currentGeneration;
164
- private listeners;
165
148
  private isUserDisconnect;
166
149
  private reconnecting;
167
150
  private reconnectAttempts;
@@ -177,16 +160,8 @@ declare class VoiceAgentClient {
177
160
  });
178
161
  /**
179
162
  * Connect to the Lokutor Voice Agent server
180
- * @param customAudioManager Optional replacement for the default audio hardware handler
181
- */
182
- connect(customAudioManager?: AudioManager): Promise<boolean>;
183
- /**
184
- * The "Golden Path" - Starts a managed session with hardware handled automatically.
185
- * This is the recommended way to start a conversation in both Browser and Node.js.
186
163
  */
187
- startManaged(config?: {
188
- audioManager?: AudioManager;
189
- }): Promise<this>;
164
+ connect(): Promise<boolean>;
190
165
  /**
191
166
  * Send initial configuration to the server
192
167
  */
@@ -204,13 +179,7 @@ declare class VoiceAgentClient {
204
179
  * Handle incoming text messages (metadata/transcriptions)
205
180
  */
206
181
  private handleTextMessage;
207
- /**
208
- * Register an event listener (for Python parity)
209
- */
210
- on(event: string, callback: Function): this;
211
- /**
212
- * Internal emitter for all events
213
- */
182
+ private audioListeners;
214
183
  private emit;
215
184
  onAudio(callback: (data: Uint8Array) => void): void;
216
185
  onVisemes(callback: (visemes: Viseme[]) => void): void;
@@ -267,7 +236,6 @@ declare class TTSClient {
267
236
  visemes?: boolean;
268
237
  onAudio?: (data: Uint8Array) => void;
269
238
  onVisemes?: (visemes: any[]) => void;
270
- onTTFB?: (ms: number) => void;
271
239
  onError?: (error: any) => void;
272
240
  }): Promise<void>;
273
241
  }
@@ -475,4 +443,4 @@ declare class BrowserAudioManager {
475
443
  isRecording(): boolean;
476
444
  }
477
445
 
478
- export { AUDIO_CONFIG, type AnalyserConfig, type AudioManager, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
446
+ export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
package/dist/index.d.ts CHANGED
@@ -28,9 +28,7 @@ declare enum Language {
28
28
  */
29
29
  declare const AUDIO_CONFIG: {
30
30
  SAMPLE_RATE: number;
31
- SAMPLE_RATE_INPUT: number;
32
31
  SPEAKER_SAMPLE_RATE: number;
33
- SAMPLE_RATE_OUTPUT: number;
34
32
  CHANNELS: number;
35
33
  CHUNK_DURATION_MS: number;
36
34
  readonly CHUNK_SIZE: number;
@@ -122,20 +120,6 @@ interface ToolCall {
122
120
  arguments: string;
123
121
  }
124
122
 
125
- /**
126
- * Interface for audio hardware management (Browser/Node parity)
127
- */
128
- interface AudioManager {
129
- init(): Promise<void>;
130
- startMicrophone(onAudioInput: (pcm16Data: Uint8Array) => void): Promise<void>;
131
- stopMicrophone(): void;
132
- playAudio(pcm16Data: Uint8Array): void;
133
- stopPlayback(): void;
134
- cleanup(): void;
135
- isMicMuted(): boolean;
136
- setMuted(muted: boolean): void;
137
- getAmplitude(): number;
138
- }
139
123
  /**
140
124
  * Main client for Lokutor Voice Agent SDK
141
125
  *
@@ -161,7 +145,6 @@ declare class VoiceAgentClient {
161
145
  private audioManager;
162
146
  private enableAudio;
163
147
  private currentGeneration;
164
- private listeners;
165
148
  private isUserDisconnect;
166
149
  private reconnecting;
167
150
  private reconnectAttempts;
@@ -177,16 +160,8 @@ declare class VoiceAgentClient {
177
160
  });
178
161
  /**
179
162
  * Connect to the Lokutor Voice Agent server
180
- * @param customAudioManager Optional replacement for the default audio hardware handler
181
- */
182
- connect(customAudioManager?: AudioManager): Promise<boolean>;
183
- /**
184
- * The "Golden Path" - Starts a managed session with hardware handled automatically.
185
- * This is the recommended way to start a conversation in both Browser and Node.js.
186
163
  */
187
- startManaged(config?: {
188
- audioManager?: AudioManager;
189
- }): Promise<this>;
164
+ connect(): Promise<boolean>;
190
165
  /**
191
166
  * Send initial configuration to the server
192
167
  */
@@ -204,13 +179,7 @@ declare class VoiceAgentClient {
204
179
  * Handle incoming text messages (metadata/transcriptions)
205
180
  */
206
181
  private handleTextMessage;
207
- /**
208
- * Register an event listener (for Python parity)
209
- */
210
- on(event: string, callback: Function): this;
211
- /**
212
- * Internal emitter for all events
213
- */
182
+ private audioListeners;
214
183
  private emit;
215
184
  onAudio(callback: (data: Uint8Array) => void): void;
216
185
  onVisemes(callback: (visemes: Viseme[]) => void): void;
@@ -267,7 +236,6 @@ declare class TTSClient {
267
236
  visemes?: boolean;
268
237
  onAudio?: (data: Uint8Array) => void;
269
238
  onVisemes?: (visemes: any[]) => void;
270
- onTTFB?: (ms: number) => void;
271
239
  onError?: (error: any) => void;
272
240
  }): Promise<void>;
273
241
  }
@@ -475,4 +443,4 @@ declare class BrowserAudioManager {
475
443
  isRecording(): boolean;
476
444
  }
477
445
 
478
- export { AUDIO_CONFIG, type AnalyserConfig, type AudioManager, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
446
+ export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
package/dist/index.js CHANGED
@@ -1,13 +1,8 @@
1
1
  "use strict";
2
- var __create = Object.create;
3
2
  var __defProp = Object.defineProperty;
4
3
  var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
4
  var __getOwnPropNames = Object.getOwnPropertyNames;
6
- var __getProtoOf = Object.getPrototypeOf;
7
5
  var __hasOwnProp = Object.prototype.hasOwnProperty;
8
- var __esm = (fn, res) => function __init() {
9
- return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
10
- };
11
6
  var __export = (target, all) => {
12
7
  for (var name in all)
13
8
  __defProp(target, name, { get: all[name], enumerable: true });
@@ -20,159 +15,8 @@ var __copyProps = (to, from, except, desc) => {
20
15
  }
21
16
  return to;
22
17
  };
23
- var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
24
- // If the importer is in node compatibility mode or this is not an ESM
25
- // file that has been converted to a CommonJS file using a Babel-
26
- // compatible transform (i.e. "__esModule" has not been set), then set
27
- // "default" to the CommonJS "module.exports" for node compatibility.
28
- isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
29
- mod
30
- ));
31
18
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
32
19
 
33
- // src/types.ts
34
- var VoiceStyle, Language, AUDIO_CONFIG, DEFAULT_URLS;
35
- var init_types = __esm({
36
- "src/types.ts"() {
37
- "use strict";
38
- VoiceStyle = /* @__PURE__ */ ((VoiceStyle2) => {
39
- VoiceStyle2["F1"] = "F1";
40
- VoiceStyle2["F2"] = "F2";
41
- VoiceStyle2["F3"] = "F3";
42
- VoiceStyle2["F4"] = "F4";
43
- VoiceStyle2["F5"] = "F5";
44
- VoiceStyle2["M1"] = "M1";
45
- VoiceStyle2["M2"] = "M2";
46
- VoiceStyle2["M3"] = "M3";
47
- VoiceStyle2["M4"] = "M4";
48
- VoiceStyle2["M5"] = "M5";
49
- return VoiceStyle2;
50
- })(VoiceStyle || {});
51
- Language = /* @__PURE__ */ ((Language2) => {
52
- Language2["ENGLISH"] = "en";
53
- Language2["SPANISH"] = "es";
54
- Language2["FRENCH"] = "fr";
55
- Language2["PORTUGUESE"] = "pt";
56
- Language2["KOREAN"] = "ko";
57
- return Language2;
58
- })(Language || {});
59
- AUDIO_CONFIG = {
60
- SAMPLE_RATE: 16e3,
61
- SAMPLE_RATE_INPUT: 16e3,
62
- SPEAKER_SAMPLE_RATE: 44100,
63
- SAMPLE_RATE_OUTPUT: 44100,
64
- CHANNELS: 1,
65
- CHUNK_DURATION_MS: 20,
66
- get CHUNK_SIZE() {
67
- return Math.floor(this.SAMPLE_RATE * this.CHUNK_DURATION_MS / 1e3);
68
- }
69
- };
70
- DEFAULT_URLS = {
71
- VOICE_AGENT: "wss://api.lokutor.com/ws/agent",
72
- TTS: "wss://api.lokutor.com/ws/tts"
73
- };
74
- }
75
- });
76
-
77
- // src/node-audio.ts
78
- var node_audio_exports = {};
79
- __export(node_audio_exports, {
80
- NodeAudioManager: () => NodeAudioManager
81
- });
82
- var NodeAudioManager;
83
- var init_node_audio = __esm({
84
- "src/node-audio.ts"() {
85
- "use strict";
86
- init_types();
87
- NodeAudioManager = class {
88
- speaker = null;
89
- recorder = null;
90
- recordingStream = null;
91
- isMuted = false;
92
- isListening = false;
93
- constructor() {
94
- }
95
- async init() {
96
- try {
97
- const Speaker = await import("speaker").catch(() => null);
98
- if (!Speaker) {
99
- console.warn('\u26A0\uFE0F Package "speaker" is missing. Hardware output will be disabled.');
100
- console.warn("\u{1F449} Run: npm install speaker");
101
- }
102
- } catch (e) {
103
- console.error("Error initializing Node audio:", e);
104
- }
105
- }
106
- async startMicrophone(onAudioInput) {
107
- if (this.isListening) return;
108
- try {
109
- const recorder = await import("node-record-lpcm16").catch(() => null);
110
- if (!recorder) {
111
- throw new Error('Package "node-record-lpcm16" is missing. Microphone input failed.\n\u{1F449} Run: npm install node-record-lpcm16');
112
- }
113
- console.log("\u{1F3A4} Starting microphone (Node.js)...");
114
- this.recordingStream = recorder.record({
115
- sampleRate: AUDIO_CONFIG.SAMPLE_RATE,
116
- threshold: 0,
117
- verbose: false,
118
- recordProgram: "sox"
119
- // default
120
- });
121
- this.recordingStream.stream().on("data", (chunk) => {
122
- if (!this.isMuted && onAudioInput) {
123
- onAudioInput(new Uint8Array(chunk));
124
- }
125
- });
126
- this.isListening = true;
127
- } catch (e) {
128
- console.error("Failed to start microphone:", e.message);
129
- throw e;
130
- }
131
- }
132
- stopMicrophone() {
133
- if (this.recordingStream) {
134
- this.recordingStream.stop();
135
- this.recordingStream = null;
136
- }
137
- this.isListening = false;
138
- }
139
- async playAudio(pcm16Data) {
140
- try {
141
- if (!this.speaker) {
142
- const Speaker = (await import("speaker")).default;
143
- this.speaker = new Speaker({
144
- channels: AUDIO_CONFIG.CHANNELS,
145
- bitDepth: 16,
146
- sampleRate: AUDIO_CONFIG.SPEAKER_SAMPLE_RATE
147
- });
148
- }
149
- this.speaker.write(Buffer.from(pcm16Data));
150
- } catch (e) {
151
- }
152
- }
153
- stopPlayback() {
154
- if (this.speaker) {
155
- this.speaker.end();
156
- this.speaker = null;
157
- }
158
- }
159
- cleanup() {
160
- this.stopMicrophone();
161
- this.stopPlayback();
162
- }
163
- isMicMuted() {
164
- return this.isMuted;
165
- }
166
- setMuted(muted) {
167
- this.isMuted = muted;
168
- }
169
- getAmplitude() {
170
- return 0;
171
- }
172
- };
173
- }
174
- });
175
-
176
20
  // src/index.ts
177
21
  var index_exports = {};
178
22
  __export(index_exports, {
@@ -197,13 +41,42 @@ __export(index_exports, {
197
41
  simpleTTS: () => simpleTTS
198
42
  });
199
43
  module.exports = __toCommonJS(index_exports);
200
- init_types();
201
44
 
202
- // src/client.ts
203
- init_types();
204
-
205
- // src/browser-audio.ts
206
- init_types();
45
+ // src/types.ts
46
+ var VoiceStyle = /* @__PURE__ */ ((VoiceStyle2) => {
47
+ VoiceStyle2["F1"] = "F1";
48
+ VoiceStyle2["F2"] = "F2";
49
+ VoiceStyle2["F3"] = "F3";
50
+ VoiceStyle2["F4"] = "F4";
51
+ VoiceStyle2["F5"] = "F5";
52
+ VoiceStyle2["M1"] = "M1";
53
+ VoiceStyle2["M2"] = "M2";
54
+ VoiceStyle2["M3"] = "M3";
55
+ VoiceStyle2["M4"] = "M4";
56
+ VoiceStyle2["M5"] = "M5";
57
+ return VoiceStyle2;
58
+ })(VoiceStyle || {});
59
+ var Language = /* @__PURE__ */ ((Language2) => {
60
+ Language2["ENGLISH"] = "en";
61
+ Language2["SPANISH"] = "es";
62
+ Language2["FRENCH"] = "fr";
63
+ Language2["PORTUGUESE"] = "pt";
64
+ Language2["KOREAN"] = "ko";
65
+ return Language2;
66
+ })(Language || {});
67
+ var AUDIO_CONFIG = {
68
+ SAMPLE_RATE: 16e3,
69
+ SPEAKER_SAMPLE_RATE: 44100,
70
+ CHANNELS: 1,
71
+ CHUNK_DURATION_MS: 20,
72
+ get CHUNK_SIZE() {
73
+ return Math.floor(this.SAMPLE_RATE * this.CHUNK_DURATION_MS / 1e3);
74
+ }
75
+ };
76
+ var DEFAULT_URLS = {
77
+ VOICE_AGENT: "wss://api.lokutor.com/ws/agent",
78
+ TTS: "wss://api.lokutor.com/ws/tts"
79
+ };
207
80
 
208
81
  // src/audio-utils.ts
209
82
  function pcm16ToFloat32(int16Data) {
@@ -648,7 +521,6 @@ var VoiceAgentClient = class {
648
521
  audioManager = null;
649
522
  enableAudio = false;
650
523
  currentGeneration = 0;
651
- listeners = {};
652
524
  // Connection resilience
653
525
  isUserDisconnect = false;
654
526
  reconnecting = false;
@@ -671,19 +543,14 @@ var VoiceAgentClient = class {
671
543
  }
672
544
  /**
673
545
  * Connect to the Lokutor Voice Agent server
674
- * @param customAudioManager Optional replacement for the default audio hardware handler
675
546
  */
676
- async connect(customAudioManager) {
547
+ async connect() {
677
548
  this.isUserDisconnect = false;
678
- if (this.enableAudio || customAudioManager) {
679
- if (customAudioManager) {
680
- this.audioManager = customAudioManager;
681
- } else if (!this.audioManager && typeof window !== "undefined") {
549
+ if (this.enableAudio) {
550
+ if (!this.audioManager) {
682
551
  this.audioManager = new BrowserAudioManager();
683
552
  }
684
- if (this.audioManager) {
685
- await this.audioManager.init();
686
- }
553
+ await this.audioManager.init();
687
554
  }
688
555
  return new Promise((resolve, reject) => {
689
556
  try {
@@ -744,34 +611,6 @@ var VoiceAgentClient = class {
744
611
  }
745
612
  });
746
613
  }
747
- /**
748
- * The "Golden Path" - Starts a managed session with hardware handled automatically.
749
- * This is the recommended way to start a conversation in both Browser and Node.js.
750
- */
751
- async startManaged(config) {
752
- this.enableAudio = true;
753
- if (config?.audioManager) {
754
- this.audioManager = config.audioManager;
755
- } else if (!this.audioManager) {
756
- if (typeof window !== "undefined") {
757
- this.audioManager = new BrowserAudioManager();
758
- } else {
759
- try {
760
- const { NodeAudioManager: NodeAudioManager2 } = await Promise.resolve().then(() => (init_node_audio(), node_audio_exports));
761
- this.audioManager = new NodeAudioManager2();
762
- } catch (e) {
763
- console.error('\u274C Failed to load NodeAudioManager. Please ensure "speaker" and "node-record-lpcm16" are installed.');
764
- }
765
- }
766
- }
767
- await this.connect();
768
- if (this.audioManager && this.isConnected) {
769
- await this.audioManager.startMicrophone((data) => {
770
- this.sendAudio(data);
771
- });
772
- }
773
- return this;
774
- }
775
614
  /**
776
615
  * Send initial configuration to the server
777
616
  */
@@ -873,51 +712,21 @@ var VoiceAgentClient = class {
873
712
  } catch (e) {
874
713
  }
875
714
  }
876
- /**
877
- * Register an event listener (for Python parity)
878
- */
879
- on(event, callback) {
880
- if (!this.listeners[event]) {
881
- this.listeners[event] = [];
882
- }
883
- this.listeners[event].push(callback);
884
- return this;
885
- }
886
- /**
887
- * Internal emitter for all events
888
- */
889
- emit(event, ...args) {
890
- const legacyMap = {
891
- "transcription": "onTranscription",
892
- "response": "onResponse",
893
- "audio": "onAudioCallback",
894
- "visemes": "onVisemesCallback",
895
- "status": "onStatus",
896
- "error": "onError"
897
- };
898
- const legacyKey = legacyMap[event];
899
- if (legacyKey && this[legacyKey]) {
900
- try {
901
- this[legacyKey](...args);
902
- } catch (e) {
903
- console.error(`Error in legacy callback ${legacyKey}:`, e);
904
- }
905
- }
906
- if (this.listeners[event]) {
907
- this.listeners[event].forEach((cb) => {
908
- try {
909
- cb(...args);
910
- } catch (e) {
911
- console.error(`Error in listener for ${event}:`, e);
912
- }
913
- });
715
+ audioListeners = [];
716
+ emit(event, data) {
717
+ if (event === "audio") {
718
+ if (this.onAudioCallback) this.onAudioCallback(data);
719
+ this.audioListeners.forEach((l) => l(data));
720
+ } else if (event === "visemes") {
721
+ if (this.onVisemesCallback) this.onVisemesCallback(data);
722
+ this.visemeListeners.forEach((l) => l(data));
914
723
  }
915
724
  }
916
725
  onAudio(callback) {
917
- this.on("audio", callback);
726
+ this.audioListeners.push(callback);
918
727
  }
919
728
  onVisemes(callback) {
920
- this.on("visemes", callback);
729
+ this.visemeListeners.push(callback);
921
730
  }
922
731
  /**
923
732
  * Disconnect from the server
@@ -996,28 +805,15 @@ var TTSClient = class {
996
805
  */
997
806
  synthesize(options) {
998
807
  return new Promise((resolve, reject) => {
999
- let activityTimeout;
1000
- let ws;
1001
- let startTime;
1002
- let firstByteReceived = false;
1003
- const refreshTimeout = () => {
1004
- if (activityTimeout) clearTimeout(activityTimeout);
1005
- activityTimeout = setTimeout(() => {
1006
- console.log("\u23F1\uFE0F TTS synthesis reached inactivity timeout (2s) - resolving");
1007
- if (ws) ws.close();
1008
- resolve();
1009
- }, 2e3);
1010
- };
1011
808
  try {
1012
809
  let url = DEFAULT_URLS.TTS;
1013
810
  if (this.apiKey) {
1014
811
  const separator = url.includes("?") ? "&" : "?";
1015
812
  url += `${separator}api_key=${this.apiKey}`;
1016
813
  }
1017
- ws = new WebSocket(url);
814
+ const ws = new WebSocket(url);
1018
815
  ws.binaryType = "arraybuffer";
1019
816
  ws.onopen = () => {
1020
- refreshTimeout();
1021
817
  const req = {
1022
818
  text: options.text,
1023
819
  voice: options.voice || "F1" /* F1 */,
@@ -1027,16 +823,9 @@ var TTSClient = class {
1027
823
  visemes: options.visemes || false
1028
824
  };
1029
825
  ws.send(JSON.stringify(req));
1030
- startTime = Date.now();
1031
826
  };
1032
827
  ws.onmessage = async (event) => {
1033
- refreshTimeout();
1034
828
  if (event.data instanceof ArrayBuffer) {
1035
- if (!firstByteReceived) {
1036
- const ttfb = Date.now() - startTime;
1037
- if (options.onTTFB) options.onTTFB(ttfb);
1038
- firstByteReceived = true;
1039
- }
1040
829
  if (options.onAudio) options.onAudio(new Uint8Array(event.data));
1041
830
  } else {
1042
831
  try {
@@ -1044,26 +833,18 @@ var TTSClient = class {
1044
833
  if (Array.isArray(msg) && options.onVisemes) {
1045
834
  options.onVisemes(msg);
1046
835
  }
1047
- if (msg.type === "eos") {
1048
- if (activityTimeout) clearTimeout(activityTimeout);
1049
- ws.close();
1050
- resolve();
1051
- }
1052
836
  } catch (e) {
1053
837
  }
1054
838
  }
1055
839
  };
1056
840
  ws.onerror = (err) => {
1057
- if (activityTimeout) clearTimeout(activityTimeout);
1058
841
  if (options.onError) options.onError(err);
1059
842
  reject(err);
1060
843
  };
1061
844
  ws.onclose = () => {
1062
- if (activityTimeout) clearTimeout(activityTimeout);
1063
845
  resolve();
1064
846
  };
1065
847
  } catch (err) {
1066
- if (activityTimeout) clearTimeout(activityTimeout);
1067
848
  if (options.onError) options.onError(err);
1068
849
  reject(err);
1069
850
  }
package/dist/index.mjs CHANGED
@@ -3,7 +3,7 @@ import {
3
3
  DEFAULT_URLS,
4
4
  Language,
5
5
  VoiceStyle
6
- } from "./chunk-UI24THO7.mjs";
6
+ } from "./chunk-SNNPJP5R.mjs";
7
7
 
8
8
  // src/audio-utils.ts
9
9
  function pcm16ToFloat32(int16Data) {
@@ -448,7 +448,6 @@ var VoiceAgentClient = class {
448
448
  audioManager = null;
449
449
  enableAudio = false;
450
450
  currentGeneration = 0;
451
- listeners = {};
452
451
  // Connection resilience
453
452
  isUserDisconnect = false;
454
453
  reconnecting = false;
@@ -471,19 +470,14 @@ var VoiceAgentClient = class {
471
470
  }
472
471
  /**
473
472
  * Connect to the Lokutor Voice Agent server
474
- * @param customAudioManager Optional replacement for the default audio hardware handler
475
473
  */
476
- async connect(customAudioManager) {
474
+ async connect() {
477
475
  this.isUserDisconnect = false;
478
- if (this.enableAudio || customAudioManager) {
479
- if (customAudioManager) {
480
- this.audioManager = customAudioManager;
481
- } else if (!this.audioManager && typeof window !== "undefined") {
476
+ if (this.enableAudio) {
477
+ if (!this.audioManager) {
482
478
  this.audioManager = new BrowserAudioManager();
483
479
  }
484
- if (this.audioManager) {
485
- await this.audioManager.init();
486
- }
480
+ await this.audioManager.init();
487
481
  }
488
482
  return new Promise((resolve, reject) => {
489
483
  try {
@@ -544,34 +538,6 @@ var VoiceAgentClient = class {
544
538
  }
545
539
  });
546
540
  }
547
- /**
548
- * The "Golden Path" - Starts a managed session with hardware handled automatically.
549
- * This is the recommended way to start a conversation in both Browser and Node.js.
550
- */
551
- async startManaged(config) {
552
- this.enableAudio = true;
553
- if (config?.audioManager) {
554
- this.audioManager = config.audioManager;
555
- } else if (!this.audioManager) {
556
- if (typeof window !== "undefined") {
557
- this.audioManager = new BrowserAudioManager();
558
- } else {
559
- try {
560
- const { NodeAudioManager } = await import("./node-audio-5HOWE6MC.mjs");
561
- this.audioManager = new NodeAudioManager();
562
- } catch (e) {
563
- console.error('\u274C Failed to load NodeAudioManager. Please ensure "speaker" and "node-record-lpcm16" are installed.');
564
- }
565
- }
566
- }
567
- await this.connect();
568
- if (this.audioManager && this.isConnected) {
569
- await this.audioManager.startMicrophone((data) => {
570
- this.sendAudio(data);
571
- });
572
- }
573
- return this;
574
- }
575
541
  /**
576
542
  * Send initial configuration to the server
577
543
  */
@@ -673,51 +639,21 @@ var VoiceAgentClient = class {
673
639
  } catch (e) {
674
640
  }
675
641
  }
676
- /**
677
- * Register an event listener (for Python parity)
678
- */
679
- on(event, callback) {
680
- if (!this.listeners[event]) {
681
- this.listeners[event] = [];
682
- }
683
- this.listeners[event].push(callback);
684
- return this;
685
- }
686
- /**
687
- * Internal emitter for all events
688
- */
689
- emit(event, ...args) {
690
- const legacyMap = {
691
- "transcription": "onTranscription",
692
- "response": "onResponse",
693
- "audio": "onAudioCallback",
694
- "visemes": "onVisemesCallback",
695
- "status": "onStatus",
696
- "error": "onError"
697
- };
698
- const legacyKey = legacyMap[event];
699
- if (legacyKey && this[legacyKey]) {
700
- try {
701
- this[legacyKey](...args);
702
- } catch (e) {
703
- console.error(`Error in legacy callback ${legacyKey}:`, e);
704
- }
705
- }
706
- if (this.listeners[event]) {
707
- this.listeners[event].forEach((cb) => {
708
- try {
709
- cb(...args);
710
- } catch (e) {
711
- console.error(`Error in listener for ${event}:`, e);
712
- }
713
- });
642
+ audioListeners = [];
643
+ emit(event, data) {
644
+ if (event === "audio") {
645
+ if (this.onAudioCallback) this.onAudioCallback(data);
646
+ this.audioListeners.forEach((l) => l(data));
647
+ } else if (event === "visemes") {
648
+ if (this.onVisemesCallback) this.onVisemesCallback(data);
649
+ this.visemeListeners.forEach((l) => l(data));
714
650
  }
715
651
  }
716
652
  onAudio(callback) {
717
- this.on("audio", callback);
653
+ this.audioListeners.push(callback);
718
654
  }
719
655
  onVisemes(callback) {
720
- this.on("visemes", callback);
656
+ this.visemeListeners.push(callback);
721
657
  }
722
658
  /**
723
659
  * Disconnect from the server
@@ -796,28 +732,15 @@ var TTSClient = class {
796
732
  */
797
733
  synthesize(options) {
798
734
  return new Promise((resolve, reject) => {
799
- let activityTimeout;
800
- let ws;
801
- let startTime;
802
- let firstByteReceived = false;
803
- const refreshTimeout = () => {
804
- if (activityTimeout) clearTimeout(activityTimeout);
805
- activityTimeout = setTimeout(() => {
806
- console.log("\u23F1\uFE0F TTS synthesis reached inactivity timeout (2s) - resolving");
807
- if (ws) ws.close();
808
- resolve();
809
- }, 2e3);
810
- };
811
735
  try {
812
736
  let url = DEFAULT_URLS.TTS;
813
737
  if (this.apiKey) {
814
738
  const separator = url.includes("?") ? "&" : "?";
815
739
  url += `${separator}api_key=${this.apiKey}`;
816
740
  }
817
- ws = new WebSocket(url);
741
+ const ws = new WebSocket(url);
818
742
  ws.binaryType = "arraybuffer";
819
743
  ws.onopen = () => {
820
- refreshTimeout();
821
744
  const req = {
822
745
  text: options.text,
823
746
  voice: options.voice || "F1" /* F1 */,
@@ -827,16 +750,9 @@ var TTSClient = class {
827
750
  visemes: options.visemes || false
828
751
  };
829
752
  ws.send(JSON.stringify(req));
830
- startTime = Date.now();
831
753
  };
832
754
  ws.onmessage = async (event) => {
833
- refreshTimeout();
834
755
  if (event.data instanceof ArrayBuffer) {
835
- if (!firstByteReceived) {
836
- const ttfb = Date.now() - startTime;
837
- if (options.onTTFB) options.onTTFB(ttfb);
838
- firstByteReceived = true;
839
- }
840
756
  if (options.onAudio) options.onAudio(new Uint8Array(event.data));
841
757
  } else {
842
758
  try {
@@ -844,26 +760,18 @@ var TTSClient = class {
844
760
  if (Array.isArray(msg) && options.onVisemes) {
845
761
  options.onVisemes(msg);
846
762
  }
847
- if (msg.type === "eos") {
848
- if (activityTimeout) clearTimeout(activityTimeout);
849
- ws.close();
850
- resolve();
851
- }
852
763
  } catch (e) {
853
764
  }
854
765
  }
855
766
  };
856
767
  ws.onerror = (err) => {
857
- if (activityTimeout) clearTimeout(activityTimeout);
858
768
  if (options.onError) options.onError(err);
859
769
  reject(err);
860
770
  };
861
771
  ws.onclose = () => {
862
- if (activityTimeout) clearTimeout(activityTimeout);
863
772
  resolve();
864
773
  };
865
774
  } catch (err) {
866
- if (activityTimeout) clearTimeout(activityTimeout);
867
775
  if (options.onError) options.onError(err);
868
776
  reject(err);
869
777
  }
@@ -0,0 +1,25 @@
1
+ /**
2
+ * Node.js-only audio manager.
3
+ *
4
+ * This module is intentionally separate, and is not exported from the browser default
5
+ * entrypoint, so browser bundlers do not include Node-only dependencies.
6
+ */
7
+ declare class NodeAudioManager {
8
+ private speaker;
9
+ private recorder;
10
+ private recordingStream;
11
+ private isMuted;
12
+ private isListening;
13
+ constructor();
14
+ init(): Promise<void>;
15
+ startMicrophone(onAudioInput: (pcm16Data: Uint8Array) => void): Promise<void>;
16
+ stopMicrophone(): void;
17
+ playAudio(pcm16Data: Uint8Array): Promise<void>;
18
+ stopPlayback(): void;
19
+ cleanup(): void;
20
+ isMicMuted(): boolean;
21
+ setMuted(muted: boolean): void;
22
+ getAmplitude(): number;
23
+ }
24
+
25
+ export { NodeAudioManager };
@@ -0,0 +1,25 @@
1
+ /**
2
+ * Node.js-only audio manager.
3
+ *
4
+ * This module is intentionally separate, and is not exported from the browser default
5
+ * entrypoint, so browser bundlers do not include Node-only dependencies.
6
+ */
7
+ declare class NodeAudioManager {
8
+ private speaker;
9
+ private recorder;
10
+ private recordingStream;
11
+ private isMuted;
12
+ private isListening;
13
+ constructor();
14
+ init(): Promise<void>;
15
+ startMicrophone(onAudioInput: (pcm16Data: Uint8Array) => void): Promise<void>;
16
+ stopMicrophone(): void;
17
+ playAudio(pcm16Data: Uint8Array): Promise<void>;
18
+ stopPlayback(): void;
19
+ cleanup(): void;
20
+ isMicMuted(): boolean;
21
+ setMuted(muted: boolean): void;
22
+ getAmplitude(): number;
23
+ }
24
+
25
+ export { NodeAudioManager };
@@ -0,0 +1,132 @@
1
+ "use strict";
2
+ var __create = Object.create;
3
+ var __defProp = Object.defineProperty;
4
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
+ var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
7
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ var __export = (target, all) => {
9
+ for (var name in all)
10
+ __defProp(target, name, { get: all[name], enumerable: true });
11
+ };
12
+ var __copyProps = (to, from, except, desc) => {
13
+ if (from && typeof from === "object" || typeof from === "function") {
14
+ for (let key of __getOwnPropNames(from))
15
+ if (!__hasOwnProp.call(to, key) && key !== except)
16
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
17
+ }
18
+ return to;
19
+ };
20
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
+ // If the importer is in node compatibility mode or this is not an ESM
22
+ // file that has been converted to a CommonJS file using a Babel-
23
+ // compatible transform (i.e. "__esModule" has not been set), then set
24
+ // "default" to the CommonJS "module.exports" for node compatibility.
25
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
+ mod
27
+ ));
28
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
+
30
+ // src/node-audio.ts
31
+ var node_audio_exports = {};
32
+ __export(node_audio_exports, {
33
+ NodeAudioManager: () => NodeAudioManager
34
+ });
35
+ module.exports = __toCommonJS(node_audio_exports);
36
+
37
+ // src/types.ts
38
+ var AUDIO_CONFIG = {
39
+ SAMPLE_RATE: 16e3,
40
+ SPEAKER_SAMPLE_RATE: 44100,
41
+ CHANNELS: 1,
42
+ CHUNK_DURATION_MS: 20,
43
+ get CHUNK_SIZE() {
44
+ return Math.floor(this.SAMPLE_RATE * this.CHUNK_DURATION_MS / 1e3);
45
+ }
46
+ };
47
+
48
+ // src/node-audio.ts
49
+ var NodeAudioManager = class {
50
+ speaker = null;
51
+ recorder = null;
52
+ recordingStream = null;
53
+ isMuted = false;
54
+ isListening = false;
55
+ constructor() {
56
+ }
57
+ async init() {
58
+ try {
59
+ const Speaker = await import("speaker").catch(() => null);
60
+ if (!Speaker) {
61
+ console.warn('\u26A0\uFE0F Package "speaker" is missing. Hardware output will be disabled.');
62
+ console.warn("\u{1F449} Run: npm install speaker");
63
+ }
64
+ } catch (e) {
65
+ console.error("Error initializing Node audio:", e);
66
+ }
67
+ }
68
+ async startMicrophone(onAudioInput) {
69
+ if (this.isListening) return;
70
+ const recorder = await import("node-record-lpcm16").catch(() => null);
71
+ if (!recorder) {
72
+ throw new Error('Package "node-record-lpcm16" is missing. Microphone input failed.\n\u{1F449} Run: npm install node-record-lpcm16');
73
+ }
74
+ this.recorder = recorder;
75
+ this.recordingStream = recorder.record({
76
+ sampleRate: AUDIO_CONFIG.SAMPLE_RATE,
77
+ threshold: 0,
78
+ verbose: false,
79
+ recordProgram: "sox"
80
+ });
81
+ this.recordingStream.stream().on("data", (chunk) => {
82
+ if (!this.isMuted && onAudioInput) {
83
+ onAudioInput(new Uint8Array(chunk));
84
+ }
85
+ });
86
+ this.isListening = true;
87
+ }
88
+ stopMicrophone() {
89
+ if (this.recordingStream) {
90
+ this.recordingStream.stop();
91
+ this.recordingStream = null;
92
+ }
93
+ this.isListening = false;
94
+ }
95
+ async playAudio(pcm16Data) {
96
+ try {
97
+ if (!this.speaker) {
98
+ const Speaker = (await import("speaker")).default;
99
+ this.speaker = new Speaker({
100
+ channels: AUDIO_CONFIG.CHANNELS,
101
+ bitDepth: 16,
102
+ sampleRate: AUDIO_CONFIG.SPEAKER_SAMPLE_RATE
103
+ });
104
+ }
105
+ this.speaker.write(Buffer.from(pcm16Data));
106
+ } catch {
107
+ }
108
+ }
109
+ stopPlayback() {
110
+ if (this.speaker) {
111
+ this.speaker.end();
112
+ this.speaker = null;
113
+ }
114
+ }
115
+ cleanup() {
116
+ this.stopMicrophone();
117
+ this.stopPlayback();
118
+ }
119
+ isMicMuted() {
120
+ return this.isMuted;
121
+ }
122
+ setMuted(muted) {
123
+ this.isMuted = muted;
124
+ }
125
+ getAmplitude() {
126
+ return 0;
127
+ }
128
+ };
129
+ // Annotate the CommonJS export names for ESM import in node:
130
+ 0 && (module.exports = {
131
+ NodeAudioManager
132
+ });
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  AUDIO_CONFIG
3
- } from "./chunk-UI24THO7.mjs";
3
+ } from "./chunk-SNNPJP5R.mjs";
4
4
 
5
5
  // src/node-audio.ts
6
6
  var NodeAudioManager = class {
@@ -16,7 +16,7 @@ var NodeAudioManager = class {
16
16
  const Speaker = await import("speaker").catch(() => null);
17
17
  if (!Speaker) {
18
18
  console.warn('\u26A0\uFE0F Package "speaker" is missing. Hardware output will be disabled.');
19
- console.warn("\u{1F449} Run: npm install speaker");
19
+ console.warn("\u{1F449} Run: npm install speaker");
20
20
  }
21
21
  } catch (e) {
22
22
  console.error("Error initializing Node audio:", e);
@@ -24,29 +24,23 @@ var NodeAudioManager = class {
24
24
  }
25
25
  async startMicrophone(onAudioInput) {
26
26
  if (this.isListening) return;
27
- try {
28
- const recorder = await import("node-record-lpcm16").catch(() => null);
29
- if (!recorder) {
30
- throw new Error('Package "node-record-lpcm16" is missing. Microphone input failed.\n\u{1F449} Run: npm install node-record-lpcm16');
31
- }
32
- console.log("\u{1F3A4} Starting microphone (Node.js)...");
33
- this.recordingStream = recorder.record({
34
- sampleRate: AUDIO_CONFIG.SAMPLE_RATE,
35
- threshold: 0,
36
- verbose: false,
37
- recordProgram: "sox"
38
- // default
39
- });
40
- this.recordingStream.stream().on("data", (chunk) => {
41
- if (!this.isMuted && onAudioInput) {
42
- onAudioInput(new Uint8Array(chunk));
43
- }
44
- });
45
- this.isListening = true;
46
- } catch (e) {
47
- console.error("Failed to start microphone:", e.message);
48
- throw e;
27
+ const recorder = await import("node-record-lpcm16").catch(() => null);
28
+ if (!recorder) {
29
+ throw new Error('Package "node-record-lpcm16" is missing. Microphone input failed.\n\u{1F449} Run: npm install node-record-lpcm16');
49
30
  }
31
+ this.recorder = recorder;
32
+ this.recordingStream = recorder.record({
33
+ sampleRate: AUDIO_CONFIG.SAMPLE_RATE,
34
+ threshold: 0,
35
+ verbose: false,
36
+ recordProgram: "sox"
37
+ });
38
+ this.recordingStream.stream().on("data", (chunk) => {
39
+ if (!this.isMuted && onAudioInput) {
40
+ onAudioInput(new Uint8Array(chunk));
41
+ }
42
+ });
43
+ this.isListening = true;
50
44
  }
51
45
  stopMicrophone() {
52
46
  if (this.recordingStream) {
@@ -66,7 +60,7 @@ var NodeAudioManager = class {
66
60
  });
67
61
  }
68
62
  this.speaker.write(Buffer.from(pcm16Data));
69
- } catch (e) {
63
+ } catch {
70
64
  }
71
65
  }
72
66
  stopPlayback() {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lokutor/sdk",
3
- "version": "1.1.13",
3
+ "version": "1.1.15",
4
4
  "description": "JavaScript/TypeScript SDK for Lokutor Real-time Voice AI",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",
@@ -8,9 +8,23 @@
8
8
  "files": [
9
9
  "dist"
10
10
  ],
11
+ "exports": {
12
+ ".": {
13
+ "import": "./dist/index.mjs",
14
+ "require": "./dist/index.js"
15
+ },
16
+ "./node-audio": {
17
+ "import": "./dist/node-audio.mjs",
18
+ "require": "./dist/node-audio.js"
19
+ }
20
+ },
21
+ "browser": {
22
+ "speaker": false,
23
+ "node-record-lpcm16": false
24
+ },
11
25
  "scripts": {
12
- "build": "tsup src/index.ts --format cjs,esm --dts",
13
- "dev": "tsup src/index.ts --format cjs,esm --watch --dts",
26
+ "build": "tsup src/index.ts src/node-audio.ts --format cjs,esm --dts --clean",
27
+ "dev": "tsup src/index.ts src/node-audio.ts --format cjs,esm --watch --dts --clean",
14
28
  "test": "vitest run",
15
29
  "test:watch": "vitest",
16
30
  "lint": "eslint src --ext .ts",