@lokutor/sdk 1.1.12 → 1.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,44 @@
1
+ // src/types.ts
2
+ var VoiceStyle = /* @__PURE__ */ ((VoiceStyle2) => {
3
+ VoiceStyle2["F1"] = "F1";
4
+ VoiceStyle2["F2"] = "F2";
5
+ VoiceStyle2["F3"] = "F3";
6
+ VoiceStyle2["F4"] = "F4";
7
+ VoiceStyle2["F5"] = "F5";
8
+ VoiceStyle2["M1"] = "M1";
9
+ VoiceStyle2["M2"] = "M2";
10
+ VoiceStyle2["M3"] = "M3";
11
+ VoiceStyle2["M4"] = "M4";
12
+ VoiceStyle2["M5"] = "M5";
13
+ return VoiceStyle2;
14
+ })(VoiceStyle || {});
15
+ var Language = /* @__PURE__ */ ((Language2) => {
16
+ Language2["ENGLISH"] = "en";
17
+ Language2["SPANISH"] = "es";
18
+ Language2["FRENCH"] = "fr";
19
+ Language2["PORTUGUESE"] = "pt";
20
+ Language2["KOREAN"] = "ko";
21
+ return Language2;
22
+ })(Language || {});
23
+ var AUDIO_CONFIG = {
24
+ SAMPLE_RATE: 16e3,
25
+ SAMPLE_RATE_INPUT: 16e3,
26
+ SPEAKER_SAMPLE_RATE: 44100,
27
+ SAMPLE_RATE_OUTPUT: 44100,
28
+ CHANNELS: 1,
29
+ CHUNK_DURATION_MS: 20,
30
+ get CHUNK_SIZE() {
31
+ return Math.floor(this.SAMPLE_RATE * this.CHUNK_DURATION_MS / 1e3);
32
+ }
33
+ };
34
+ var DEFAULT_URLS = {
35
+ VOICE_AGENT: "wss://api.lokutor.com/ws/agent",
36
+ TTS: "wss://api.lokutor.com/ws/tts"
37
+ };
38
+
39
+ export {
40
+ VoiceStyle,
41
+ Language,
42
+ AUDIO_CONFIG,
43
+ DEFAULT_URLS
44
+ };
package/dist/index.d.mts CHANGED
@@ -28,7 +28,9 @@ declare enum Language {
28
28
  */
29
29
  declare const AUDIO_CONFIG: {
30
30
  SAMPLE_RATE: number;
31
+ SAMPLE_RATE_INPUT: number;
31
32
  SPEAKER_SAMPLE_RATE: number;
33
+ SAMPLE_RATE_OUTPUT: number;
32
34
  CHANNELS: number;
33
35
  CHUNK_DURATION_MS: number;
34
36
  readonly CHUNK_SIZE: number;
@@ -120,6 +122,20 @@ interface ToolCall {
120
122
  arguments: string;
121
123
  }
122
124
 
125
+ /**
126
+ * Interface for audio hardware management (Browser/Node parity)
127
+ */
128
+ interface AudioManager {
129
+ init(): Promise<void>;
130
+ startMicrophone(onAudioInput: (pcm16Data: Uint8Array) => void): Promise<void>;
131
+ stopMicrophone(): void;
132
+ playAudio(pcm16Data: Uint8Array): void;
133
+ stopPlayback(): void;
134
+ cleanup(): void;
135
+ isMicMuted(): boolean;
136
+ setMuted(muted: boolean): void;
137
+ getAmplitude(): number;
138
+ }
123
139
  /**
124
140
  * Main client for Lokutor Voice Agent SDK
125
141
  *
@@ -145,6 +161,7 @@ declare class VoiceAgentClient {
145
161
  private audioManager;
146
162
  private enableAudio;
147
163
  private currentGeneration;
164
+ private listeners;
148
165
  private isUserDisconnect;
149
166
  private reconnecting;
150
167
  private reconnectAttempts;
@@ -160,8 +177,16 @@ declare class VoiceAgentClient {
160
177
  });
161
178
  /**
162
179
  * Connect to the Lokutor Voice Agent server
180
+ * @param customAudioManager Optional replacement for the default audio hardware handler
181
+ */
182
+ connect(customAudioManager?: AudioManager): Promise<boolean>;
183
+ /**
184
+ * The "Golden Path" - Starts a managed session with hardware handled automatically.
185
+ * This is the recommended way to start a conversation in both Browser and Node.js.
163
186
  */
164
- connect(): Promise<boolean>;
187
+ startManaged(config?: {
188
+ audioManager?: AudioManager;
189
+ }): Promise<this>;
165
190
  /**
166
191
  * Send initial configuration to the server
167
192
  */
@@ -179,7 +204,13 @@ declare class VoiceAgentClient {
179
204
  * Handle incoming text messages (metadata/transcriptions)
180
205
  */
181
206
  private handleTextMessage;
182
- private audioListeners;
207
+ /**
208
+ * Register an event listener (for Python parity)
209
+ */
210
+ on(event: string, callback: Function): this;
211
+ /**
212
+ * Internal emitter for all events
213
+ */
183
214
  private emit;
184
215
  onAudio(callback: (data: Uint8Array) => void): void;
185
216
  onVisemes(callback: (visemes: Viseme[]) => void): void;
@@ -236,6 +267,7 @@ declare class TTSClient {
236
267
  visemes?: boolean;
237
268
  onAudio?: (data: Uint8Array) => void;
238
269
  onVisemes?: (visemes: any[]) => void;
270
+ onTTFB?: (ms: number) => void;
239
271
  onError?: (error: any) => void;
240
272
  }): Promise<void>;
241
273
  }
@@ -443,4 +475,4 @@ declare class BrowserAudioManager {
443
475
  isRecording(): boolean;
444
476
  }
445
477
 
446
- export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
478
+ export { AUDIO_CONFIG, type AnalyserConfig, type AudioManager, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
package/dist/index.d.ts CHANGED
@@ -28,7 +28,9 @@ declare enum Language {
28
28
  */
29
29
  declare const AUDIO_CONFIG: {
30
30
  SAMPLE_RATE: number;
31
+ SAMPLE_RATE_INPUT: number;
31
32
  SPEAKER_SAMPLE_RATE: number;
33
+ SAMPLE_RATE_OUTPUT: number;
32
34
  CHANNELS: number;
33
35
  CHUNK_DURATION_MS: number;
34
36
  readonly CHUNK_SIZE: number;
@@ -120,6 +122,20 @@ interface ToolCall {
120
122
  arguments: string;
121
123
  }
122
124
 
125
+ /**
126
+ * Interface for audio hardware management (Browser/Node parity)
127
+ */
128
+ interface AudioManager {
129
+ init(): Promise<void>;
130
+ startMicrophone(onAudioInput: (pcm16Data: Uint8Array) => void): Promise<void>;
131
+ stopMicrophone(): void;
132
+ playAudio(pcm16Data: Uint8Array): void;
133
+ stopPlayback(): void;
134
+ cleanup(): void;
135
+ isMicMuted(): boolean;
136
+ setMuted(muted: boolean): void;
137
+ getAmplitude(): number;
138
+ }
123
139
  /**
124
140
  * Main client for Lokutor Voice Agent SDK
125
141
  *
@@ -145,6 +161,7 @@ declare class VoiceAgentClient {
145
161
  private audioManager;
146
162
  private enableAudio;
147
163
  private currentGeneration;
164
+ private listeners;
148
165
  private isUserDisconnect;
149
166
  private reconnecting;
150
167
  private reconnectAttempts;
@@ -160,8 +177,16 @@ declare class VoiceAgentClient {
160
177
  });
161
178
  /**
162
179
  * Connect to the Lokutor Voice Agent server
180
+ * @param customAudioManager Optional replacement for the default audio hardware handler
181
+ */
182
+ connect(customAudioManager?: AudioManager): Promise<boolean>;
183
+ /**
184
+ * The "Golden Path" - Starts a managed session with hardware handled automatically.
185
+ * This is the recommended way to start a conversation in both Browser and Node.js.
163
186
  */
164
- connect(): Promise<boolean>;
187
+ startManaged(config?: {
188
+ audioManager?: AudioManager;
189
+ }): Promise<this>;
165
190
  /**
166
191
  * Send initial configuration to the server
167
192
  */
@@ -179,7 +204,13 @@ declare class VoiceAgentClient {
179
204
  * Handle incoming text messages (metadata/transcriptions)
180
205
  */
181
206
  private handleTextMessage;
182
- private audioListeners;
207
+ /**
208
+ * Register an event listener (for Python parity)
209
+ */
210
+ on(event: string, callback: Function): this;
211
+ /**
212
+ * Internal emitter for all events
213
+ */
183
214
  private emit;
184
215
  onAudio(callback: (data: Uint8Array) => void): void;
185
216
  onVisemes(callback: (visemes: Viseme[]) => void): void;
@@ -236,6 +267,7 @@ declare class TTSClient {
236
267
  visemes?: boolean;
237
268
  onAudio?: (data: Uint8Array) => void;
238
269
  onVisemes?: (visemes: any[]) => void;
270
+ onTTFB?: (ms: number) => void;
239
271
  onError?: (error: any) => void;
240
272
  }): Promise<void>;
241
273
  }
@@ -443,4 +475,4 @@ declare class BrowserAudioManager {
443
475
  isRecording(): boolean;
444
476
  }
445
477
 
446
- export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
478
+ export { AUDIO_CONFIG, type AnalyserConfig, type AudioManager, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
package/dist/index.js CHANGED
@@ -1,8 +1,13 @@
1
1
  "use strict";
2
+ var __create = Object.create;
2
3
  var __defProp = Object.defineProperty;
3
4
  var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
5
  var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
5
7
  var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ var __esm = (fn, res) => function __init() {
9
+ return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
10
+ };
6
11
  var __export = (target, all) => {
7
12
  for (var name in all)
8
13
  __defProp(target, name, { get: all[name], enumerable: true });
@@ -15,8 +20,159 @@ var __copyProps = (to, from, except, desc) => {
15
20
  }
16
21
  return to;
17
22
  };
23
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
24
+ // If the importer is in node compatibility mode or this is not an ESM
25
+ // file that has been converted to a CommonJS file using a Babel-
26
+ // compatible transform (i.e. "__esModule" has not been set), then set
27
+ // "default" to the CommonJS "module.exports" for node compatibility.
28
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
29
+ mod
30
+ ));
18
31
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
32
 
33
+ // src/types.ts
34
+ var VoiceStyle, Language, AUDIO_CONFIG, DEFAULT_URLS;
35
+ var init_types = __esm({
36
+ "src/types.ts"() {
37
+ "use strict";
38
+ VoiceStyle = /* @__PURE__ */ ((VoiceStyle2) => {
39
+ VoiceStyle2["F1"] = "F1";
40
+ VoiceStyle2["F2"] = "F2";
41
+ VoiceStyle2["F3"] = "F3";
42
+ VoiceStyle2["F4"] = "F4";
43
+ VoiceStyle2["F5"] = "F5";
44
+ VoiceStyle2["M1"] = "M1";
45
+ VoiceStyle2["M2"] = "M2";
46
+ VoiceStyle2["M3"] = "M3";
47
+ VoiceStyle2["M4"] = "M4";
48
+ VoiceStyle2["M5"] = "M5";
49
+ return VoiceStyle2;
50
+ })(VoiceStyle || {});
51
+ Language = /* @__PURE__ */ ((Language2) => {
52
+ Language2["ENGLISH"] = "en";
53
+ Language2["SPANISH"] = "es";
54
+ Language2["FRENCH"] = "fr";
55
+ Language2["PORTUGUESE"] = "pt";
56
+ Language2["KOREAN"] = "ko";
57
+ return Language2;
58
+ })(Language || {});
59
+ AUDIO_CONFIG = {
60
+ SAMPLE_RATE: 16e3,
61
+ SAMPLE_RATE_INPUT: 16e3,
62
+ SPEAKER_SAMPLE_RATE: 44100,
63
+ SAMPLE_RATE_OUTPUT: 44100,
64
+ CHANNELS: 1,
65
+ CHUNK_DURATION_MS: 20,
66
+ get CHUNK_SIZE() {
67
+ return Math.floor(this.SAMPLE_RATE * this.CHUNK_DURATION_MS / 1e3);
68
+ }
69
+ };
70
+ DEFAULT_URLS = {
71
+ VOICE_AGENT: "wss://api.lokutor.com/ws/agent",
72
+ TTS: "wss://api.lokutor.com/ws/tts"
73
+ };
74
+ }
75
+ });
76
+
77
+ // src/node-audio.ts
78
+ var node_audio_exports = {};
79
+ __export(node_audio_exports, {
80
+ NodeAudioManager: () => NodeAudioManager
81
+ });
82
+ var NodeAudioManager;
83
+ var init_node_audio = __esm({
84
+ "src/node-audio.ts"() {
85
+ "use strict";
86
+ init_types();
87
+ NodeAudioManager = class {
88
+ speaker = null;
89
+ recorder = null;
90
+ recordingStream = null;
91
+ isMuted = false;
92
+ isListening = false;
93
+ constructor() {
94
+ }
95
+ async init() {
96
+ try {
97
+ const Speaker = await import("speaker").catch(() => null);
98
+ if (!Speaker) {
99
+ console.warn('\u26A0\uFE0F Package "speaker" is missing. Hardware output will be disabled.');
100
+ console.warn("\u{1F449} Run: npm install speaker");
101
+ }
102
+ } catch (e) {
103
+ console.error("Error initializing Node audio:", e);
104
+ }
105
+ }
106
+ async startMicrophone(onAudioInput) {
107
+ if (this.isListening) return;
108
+ try {
109
+ const recorder = await import("node-record-lpcm16").catch(() => null);
110
+ if (!recorder) {
111
+ throw new Error('Package "node-record-lpcm16" is missing. Microphone input failed.\n\u{1F449} Run: npm install node-record-lpcm16');
112
+ }
113
+ console.log("\u{1F3A4} Starting microphone (Node.js)...");
114
+ this.recordingStream = recorder.record({
115
+ sampleRate: AUDIO_CONFIG.SAMPLE_RATE,
116
+ threshold: 0,
117
+ verbose: false,
118
+ recordProgram: "sox"
119
+ // default
120
+ });
121
+ this.recordingStream.stream().on("data", (chunk) => {
122
+ if (!this.isMuted && onAudioInput) {
123
+ onAudioInput(new Uint8Array(chunk));
124
+ }
125
+ });
126
+ this.isListening = true;
127
+ } catch (e) {
128
+ console.error("Failed to start microphone:", e.message);
129
+ throw e;
130
+ }
131
+ }
132
+ stopMicrophone() {
133
+ if (this.recordingStream) {
134
+ this.recordingStream.stop();
135
+ this.recordingStream = null;
136
+ }
137
+ this.isListening = false;
138
+ }
139
+ async playAudio(pcm16Data) {
140
+ try {
141
+ if (!this.speaker) {
142
+ const Speaker = (await import("speaker")).default;
143
+ this.speaker = new Speaker({
144
+ channels: AUDIO_CONFIG.CHANNELS,
145
+ bitDepth: 16,
146
+ sampleRate: AUDIO_CONFIG.SPEAKER_SAMPLE_RATE
147
+ });
148
+ }
149
+ this.speaker.write(Buffer.from(pcm16Data));
150
+ } catch (e) {
151
+ }
152
+ }
153
+ stopPlayback() {
154
+ if (this.speaker) {
155
+ this.speaker.end();
156
+ this.speaker = null;
157
+ }
158
+ }
159
+ cleanup() {
160
+ this.stopMicrophone();
161
+ this.stopPlayback();
162
+ }
163
+ isMicMuted() {
164
+ return this.isMuted;
165
+ }
166
+ setMuted(muted) {
167
+ this.isMuted = muted;
168
+ }
169
+ getAmplitude() {
170
+ return 0;
171
+ }
172
+ };
173
+ }
174
+ });
175
+
20
176
  // src/index.ts
21
177
  var index_exports = {};
22
178
  __export(index_exports, {
@@ -41,42 +197,13 @@ __export(index_exports, {
41
197
  simpleTTS: () => simpleTTS
42
198
  });
43
199
  module.exports = __toCommonJS(index_exports);
200
+ init_types();
44
201
 
45
- // src/types.ts
46
- var VoiceStyle = /* @__PURE__ */ ((VoiceStyle2) => {
47
- VoiceStyle2["F1"] = "F1";
48
- VoiceStyle2["F2"] = "F2";
49
- VoiceStyle2["F3"] = "F3";
50
- VoiceStyle2["F4"] = "F4";
51
- VoiceStyle2["F5"] = "F5";
52
- VoiceStyle2["M1"] = "M1";
53
- VoiceStyle2["M2"] = "M2";
54
- VoiceStyle2["M3"] = "M3";
55
- VoiceStyle2["M4"] = "M4";
56
- VoiceStyle2["M5"] = "M5";
57
- return VoiceStyle2;
58
- })(VoiceStyle || {});
59
- var Language = /* @__PURE__ */ ((Language2) => {
60
- Language2["ENGLISH"] = "en";
61
- Language2["SPANISH"] = "es";
62
- Language2["FRENCH"] = "fr";
63
- Language2["PORTUGUESE"] = "pt";
64
- Language2["KOREAN"] = "ko";
65
- return Language2;
66
- })(Language || {});
67
- var AUDIO_CONFIG = {
68
- SAMPLE_RATE: 16e3,
69
- SPEAKER_SAMPLE_RATE: 44100,
70
- CHANNELS: 1,
71
- CHUNK_DURATION_MS: 20,
72
- get CHUNK_SIZE() {
73
- return Math.floor(this.SAMPLE_RATE * this.CHUNK_DURATION_MS / 1e3);
74
- }
75
- };
76
- var DEFAULT_URLS = {
77
- VOICE_AGENT: "wss://api.lokutor.com/ws/agent",
78
- TTS: "wss://api.lokutor.com/ws/tts"
79
- };
202
+ // src/client.ts
203
+ init_types();
204
+
205
+ // src/browser-audio.ts
206
+ init_types();
80
207
 
81
208
  // src/audio-utils.ts
82
209
  function pcm16ToFloat32(int16Data) {
@@ -521,6 +648,7 @@ var VoiceAgentClient = class {
521
648
  audioManager = null;
522
649
  enableAudio = false;
523
650
  currentGeneration = 0;
651
+ listeners = {};
524
652
  // Connection resilience
525
653
  isUserDisconnect = false;
526
654
  reconnecting = false;
@@ -543,14 +671,19 @@ var VoiceAgentClient = class {
543
671
  }
544
672
  /**
545
673
  * Connect to the Lokutor Voice Agent server
674
+ * @param customAudioManager Optional replacement for the default audio hardware handler
546
675
  */
547
- async connect() {
676
+ async connect(customAudioManager) {
548
677
  this.isUserDisconnect = false;
549
- if (this.enableAudio) {
550
- if (!this.audioManager) {
678
+ if (this.enableAudio || customAudioManager) {
679
+ if (customAudioManager) {
680
+ this.audioManager = customAudioManager;
681
+ } else if (!this.audioManager && typeof window !== "undefined") {
551
682
  this.audioManager = new BrowserAudioManager();
552
683
  }
553
- await this.audioManager.init();
684
+ if (this.audioManager) {
685
+ await this.audioManager.init();
686
+ }
554
687
  }
555
688
  return new Promise((resolve, reject) => {
556
689
  try {
@@ -611,6 +744,34 @@ var VoiceAgentClient = class {
611
744
  }
612
745
  });
613
746
  }
747
+ /**
748
+ * The "Golden Path" - Starts a managed session with hardware handled automatically.
749
+ * This is the recommended way to start a conversation in both Browser and Node.js.
750
+ */
751
+ async startManaged(config) {
752
+ this.enableAudio = true;
753
+ if (config?.audioManager) {
754
+ this.audioManager = config.audioManager;
755
+ } else if (!this.audioManager) {
756
+ if (typeof window !== "undefined") {
757
+ this.audioManager = new BrowserAudioManager();
758
+ } else {
759
+ try {
760
+ const { NodeAudioManager: NodeAudioManager2 } = await Promise.resolve().then(() => (init_node_audio(), node_audio_exports));
761
+ this.audioManager = new NodeAudioManager2();
762
+ } catch (e) {
763
+ console.error('\u274C Failed to load NodeAudioManager. Please ensure "speaker" and "node-record-lpcm16" are installed.');
764
+ }
765
+ }
766
+ }
767
+ await this.connect();
768
+ if (this.audioManager && this.isConnected) {
769
+ await this.audioManager.startMicrophone((data) => {
770
+ this.sendAudio(data);
771
+ });
772
+ }
773
+ return this;
774
+ }
614
775
  /**
615
776
  * Send initial configuration to the server
616
777
  */
@@ -712,21 +873,51 @@ var VoiceAgentClient = class {
712
873
  } catch (e) {
713
874
  }
714
875
  }
715
- audioListeners = [];
716
- emit(event, data) {
717
- if (event === "audio") {
718
- if (this.onAudioCallback) this.onAudioCallback(data);
719
- this.audioListeners.forEach((l) => l(data));
720
- } else if (event === "visemes") {
721
- if (this.onVisemesCallback) this.onVisemesCallback(data);
722
- this.visemeListeners.forEach((l) => l(data));
876
+ /**
877
+ * Register an event listener (for Python parity)
878
+ */
879
+ on(event, callback) {
880
+ if (!this.listeners[event]) {
881
+ this.listeners[event] = [];
882
+ }
883
+ this.listeners[event].push(callback);
884
+ return this;
885
+ }
886
+ /**
887
+ * Internal emitter for all events
888
+ */
889
+ emit(event, ...args) {
890
+ const legacyMap = {
891
+ "transcription": "onTranscription",
892
+ "response": "onResponse",
893
+ "audio": "onAudioCallback",
894
+ "visemes": "onVisemesCallback",
895
+ "status": "onStatus",
896
+ "error": "onError"
897
+ };
898
+ const legacyKey = legacyMap[event];
899
+ if (legacyKey && this[legacyKey]) {
900
+ try {
901
+ this[legacyKey](...args);
902
+ } catch (e) {
903
+ console.error(`Error in legacy callback ${legacyKey}:`, e);
904
+ }
905
+ }
906
+ if (this.listeners[event]) {
907
+ this.listeners[event].forEach((cb) => {
908
+ try {
909
+ cb(...args);
910
+ } catch (e) {
911
+ console.error(`Error in listener for ${event}:`, e);
912
+ }
913
+ });
723
914
  }
724
915
  }
725
916
  onAudio(callback) {
726
- this.audioListeners.push(callback);
917
+ this.on("audio", callback);
727
918
  }
728
919
  onVisemes(callback) {
729
- this.visemeListeners.push(callback);
920
+ this.on("visemes", callback);
730
921
  }
731
922
  /**
732
923
  * Disconnect from the server
@@ -805,15 +996,28 @@ var TTSClient = class {
805
996
  */
806
997
  synthesize(options) {
807
998
  return new Promise((resolve, reject) => {
999
+ let activityTimeout;
1000
+ let ws;
1001
+ let startTime;
1002
+ let firstByteReceived = false;
1003
+ const refreshTimeout = () => {
1004
+ if (activityTimeout) clearTimeout(activityTimeout);
1005
+ activityTimeout = setTimeout(() => {
1006
+ console.log("\u23F1\uFE0F TTS synthesis reached inactivity timeout (2s) - resolving");
1007
+ if (ws) ws.close();
1008
+ resolve();
1009
+ }, 2e3);
1010
+ };
808
1011
  try {
809
1012
  let url = DEFAULT_URLS.TTS;
810
1013
  if (this.apiKey) {
811
1014
  const separator = url.includes("?") ? "&" : "?";
812
1015
  url += `${separator}api_key=${this.apiKey}`;
813
1016
  }
814
- const ws = new WebSocket(url);
1017
+ ws = new WebSocket(url);
815
1018
  ws.binaryType = "arraybuffer";
816
1019
  ws.onopen = () => {
1020
+ refreshTimeout();
817
1021
  const req = {
818
1022
  text: options.text,
819
1023
  voice: options.voice || "F1" /* F1 */,
@@ -823,9 +1027,16 @@ var TTSClient = class {
823
1027
  visemes: options.visemes || false
824
1028
  };
825
1029
  ws.send(JSON.stringify(req));
1030
+ startTime = Date.now();
826
1031
  };
827
1032
  ws.onmessage = async (event) => {
1033
+ refreshTimeout();
828
1034
  if (event.data instanceof ArrayBuffer) {
1035
+ if (!firstByteReceived) {
1036
+ const ttfb = Date.now() - startTime;
1037
+ if (options.onTTFB) options.onTTFB(ttfb);
1038
+ firstByteReceived = true;
1039
+ }
829
1040
  if (options.onAudio) options.onAudio(new Uint8Array(event.data));
830
1041
  } else {
831
1042
  try {
@@ -833,18 +1044,26 @@ var TTSClient = class {
833
1044
  if (Array.isArray(msg) && options.onVisemes) {
834
1045
  options.onVisemes(msg);
835
1046
  }
1047
+ if (msg.type === "eos") {
1048
+ if (activityTimeout) clearTimeout(activityTimeout);
1049
+ ws.close();
1050
+ resolve();
1051
+ }
836
1052
  } catch (e) {
837
1053
  }
838
1054
  }
839
1055
  };
840
1056
  ws.onerror = (err) => {
1057
+ if (activityTimeout) clearTimeout(activityTimeout);
841
1058
  if (options.onError) options.onError(err);
842
1059
  reject(err);
843
1060
  };
844
1061
  ws.onclose = () => {
1062
+ if (activityTimeout) clearTimeout(activityTimeout);
845
1063
  resolve();
846
1064
  };
847
1065
  } catch (err) {
1066
+ if (activityTimeout) clearTimeout(activityTimeout);
848
1067
  if (options.onError) options.onError(err);
849
1068
  reject(err);
850
1069
  }
package/dist/index.mjs CHANGED
@@ -1,38 +1,9 @@
1
- // src/types.ts
2
- var VoiceStyle = /* @__PURE__ */ ((VoiceStyle2) => {
3
- VoiceStyle2["F1"] = "F1";
4
- VoiceStyle2["F2"] = "F2";
5
- VoiceStyle2["F3"] = "F3";
6
- VoiceStyle2["F4"] = "F4";
7
- VoiceStyle2["F5"] = "F5";
8
- VoiceStyle2["M1"] = "M1";
9
- VoiceStyle2["M2"] = "M2";
10
- VoiceStyle2["M3"] = "M3";
11
- VoiceStyle2["M4"] = "M4";
12
- VoiceStyle2["M5"] = "M5";
13
- return VoiceStyle2;
14
- })(VoiceStyle || {});
15
- var Language = /* @__PURE__ */ ((Language2) => {
16
- Language2["ENGLISH"] = "en";
17
- Language2["SPANISH"] = "es";
18
- Language2["FRENCH"] = "fr";
19
- Language2["PORTUGUESE"] = "pt";
20
- Language2["KOREAN"] = "ko";
21
- return Language2;
22
- })(Language || {});
23
- var AUDIO_CONFIG = {
24
- SAMPLE_RATE: 16e3,
25
- SPEAKER_SAMPLE_RATE: 44100,
26
- CHANNELS: 1,
27
- CHUNK_DURATION_MS: 20,
28
- get CHUNK_SIZE() {
29
- return Math.floor(this.SAMPLE_RATE * this.CHUNK_DURATION_MS / 1e3);
30
- }
31
- };
32
- var DEFAULT_URLS = {
33
- VOICE_AGENT: "wss://api.lokutor.com/ws/agent",
34
- TTS: "wss://api.lokutor.com/ws/tts"
35
- };
1
+ import {
2
+ AUDIO_CONFIG,
3
+ DEFAULT_URLS,
4
+ Language,
5
+ VoiceStyle
6
+ } from "./chunk-UI24THO7.mjs";
36
7
 
37
8
  // src/audio-utils.ts
38
9
  function pcm16ToFloat32(int16Data) {
@@ -477,6 +448,7 @@ var VoiceAgentClient = class {
477
448
  audioManager = null;
478
449
  enableAudio = false;
479
450
  currentGeneration = 0;
451
+ listeners = {};
480
452
  // Connection resilience
481
453
  isUserDisconnect = false;
482
454
  reconnecting = false;
@@ -499,14 +471,19 @@ var VoiceAgentClient = class {
499
471
  }
500
472
  /**
501
473
  * Connect to the Lokutor Voice Agent server
474
+ * @param customAudioManager Optional replacement for the default audio hardware handler
502
475
  */
503
- async connect() {
476
+ async connect(customAudioManager) {
504
477
  this.isUserDisconnect = false;
505
- if (this.enableAudio) {
506
- if (!this.audioManager) {
478
+ if (this.enableAudio || customAudioManager) {
479
+ if (customAudioManager) {
480
+ this.audioManager = customAudioManager;
481
+ } else if (!this.audioManager && typeof window !== "undefined") {
507
482
  this.audioManager = new BrowserAudioManager();
508
483
  }
509
- await this.audioManager.init();
484
+ if (this.audioManager) {
485
+ await this.audioManager.init();
486
+ }
510
487
  }
511
488
  return new Promise((resolve, reject) => {
512
489
  try {
@@ -567,6 +544,34 @@ var VoiceAgentClient = class {
567
544
  }
568
545
  });
569
546
  }
547
+ /**
548
+ * The "Golden Path" - Starts a managed session with hardware handled automatically.
549
+ * This is the recommended way to start a conversation in both Browser and Node.js.
550
+ */
551
+ async startManaged(config) {
552
+ this.enableAudio = true;
553
+ if (config?.audioManager) {
554
+ this.audioManager = config.audioManager;
555
+ } else if (!this.audioManager) {
556
+ if (typeof window !== "undefined") {
557
+ this.audioManager = new BrowserAudioManager();
558
+ } else {
559
+ try {
560
+ const { NodeAudioManager } = await import("./node-audio-5HOWE6MC.mjs");
561
+ this.audioManager = new NodeAudioManager();
562
+ } catch (e) {
563
+ console.error('\u274C Failed to load NodeAudioManager. Please ensure "speaker" and "node-record-lpcm16" are installed.');
564
+ }
565
+ }
566
+ }
567
+ await this.connect();
568
+ if (this.audioManager && this.isConnected) {
569
+ await this.audioManager.startMicrophone((data) => {
570
+ this.sendAudio(data);
571
+ });
572
+ }
573
+ return this;
574
+ }
570
575
  /**
571
576
  * Send initial configuration to the server
572
577
  */
@@ -668,21 +673,51 @@ var VoiceAgentClient = class {
668
673
  } catch (e) {
669
674
  }
670
675
  }
671
- audioListeners = [];
672
- emit(event, data) {
673
- if (event === "audio") {
674
- if (this.onAudioCallback) this.onAudioCallback(data);
675
- this.audioListeners.forEach((l) => l(data));
676
- } else if (event === "visemes") {
677
- if (this.onVisemesCallback) this.onVisemesCallback(data);
678
- this.visemeListeners.forEach((l) => l(data));
676
+ /**
677
+ * Register an event listener (for Python parity)
678
+ */
679
+ on(event, callback) {
680
+ if (!this.listeners[event]) {
681
+ this.listeners[event] = [];
682
+ }
683
+ this.listeners[event].push(callback);
684
+ return this;
685
+ }
686
+ /**
687
+ * Internal emitter for all events
688
+ */
689
+ emit(event, ...args) {
690
+ const legacyMap = {
691
+ "transcription": "onTranscription",
692
+ "response": "onResponse",
693
+ "audio": "onAudioCallback",
694
+ "visemes": "onVisemesCallback",
695
+ "status": "onStatus",
696
+ "error": "onError"
697
+ };
698
+ const legacyKey = legacyMap[event];
699
+ if (legacyKey && this[legacyKey]) {
700
+ try {
701
+ this[legacyKey](...args);
702
+ } catch (e) {
703
+ console.error(`Error in legacy callback ${legacyKey}:`, e);
704
+ }
705
+ }
706
+ if (this.listeners[event]) {
707
+ this.listeners[event].forEach((cb) => {
708
+ try {
709
+ cb(...args);
710
+ } catch (e) {
711
+ console.error(`Error in listener for ${event}:`, e);
712
+ }
713
+ });
679
714
  }
680
715
  }
681
716
  onAudio(callback) {
682
- this.audioListeners.push(callback);
717
+ this.on("audio", callback);
683
718
  }
684
719
  onVisemes(callback) {
685
- this.visemeListeners.push(callback);
720
+ this.on("visemes", callback);
686
721
  }
687
722
  /**
688
723
  * Disconnect from the server
@@ -761,15 +796,28 @@ var TTSClient = class {
761
796
  */
762
797
  synthesize(options) {
763
798
  return new Promise((resolve, reject) => {
799
+ let activityTimeout;
800
+ let ws;
801
+ let startTime;
802
+ let firstByteReceived = false;
803
+ const refreshTimeout = () => {
804
+ if (activityTimeout) clearTimeout(activityTimeout);
805
+ activityTimeout = setTimeout(() => {
806
+ console.log("\u23F1\uFE0F TTS synthesis reached inactivity timeout (2s) - resolving");
807
+ if (ws) ws.close();
808
+ resolve();
809
+ }, 2e3);
810
+ };
764
811
  try {
765
812
  let url = DEFAULT_URLS.TTS;
766
813
  if (this.apiKey) {
767
814
  const separator = url.includes("?") ? "&" : "?";
768
815
  url += `${separator}api_key=${this.apiKey}`;
769
816
  }
770
- const ws = new WebSocket(url);
817
+ ws = new WebSocket(url);
771
818
  ws.binaryType = "arraybuffer";
772
819
  ws.onopen = () => {
820
+ refreshTimeout();
773
821
  const req = {
774
822
  text: options.text,
775
823
  voice: options.voice || "F1" /* F1 */,
@@ -779,9 +827,16 @@ var TTSClient = class {
779
827
  visemes: options.visemes || false
780
828
  };
781
829
  ws.send(JSON.stringify(req));
830
+ startTime = Date.now();
782
831
  };
783
832
  ws.onmessage = async (event) => {
833
+ refreshTimeout();
784
834
  if (event.data instanceof ArrayBuffer) {
835
+ if (!firstByteReceived) {
836
+ const ttfb = Date.now() - startTime;
837
+ if (options.onTTFB) options.onTTFB(ttfb);
838
+ firstByteReceived = true;
839
+ }
785
840
  if (options.onAudio) options.onAudio(new Uint8Array(event.data));
786
841
  } else {
787
842
  try {
@@ -789,18 +844,26 @@ var TTSClient = class {
789
844
  if (Array.isArray(msg) && options.onVisemes) {
790
845
  options.onVisemes(msg);
791
846
  }
847
+ if (msg.type === "eos") {
848
+ if (activityTimeout) clearTimeout(activityTimeout);
849
+ ws.close();
850
+ resolve();
851
+ }
792
852
  } catch (e) {
793
853
  }
794
854
  }
795
855
  };
796
856
  ws.onerror = (err) => {
857
+ if (activityTimeout) clearTimeout(activityTimeout);
797
858
  if (options.onError) options.onError(err);
798
859
  reject(err);
799
860
  };
800
861
  ws.onclose = () => {
862
+ if (activityTimeout) clearTimeout(activityTimeout);
801
863
  resolve();
802
864
  };
803
865
  } catch (err) {
866
+ if (activityTimeout) clearTimeout(activityTimeout);
804
867
  if (options.onError) options.onError(err);
805
868
  reject(err);
806
869
  }
@@ -0,0 +1,94 @@
1
+ import {
2
+ AUDIO_CONFIG
3
+ } from "./chunk-UI24THO7.mjs";
4
+
5
+ // src/node-audio.ts
6
+ var NodeAudioManager = class {
7
+ speaker = null;
8
+ recorder = null;
9
+ recordingStream = null;
10
+ isMuted = false;
11
+ isListening = false;
12
+ constructor() {
13
+ }
14
+ async init() {
15
+ try {
16
+ const Speaker = await import("speaker").catch(() => null);
17
+ if (!Speaker) {
18
+ console.warn('\u26A0\uFE0F Package "speaker" is missing. Hardware output will be disabled.');
19
+ console.warn("\u{1F449} Run: npm install speaker");
20
+ }
21
+ } catch (e) {
22
+ console.error("Error initializing Node audio:", e);
23
+ }
24
+ }
25
+ async startMicrophone(onAudioInput) {
26
+ if (this.isListening) return;
27
+ try {
28
+ const recorder = await import("node-record-lpcm16").catch(() => null);
29
+ if (!recorder) {
30
+ throw new Error('Package "node-record-lpcm16" is missing. Microphone input failed.\n\u{1F449} Run: npm install node-record-lpcm16');
31
+ }
32
+ console.log("\u{1F3A4} Starting microphone (Node.js)...");
33
+ this.recordingStream = recorder.record({
34
+ sampleRate: AUDIO_CONFIG.SAMPLE_RATE,
35
+ threshold: 0,
36
+ verbose: false,
37
+ recordProgram: "sox"
38
+ // default
39
+ });
40
+ this.recordingStream.stream().on("data", (chunk) => {
41
+ if (!this.isMuted && onAudioInput) {
42
+ onAudioInput(new Uint8Array(chunk));
43
+ }
44
+ });
45
+ this.isListening = true;
46
+ } catch (e) {
47
+ console.error("Failed to start microphone:", e.message);
48
+ throw e;
49
+ }
50
+ }
51
+ stopMicrophone() {
52
+ if (this.recordingStream) {
53
+ this.recordingStream.stop();
54
+ this.recordingStream = null;
55
+ }
56
+ this.isListening = false;
57
+ }
58
+ async playAudio(pcm16Data) {
59
+ try {
60
+ if (!this.speaker) {
61
+ const Speaker = (await import("speaker")).default;
62
+ this.speaker = new Speaker({
63
+ channels: AUDIO_CONFIG.CHANNELS,
64
+ bitDepth: 16,
65
+ sampleRate: AUDIO_CONFIG.SPEAKER_SAMPLE_RATE
66
+ });
67
+ }
68
+ this.speaker.write(Buffer.from(pcm16Data));
69
+ } catch (e) {
70
+ }
71
+ }
72
+ stopPlayback() {
73
+ if (this.speaker) {
74
+ this.speaker.end();
75
+ this.speaker = null;
76
+ }
77
+ }
78
+ cleanup() {
79
+ this.stopMicrophone();
80
+ this.stopPlayback();
81
+ }
82
+ isMicMuted() {
83
+ return this.isMuted;
84
+ }
85
+ setMuted(muted) {
86
+ this.isMuted = muted;
87
+ }
88
+ getAmplitude() {
89
+ return 0;
90
+ }
91
+ };
92
+ export {
93
+ NodeAudioManager
94
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lokutor/sdk",
3
- "version": "1.1.12",
3
+ "version": "1.1.13",
4
4
  "description": "JavaScript/TypeScript SDK for Lokutor Real-time Voice AI",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",