@lokutor/sdk 1.1.11 → 1.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,44 @@
1
+ // src/types.ts
2
+ var VoiceStyle = /* @__PURE__ */ ((VoiceStyle2) => {
3
+ VoiceStyle2["F1"] = "F1";
4
+ VoiceStyle2["F2"] = "F2";
5
+ VoiceStyle2["F3"] = "F3";
6
+ VoiceStyle2["F4"] = "F4";
7
+ VoiceStyle2["F5"] = "F5";
8
+ VoiceStyle2["M1"] = "M1";
9
+ VoiceStyle2["M2"] = "M2";
10
+ VoiceStyle2["M3"] = "M3";
11
+ VoiceStyle2["M4"] = "M4";
12
+ VoiceStyle2["M5"] = "M5";
13
+ return VoiceStyle2;
14
+ })(VoiceStyle || {});
15
+ var Language = /* @__PURE__ */ ((Language2) => {
16
+ Language2["ENGLISH"] = "en";
17
+ Language2["SPANISH"] = "es";
18
+ Language2["FRENCH"] = "fr";
19
+ Language2["PORTUGUESE"] = "pt";
20
+ Language2["KOREAN"] = "ko";
21
+ return Language2;
22
+ })(Language || {});
23
+ var AUDIO_CONFIG = {
24
+ SAMPLE_RATE: 16e3,
25
+ SAMPLE_RATE_INPUT: 16e3,
26
+ SPEAKER_SAMPLE_RATE: 44100,
27
+ SAMPLE_RATE_OUTPUT: 44100,
28
+ CHANNELS: 1,
29
+ CHUNK_DURATION_MS: 20,
30
+ get CHUNK_SIZE() {
31
+ return Math.floor(this.SAMPLE_RATE * this.CHUNK_DURATION_MS / 1e3);
32
+ }
33
+ };
34
+ var DEFAULT_URLS = {
35
+ VOICE_AGENT: "wss://api.lokutor.com/ws/agent",
36
+ TTS: "wss://api.lokutor.com/ws/tts"
37
+ };
38
+
39
+ export {
40
+ VoiceStyle,
41
+ Language,
42
+ AUDIO_CONFIG,
43
+ DEFAULT_URLS
44
+ };
package/dist/index.d.mts CHANGED
@@ -28,7 +28,9 @@ declare enum Language {
28
28
  */
29
29
  declare const AUDIO_CONFIG: {
30
30
  SAMPLE_RATE: number;
31
+ SAMPLE_RATE_INPUT: number;
31
32
  SPEAKER_SAMPLE_RATE: number;
33
+ SAMPLE_RATE_OUTPUT: number;
32
34
  CHANNELS: number;
33
35
  CHUNK_DURATION_MS: number;
34
36
  readonly CHUNK_SIZE: number;
@@ -97,7 +99,43 @@ interface Viseme {
97
99
  c: string;
98
100
  t: number;
99
101
  }
102
+ /**
103
+ * Tool definition for LLM function calling (OpenAI format)
104
+ */
105
+ interface ToolDefinition {
106
+ type: 'function';
107
+ function: {
108
+ name: string;
109
+ description: string;
110
+ parameters: {
111
+ type: 'object';
112
+ properties: Record<string, any>;
113
+ required?: string[];
114
+ };
115
+ };
116
+ }
117
+ /**
118
+ * Event data for tool execution
119
+ */
120
+ interface ToolCall {
121
+ name: string;
122
+ arguments: string;
123
+ }
100
124
 
125
+ /**
126
+ * Interface for audio hardware management (Browser/Node parity)
127
+ */
128
+ interface AudioManager {
129
+ init(): Promise<void>;
130
+ startMicrophone(onAudioInput: (pcm16Data: Uint8Array) => void): Promise<void>;
131
+ stopMicrophone(): void;
132
+ playAudio(pcm16Data: Uint8Array): void;
133
+ stopPlayback(): void;
134
+ cleanup(): void;
135
+ isMicMuted(): boolean;
136
+ setMuted(muted: boolean): void;
137
+ getAmplitude(): number;
138
+ }
101
139
  /**
102
140
  * Main client for Lokutor Voice Agent SDK
103
141
  *
@@ -109,6 +147,7 @@ declare class VoiceAgentClient {
109
147
  prompt: string;
110
148
  voice: VoiceStyle;
111
149
  language: Language;
150
+ tools: ToolDefinition[];
112
151
  private onTranscription?;
113
152
  private onResponse?;
114
153
  private onAudioCallback?;
@@ -121,6 +160,8 @@ declare class VoiceAgentClient {
121
160
  private wantVisemes;
122
161
  private audioManager;
123
162
  private enableAudio;
163
+ private currentGeneration;
164
+ private listeners;
124
165
  private isUserDisconnect;
125
166
  private reconnecting;
126
167
  private reconnectAttempts;
@@ -132,11 +173,20 @@ declare class VoiceAgentClient {
132
173
  visemes?: boolean;
133
174
  onVisemes?: (visemes: Viseme[]) => void;
134
175
  enableAudio?: boolean;
176
+ tools?: ToolDefinition[];
135
177
  });
136
178
  /**
137
179
  * Connect to the Lokutor Voice Agent server
180
+ * @param customAudioManager Optional replacement for the default audio hardware handler
138
181
  */
139
- connect(): Promise<boolean>;
182
+ connect(customAudioManager?: AudioManager): Promise<boolean>;
183
+ /**
184
+ * The "Golden Path" - Starts a managed session with hardware handled automatically.
185
+ * This is the recommended way to start a conversation in both Browser and Node.js.
186
+ */
187
+ startManaged(config?: {
188
+ audioManager?: AudioManager;
189
+ }): Promise<this>;
140
190
  /**
141
191
  * Send initial configuration to the server
142
192
  */
@@ -154,7 +204,13 @@ declare class VoiceAgentClient {
154
204
  * Handle incoming text messages (metadata/transcriptions)
155
205
  */
156
206
  private handleTextMessage;
157
- private audioListeners;
207
+ /**
208
+ * Register an event listener (for Python parity)
209
+ */
210
+ on(event: string, callback: Function): this;
211
+ /**
212
+ * Internal emitter for all events
213
+ */
158
214
  private emit;
159
215
  onAudio(callback: (data: Uint8Array) => void): void;
160
216
  onVisemes(callback: (visemes: Viseme[]) => void): void;
@@ -211,6 +267,7 @@ declare class TTSClient {
211
267
  visemes?: boolean;
212
268
  onAudio?: (data: Uint8Array) => void;
213
269
  onVisemes?: (visemes: any[]) => void;
270
+ onTTFB?: (ms: number) => void;
214
271
  onError?: (error: any) => void;
215
272
  }): Promise<void>;
216
273
  }
@@ -418,4 +475,4 @@ declare class BrowserAudioManager {
418
475
  isRecording(): boolean;
419
476
  }
420
477
 
421
- export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
478
+ export { AUDIO_CONFIG, type AnalyserConfig, type AudioManager, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
package/dist/index.d.ts CHANGED
@@ -28,7 +28,9 @@ declare enum Language {
28
28
  */
29
29
  declare const AUDIO_CONFIG: {
30
30
  SAMPLE_RATE: number;
31
+ SAMPLE_RATE_INPUT: number;
31
32
  SPEAKER_SAMPLE_RATE: number;
33
+ SAMPLE_RATE_OUTPUT: number;
32
34
  CHANNELS: number;
33
35
  CHUNK_DURATION_MS: number;
34
36
  readonly CHUNK_SIZE: number;
@@ -97,7 +99,43 @@ interface Viseme {
97
99
  c: string;
98
100
  t: number;
99
101
  }
102
+ /**
103
+ * Tool definition for LLM function calling (OpenAI format)
104
+ */
105
+ interface ToolDefinition {
106
+ type: 'function';
107
+ function: {
108
+ name: string;
109
+ description: string;
110
+ parameters: {
111
+ type: 'object';
112
+ properties: Record<string, any>;
113
+ required?: string[];
114
+ };
115
+ };
116
+ }
117
+ /**
118
+ * Event data for tool execution
119
+ */
120
+ interface ToolCall {
121
+ name: string;
122
+ arguments: string;
123
+ }
100
124
 
125
+ /**
126
+ * Interface for audio hardware management (Browser/Node parity)
127
+ */
128
+ interface AudioManager {
129
+ init(): Promise<void>;
130
+ startMicrophone(onAudioInput: (pcm16Data: Uint8Array) => void): Promise<void>;
131
+ stopMicrophone(): void;
132
+ playAudio(pcm16Data: Uint8Array): void;
133
+ stopPlayback(): void;
134
+ cleanup(): void;
135
+ isMicMuted(): boolean;
136
+ setMuted(muted: boolean): void;
137
+ getAmplitude(): number;
138
+ }
101
139
  /**
102
140
  * Main client for Lokutor Voice Agent SDK
103
141
  *
@@ -109,6 +147,7 @@ declare class VoiceAgentClient {
109
147
  prompt: string;
110
148
  voice: VoiceStyle;
111
149
  language: Language;
150
+ tools: ToolDefinition[];
112
151
  private onTranscription?;
113
152
  private onResponse?;
114
153
  private onAudioCallback?;
@@ -121,6 +160,8 @@ declare class VoiceAgentClient {
121
160
  private wantVisemes;
122
161
  private audioManager;
123
162
  private enableAudio;
163
+ private currentGeneration;
164
+ private listeners;
124
165
  private isUserDisconnect;
125
166
  private reconnecting;
126
167
  private reconnectAttempts;
@@ -132,11 +173,20 @@ declare class VoiceAgentClient {
132
173
  visemes?: boolean;
133
174
  onVisemes?: (visemes: Viseme[]) => void;
134
175
  enableAudio?: boolean;
176
+ tools?: ToolDefinition[];
135
177
  });
136
178
  /**
137
179
  * Connect to the Lokutor Voice Agent server
180
+ * @param customAudioManager Optional replacement for the default audio hardware handler
138
181
  */
139
- connect(): Promise<boolean>;
182
+ connect(customAudioManager?: AudioManager): Promise<boolean>;
183
+ /**
184
+ * The "Golden Path" - Starts a managed session with hardware handled automatically.
185
+ * This is the recommended way to start a conversation in both Browser and Node.js.
186
+ */
187
+ startManaged(config?: {
188
+ audioManager?: AudioManager;
189
+ }): Promise<this>;
140
190
  /**
141
191
  * Send initial configuration to the server
142
192
  */
@@ -154,7 +204,13 @@ declare class VoiceAgentClient {
154
204
  * Handle incoming text messages (metadata/transcriptions)
155
205
  */
156
206
  private handleTextMessage;
157
- private audioListeners;
207
+ /**
208
+ * Register an event listener (for Python parity)
209
+ */
210
+ on(event: string, callback: Function): this;
211
+ /**
212
+ * Internal emitter for all events
213
+ */
158
214
  private emit;
159
215
  onAudio(callback: (data: Uint8Array) => void): void;
160
216
  onVisemes(callback: (visemes: Viseme[]) => void): void;
@@ -211,6 +267,7 @@ declare class TTSClient {
211
267
  visemes?: boolean;
212
268
  onAudio?: (data: Uint8Array) => void;
213
269
  onVisemes?: (visemes: any[]) => void;
270
+ onTTFB?: (ms: number) => void;
214
271
  onError?: (error: any) => void;
215
272
  }): Promise<void>;
216
273
  }
@@ -418,4 +475,4 @@ declare class BrowserAudioManager {
418
475
  isRecording(): boolean;
419
476
  }
420
477
 
421
- export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
478
+ export { AUDIO_CONFIG, type AnalyserConfig, type AudioManager, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
package/dist/index.js CHANGED
@@ -1,8 +1,13 @@
1
1
  "use strict";
2
+ var __create = Object.create;
2
3
  var __defProp = Object.defineProperty;
3
4
  var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
5
  var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
5
7
  var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ var __esm = (fn, res) => function __init() {
9
+ return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
10
+ };
6
11
  var __export = (target, all) => {
7
12
  for (var name in all)
8
13
  __defProp(target, name, { get: all[name], enumerable: true });
@@ -15,8 +20,159 @@ var __copyProps = (to, from, except, desc) => {
15
20
  }
16
21
  return to;
17
22
  };
23
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
24
+ // If the importer is in node compatibility mode or this is not an ESM
25
+ // file that has been converted to a CommonJS file using a Babel-
26
+ // compatible transform (i.e. "__esModule" has not been set), then set
27
+ // "default" to the CommonJS "module.exports" for node compatibility.
28
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
29
+ mod
30
+ ));
18
31
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
32
 
33
+ // src/types.ts
34
+ var VoiceStyle, Language, AUDIO_CONFIG, DEFAULT_URLS;
35
+ var init_types = __esm({
36
+ "src/types.ts"() {
37
+ "use strict";
38
+ VoiceStyle = /* @__PURE__ */ ((VoiceStyle2) => {
39
+ VoiceStyle2["F1"] = "F1";
40
+ VoiceStyle2["F2"] = "F2";
41
+ VoiceStyle2["F3"] = "F3";
42
+ VoiceStyle2["F4"] = "F4";
43
+ VoiceStyle2["F5"] = "F5";
44
+ VoiceStyle2["M1"] = "M1";
45
+ VoiceStyle2["M2"] = "M2";
46
+ VoiceStyle2["M3"] = "M3";
47
+ VoiceStyle2["M4"] = "M4";
48
+ VoiceStyle2["M5"] = "M5";
49
+ return VoiceStyle2;
50
+ })(VoiceStyle || {});
51
+ Language = /* @__PURE__ */ ((Language2) => {
52
+ Language2["ENGLISH"] = "en";
53
+ Language2["SPANISH"] = "es";
54
+ Language2["FRENCH"] = "fr";
55
+ Language2["PORTUGUESE"] = "pt";
56
+ Language2["KOREAN"] = "ko";
57
+ return Language2;
58
+ })(Language || {});
59
+ AUDIO_CONFIG = {
60
+ SAMPLE_RATE: 16e3,
61
+ SAMPLE_RATE_INPUT: 16e3,
62
+ SPEAKER_SAMPLE_RATE: 44100,
63
+ SAMPLE_RATE_OUTPUT: 44100,
64
+ CHANNELS: 1,
65
+ CHUNK_DURATION_MS: 20,
66
+ get CHUNK_SIZE() {
67
+ return Math.floor(this.SAMPLE_RATE * this.CHUNK_DURATION_MS / 1e3);
68
+ }
69
+ };
70
+ DEFAULT_URLS = {
71
+ VOICE_AGENT: "wss://api.lokutor.com/ws/agent",
72
+ TTS: "wss://api.lokutor.com/ws/tts"
73
+ };
74
+ }
75
+ });
76
+
77
+ // src/node-audio.ts
78
+ var node_audio_exports = {};
79
+ __export(node_audio_exports, {
80
+ NodeAudioManager: () => NodeAudioManager
81
+ });
82
+ var NodeAudioManager;
83
+ var init_node_audio = __esm({
84
+ "src/node-audio.ts"() {
85
+ "use strict";
86
+ init_types();
87
+ NodeAudioManager = class {
88
+ speaker = null;
89
+ recorder = null;
90
+ recordingStream = null;
91
+ isMuted = false;
92
+ isListening = false;
93
+ constructor() {
94
+ }
95
+ async init() {
96
+ try {
97
+ const Speaker = await import("speaker").catch(() => null);
98
+ if (!Speaker) {
99
+ console.warn('\u26A0\uFE0F Package "speaker" is missing. Hardware output will be disabled.');
100
+ console.warn("\u{1F449} Run: npm install speaker");
101
+ }
102
+ } catch (e) {
103
+ console.error("Error initializing Node audio:", e);
104
+ }
105
+ }
106
+ async startMicrophone(onAudioInput) {
107
+ if (this.isListening) return;
108
+ try {
109
+ const recorder = await import("node-record-lpcm16").catch(() => null);
110
+ if (!recorder) {
111
+ throw new Error('Package "node-record-lpcm16" is missing. Microphone input failed.\n\u{1F449} Run: npm install node-record-lpcm16');
112
+ }
113
+ console.log("\u{1F3A4} Starting microphone (Node.js)...");
114
+ this.recordingStream = recorder.record({
115
+ sampleRate: AUDIO_CONFIG.SAMPLE_RATE,
116
+ threshold: 0,
117
+ verbose: false,
118
+ recordProgram: "sox"
119
+ // default
120
+ });
121
+ this.recordingStream.stream().on("data", (chunk) => {
122
+ if (!this.isMuted && onAudioInput) {
123
+ onAudioInput(new Uint8Array(chunk));
124
+ }
125
+ });
126
+ this.isListening = true;
127
+ } catch (e) {
128
+ console.error("Failed to start microphone:", e.message);
129
+ throw e;
130
+ }
131
+ }
132
+ stopMicrophone() {
133
+ if (this.recordingStream) {
134
+ this.recordingStream.stop();
135
+ this.recordingStream = null;
136
+ }
137
+ this.isListening = false;
138
+ }
139
+ async playAudio(pcm16Data) {
140
+ try {
141
+ if (!this.speaker) {
142
+ const Speaker = (await import("speaker")).default;
143
+ this.speaker = new Speaker({
144
+ channels: AUDIO_CONFIG.CHANNELS,
145
+ bitDepth: 16,
146
+ sampleRate: AUDIO_CONFIG.SPEAKER_SAMPLE_RATE
147
+ });
148
+ }
149
+ this.speaker.write(Buffer.from(pcm16Data));
150
+ } catch (e) {
151
+ }
152
+ }
153
+ stopPlayback() {
154
+ if (this.speaker) {
155
+ this.speaker.end();
156
+ this.speaker = null;
157
+ }
158
+ }
159
+ cleanup() {
160
+ this.stopMicrophone();
161
+ this.stopPlayback();
162
+ }
163
+ isMicMuted() {
164
+ return this.isMuted;
165
+ }
166
+ setMuted(muted) {
167
+ this.isMuted = muted;
168
+ }
169
+ getAmplitude() {
170
+ return 0;
171
+ }
172
+ };
173
+ }
174
+ });
175
+
20
176
  // src/index.ts
21
177
  var index_exports = {};
22
178
  __export(index_exports, {
@@ -41,42 +197,13 @@ __export(index_exports, {
41
197
  simpleTTS: () => simpleTTS
42
198
  });
43
199
  module.exports = __toCommonJS(index_exports);
200
+ init_types();
44
201
 
45
- // src/types.ts
46
- var VoiceStyle = /* @__PURE__ */ ((VoiceStyle2) => {
47
- VoiceStyle2["F1"] = "F1";
48
- VoiceStyle2["F2"] = "F2";
49
- VoiceStyle2["F3"] = "F3";
50
- VoiceStyle2["F4"] = "F4";
51
- VoiceStyle2["F5"] = "F5";
52
- VoiceStyle2["M1"] = "M1";
53
- VoiceStyle2["M2"] = "M2";
54
- VoiceStyle2["M3"] = "M3";
55
- VoiceStyle2["M4"] = "M4";
56
- VoiceStyle2["M5"] = "M5";
57
- return VoiceStyle2;
58
- })(VoiceStyle || {});
59
- var Language = /* @__PURE__ */ ((Language2) => {
60
- Language2["ENGLISH"] = "en";
61
- Language2["SPANISH"] = "es";
62
- Language2["FRENCH"] = "fr";
63
- Language2["PORTUGUESE"] = "pt";
64
- Language2["KOREAN"] = "ko";
65
- return Language2;
66
- })(Language || {});
67
- var AUDIO_CONFIG = {
68
- SAMPLE_RATE: 16e3,
69
- SPEAKER_SAMPLE_RATE: 44100,
70
- CHANNELS: 1,
71
- CHUNK_DURATION_MS: 20,
72
- get CHUNK_SIZE() {
73
- return Math.floor(this.SAMPLE_RATE * this.CHUNK_DURATION_MS / 1e3);
74
- }
75
- };
76
- var DEFAULT_URLS = {
77
- VOICE_AGENT: "wss://api.lokutor.com/ws/agent",
78
- TTS: "wss://api.lokutor.com/ws/tts"
79
- };
202
+ // src/client.ts
203
+ init_types();
204
+
205
+ // src/browser-audio.ts
206
+ init_types();
80
207
 
81
208
  // src/audio-utils.ts
82
209
  function pcm16ToFloat32(int16Data) {
@@ -506,6 +633,7 @@ var VoiceAgentClient = class {
506
633
  prompt;
507
634
  voice;
508
635
  language;
636
+ tools = [];
509
637
  // Callbacks
510
638
  onTranscription;
511
639
  onResponse;
@@ -519,6 +647,8 @@ var VoiceAgentClient = class {
519
647
  wantVisemes = false;
520
648
  audioManager = null;
521
649
  enableAudio = false;
650
+ currentGeneration = 0;
651
+ listeners = {};
522
652
  // Connection resilience
523
653
  isUserDisconnect = false;
524
654
  reconnecting = false;
@@ -537,17 +667,23 @@ var VoiceAgentClient = class {
537
667
  this.onError = config.onError;
538
668
  this.wantVisemes = config.visemes || false;
539
669
  this.enableAudio = config.enableAudio ?? false;
670
+ this.tools = config.tools || [];
540
671
  }
541
672
  /**
542
673
  * Connect to the Lokutor Voice Agent server
674
+ * @param customAudioManager Optional replacement for the default audio hardware handler
543
675
  */
544
- async connect() {
676
+ async connect(customAudioManager) {
545
677
  this.isUserDisconnect = false;
546
- if (this.enableAudio) {
547
- if (!this.audioManager) {
678
+ if (this.enableAudio || customAudioManager) {
679
+ if (customAudioManager) {
680
+ this.audioManager = customAudioManager;
681
+ } else if (!this.audioManager && typeof window !== "undefined") {
548
682
  this.audioManager = new BrowserAudioManager();
549
683
  }
550
- await this.audioManager.init();
684
+ if (this.audioManager) {
685
+ await this.audioManager.init();
686
+ }
551
687
  }
552
688
  return new Promise((resolve, reject) => {
553
689
  try {
@@ -608,6 +744,34 @@ var VoiceAgentClient = class {
608
744
  }
609
745
  });
610
746
  }
747
+ /**
748
+ * The "Golden Path" - Starts a managed session with hardware handled automatically.
749
+ * This is the recommended way to start a conversation in both Browser and Node.js.
750
+ */
751
+ async startManaged(config) {
752
+ this.enableAudio = true;
753
+ if (config?.audioManager) {
754
+ this.audioManager = config.audioManager;
755
+ } else if (!this.audioManager) {
756
+ if (typeof window !== "undefined") {
757
+ this.audioManager = new BrowserAudioManager();
758
+ } else {
759
+ try {
760
+ const { NodeAudioManager: NodeAudioManager2 } = await Promise.resolve().then(() => (init_node_audio(), node_audio_exports));
761
+ this.audioManager = new NodeAudioManager2();
762
+ } catch (e) {
763
+ console.error('\u274C Failed to load NodeAudioManager. Please ensure "speaker" and "node-record-lpcm16" are installed.');
764
+ }
765
+ }
766
+ }
767
+ await this.connect();
768
+ if (this.audioManager && this.isConnected) {
769
+ await this.audioManager.startMicrophone((data) => {
770
+ this.sendAudio(data);
771
+ });
772
+ }
773
+ return this;
774
+ }
611
775
  /**
612
776
  * Send initial configuration to the server
613
777
  */
@@ -617,7 +781,10 @@ var VoiceAgentClient = class {
617
781
  this.ws.send(JSON.stringify({ type: "voice", data: this.voice }));
618
782
  this.ws.send(JSON.stringify({ type: "language", data: this.language }));
619
783
  this.ws.send(JSON.stringify({ type: "visemes", data: this.wantVisemes }));
620
- console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}`);
784
+ if (this.tools && this.tools.length > 0) {
785
+ this.ws.send(JSON.stringify({ type: "tools", data: this.tools }));
786
+ }
787
+ console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}, tools=${this.tools.length}`);
621
788
  }
622
789
  /**
623
790
  * Send raw PCM audio data to the server
@@ -631,7 +798,11 @@ var VoiceAgentClient = class {
631
798
  /**
632
799
  * Handle incoming binary data (audio response)
633
800
  */
634
- handleBinaryMessage(data) {
801
+ handleBinaryMessage(data, generation) {
802
+ if (generation !== void 0 && generation < this.currentGeneration) {
803
+ console.log(`\u{1F5D1}\uFE0F Discarding ghost audio (Gen ${generation} < ${this.currentGeneration})`);
804
+ return;
805
+ }
635
806
  if (this.audioManager) {
636
807
  this.audioManager.playAudio(data);
637
808
  }
@@ -647,7 +818,7 @@ var VoiceAgentClient = class {
647
818
  case "audio":
648
819
  if (msg.data) {
649
820
  const buffer = base64ToUint8Array(msg.data);
650
- this.handleBinaryMessage(buffer);
821
+ this.handleBinaryMessage(buffer, msg.generation);
651
822
  }
652
823
  break;
653
824
  case "transcript":
@@ -666,6 +837,14 @@ var VoiceAgentClient = class {
666
837
  }
667
838
  break;
668
839
  case "status":
840
+ if (msg.data === "thinking") {
841
+ const newGen = msg.generation || 0;
842
+ if (newGen > this.currentGeneration) {
843
+ console.log(`\u{1F9E0} New thought (Gen ${newGen}) - Clearing audio queue`);
844
+ this.currentGeneration = newGen;
845
+ if (this.audioManager) this.audioManager.stopPlayback();
846
+ }
847
+ }
669
848
  if (msg.data === "interrupted" && this.audioManager) {
670
849
  this.audioManager.stopPlayback();
671
850
  }
@@ -687,25 +866,58 @@ var VoiceAgentClient = class {
687
866
  if (this.onError) this.onError(msg.data);
688
867
  console.error(`\u274C Server error: ${msg.data}`);
689
868
  break;
869
+ case "tool_call":
870
+ console.log(`\u{1F6E0}\uFE0F Tool Call: ${msg.name}(${msg.arguments})`);
871
+ break;
690
872
  }
691
873
  } catch (e) {
692
874
  }
693
875
  }
694
- audioListeners = [];
695
- emit(event, data) {
696
- if (event === "audio") {
697
- if (this.onAudioCallback) this.onAudioCallback(data);
698
- this.audioListeners.forEach((l) => l(data));
699
- } else if (event === "visemes") {
700
- if (this.onVisemesCallback) this.onVisemesCallback(data);
701
- this.visemeListeners.forEach((l) => l(data));
876
+ /**
877
+ * Register an event listener (for Python parity)
878
+ */
879
+ on(event, callback) {
880
+ if (!this.listeners[event]) {
881
+ this.listeners[event] = [];
882
+ }
883
+ this.listeners[event].push(callback);
884
+ return this;
885
+ }
886
+ /**
887
+ * Internal emitter for all events
888
+ */
889
+ emit(event, ...args) {
890
+ const legacyMap = {
891
+ "transcription": "onTranscription",
892
+ "response": "onResponse",
893
+ "audio": "onAudioCallback",
894
+ "visemes": "onVisemesCallback",
895
+ "status": "onStatus",
896
+ "error": "onError"
897
+ };
898
+ const legacyKey = legacyMap[event];
899
+ if (legacyKey && this[legacyKey]) {
900
+ try {
901
+ this[legacyKey](...args);
902
+ } catch (e) {
903
+ console.error(`Error in legacy callback ${legacyKey}:`, e);
904
+ }
905
+ }
906
+ if (this.listeners[event]) {
907
+ this.listeners[event].forEach((cb) => {
908
+ try {
909
+ cb(...args);
910
+ } catch (e) {
911
+ console.error(`Error in listener for ${event}:`, e);
912
+ }
913
+ });
702
914
  }
703
915
  }
704
916
  onAudio(callback) {
705
- this.audioListeners.push(callback);
917
+ this.on("audio", callback);
706
918
  }
707
919
  onVisemes(callback) {
708
- this.visemeListeners.push(callback);
920
+ this.on("visemes", callback);
709
921
  }
710
922
  /**
711
923
  * Disconnect from the server
@@ -784,15 +996,28 @@ var TTSClient = class {
784
996
  */
785
997
  synthesize(options) {
786
998
  return new Promise((resolve, reject) => {
999
+ let activityTimeout;
1000
+ let ws;
1001
+ let startTime;
1002
+ let firstByteReceived = false;
1003
+ const refreshTimeout = () => {
1004
+ if (activityTimeout) clearTimeout(activityTimeout);
1005
+ activityTimeout = setTimeout(() => {
1006
+ console.log("\u23F1\uFE0F TTS synthesis reached inactivity timeout (2s) - resolving");
1007
+ if (ws) ws.close();
1008
+ resolve();
1009
+ }, 2e3);
1010
+ };
787
1011
  try {
788
1012
  let url = DEFAULT_URLS.TTS;
789
1013
  if (this.apiKey) {
790
1014
  const separator = url.includes("?") ? "&" : "?";
791
1015
  url += `${separator}api_key=${this.apiKey}`;
792
1016
  }
793
- const ws = new WebSocket(url);
1017
+ ws = new WebSocket(url);
794
1018
  ws.binaryType = "arraybuffer";
795
1019
  ws.onopen = () => {
1020
+ refreshTimeout();
796
1021
  const req = {
797
1022
  text: options.text,
798
1023
  voice: options.voice || "F1" /* F1 */,
@@ -802,9 +1027,16 @@ var TTSClient = class {
802
1027
  visemes: options.visemes || false
803
1028
  };
804
1029
  ws.send(JSON.stringify(req));
1030
+ startTime = Date.now();
805
1031
  };
806
1032
  ws.onmessage = async (event) => {
1033
+ refreshTimeout();
807
1034
  if (event.data instanceof ArrayBuffer) {
1035
+ if (!firstByteReceived) {
1036
+ const ttfb = Date.now() - startTime;
1037
+ if (options.onTTFB) options.onTTFB(ttfb);
1038
+ firstByteReceived = true;
1039
+ }
808
1040
  if (options.onAudio) options.onAudio(new Uint8Array(event.data));
809
1041
  } else {
810
1042
  try {
@@ -812,18 +1044,26 @@ var TTSClient = class {
812
1044
  if (Array.isArray(msg) && options.onVisemes) {
813
1045
  options.onVisemes(msg);
814
1046
  }
1047
+ if (msg.type === "eos") {
1048
+ if (activityTimeout) clearTimeout(activityTimeout);
1049
+ ws.close();
1050
+ resolve();
1051
+ }
815
1052
  } catch (e) {
816
1053
  }
817
1054
  }
818
1055
  };
819
1056
  ws.onerror = (err) => {
1057
+ if (activityTimeout) clearTimeout(activityTimeout);
820
1058
  if (options.onError) options.onError(err);
821
1059
  reject(err);
822
1060
  };
823
1061
  ws.onclose = () => {
1062
+ if (activityTimeout) clearTimeout(activityTimeout);
824
1063
  resolve();
825
1064
  };
826
1065
  } catch (err) {
1066
+ if (activityTimeout) clearTimeout(activityTimeout);
827
1067
  if (options.onError) options.onError(err);
828
1068
  reject(err);
829
1069
  }
package/dist/index.mjs CHANGED
@@ -1,38 +1,9 @@
1
- // src/types.ts
2
- var VoiceStyle = /* @__PURE__ */ ((VoiceStyle2) => {
3
- VoiceStyle2["F1"] = "F1";
4
- VoiceStyle2["F2"] = "F2";
5
- VoiceStyle2["F3"] = "F3";
6
- VoiceStyle2["F4"] = "F4";
7
- VoiceStyle2["F5"] = "F5";
8
- VoiceStyle2["M1"] = "M1";
9
- VoiceStyle2["M2"] = "M2";
10
- VoiceStyle2["M3"] = "M3";
11
- VoiceStyle2["M4"] = "M4";
12
- VoiceStyle2["M5"] = "M5";
13
- return VoiceStyle2;
14
- })(VoiceStyle || {});
15
- var Language = /* @__PURE__ */ ((Language2) => {
16
- Language2["ENGLISH"] = "en";
17
- Language2["SPANISH"] = "es";
18
- Language2["FRENCH"] = "fr";
19
- Language2["PORTUGUESE"] = "pt";
20
- Language2["KOREAN"] = "ko";
21
- return Language2;
22
- })(Language || {});
23
- var AUDIO_CONFIG = {
24
- SAMPLE_RATE: 16e3,
25
- SPEAKER_SAMPLE_RATE: 44100,
26
- CHANNELS: 1,
27
- CHUNK_DURATION_MS: 20,
28
- get CHUNK_SIZE() {
29
- return Math.floor(this.SAMPLE_RATE * this.CHUNK_DURATION_MS / 1e3);
30
- }
31
- };
32
- var DEFAULT_URLS = {
33
- VOICE_AGENT: "wss://api.lokutor.com/ws/agent",
34
- TTS: "wss://api.lokutor.com/ws/tts"
35
- };
1
+ import {
2
+ AUDIO_CONFIG,
3
+ DEFAULT_URLS,
4
+ Language,
5
+ VoiceStyle
6
+ } from "./chunk-UI24THO7.mjs";
36
7
 
37
8
  // src/audio-utils.ts
38
9
  function pcm16ToFloat32(int16Data) {
@@ -462,6 +433,7 @@ var VoiceAgentClient = class {
462
433
  prompt;
463
434
  voice;
464
435
  language;
436
+ tools = [];
465
437
  // Callbacks
466
438
  onTranscription;
467
439
  onResponse;
@@ -475,6 +447,8 @@ var VoiceAgentClient = class {
475
447
  wantVisemes = false;
476
448
  audioManager = null;
477
449
  enableAudio = false;
450
+ currentGeneration = 0;
451
+ listeners = {};
478
452
  // Connection resilience
479
453
  isUserDisconnect = false;
480
454
  reconnecting = false;
@@ -493,17 +467,23 @@ var VoiceAgentClient = class {
493
467
  this.onError = config.onError;
494
468
  this.wantVisemes = config.visemes || false;
495
469
  this.enableAudio = config.enableAudio ?? false;
470
+ this.tools = config.tools || [];
496
471
  }
497
472
  /**
498
473
  * Connect to the Lokutor Voice Agent server
474
+ * @param customAudioManager Optional replacement for the default audio hardware handler
499
475
  */
500
- async connect() {
476
+ async connect(customAudioManager) {
501
477
  this.isUserDisconnect = false;
502
- if (this.enableAudio) {
503
- if (!this.audioManager) {
478
+ if (this.enableAudio || customAudioManager) {
479
+ if (customAudioManager) {
480
+ this.audioManager = customAudioManager;
481
+ } else if (!this.audioManager && typeof window !== "undefined") {
504
482
  this.audioManager = new BrowserAudioManager();
505
483
  }
506
- await this.audioManager.init();
484
+ if (this.audioManager) {
485
+ await this.audioManager.init();
486
+ }
507
487
  }
508
488
  return new Promise((resolve, reject) => {
509
489
  try {
@@ -564,6 +544,34 @@ var VoiceAgentClient = class {
564
544
  }
565
545
  });
566
546
  }
547
+ /**
548
+ * The "Golden Path" - Starts a managed session with hardware handled automatically.
549
+ * This is the recommended way to start a conversation in both Browser and Node.js.
550
+ */
551
+ async startManaged(config) {
552
+ this.enableAudio = true;
553
+ if (config?.audioManager) {
554
+ this.audioManager = config.audioManager;
555
+ } else if (!this.audioManager) {
556
+ if (typeof window !== "undefined") {
557
+ this.audioManager = new BrowserAudioManager();
558
+ } else {
559
+ try {
560
+ const { NodeAudioManager } = await import("./node-audio-5HOWE6MC.mjs");
561
+ this.audioManager = new NodeAudioManager();
562
+ } catch (e) {
563
+ console.error('\u274C Failed to load NodeAudioManager. Please ensure "speaker" and "node-record-lpcm16" are installed.');
564
+ }
565
+ }
566
+ }
567
+ await this.connect();
568
+ if (this.audioManager && this.isConnected) {
569
+ await this.audioManager.startMicrophone((data) => {
570
+ this.sendAudio(data);
571
+ });
572
+ }
573
+ return this;
574
+ }
567
575
  /**
568
576
  * Send initial configuration to the server
569
577
  */
@@ -573,7 +581,10 @@ var VoiceAgentClient = class {
573
581
  this.ws.send(JSON.stringify({ type: "voice", data: this.voice }));
574
582
  this.ws.send(JSON.stringify({ type: "language", data: this.language }));
575
583
  this.ws.send(JSON.stringify({ type: "visemes", data: this.wantVisemes }));
576
- console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}`);
584
+ if (this.tools && this.tools.length > 0) {
585
+ this.ws.send(JSON.stringify({ type: "tools", data: this.tools }));
586
+ }
587
+ console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}, tools=${this.tools.length}`);
577
588
  }
578
589
  /**
579
590
  * Send raw PCM audio data to the server
@@ -587,7 +598,11 @@ var VoiceAgentClient = class {
587
598
  /**
588
599
  * Handle incoming binary data (audio response)
589
600
  */
590
- handleBinaryMessage(data) {
601
+ handleBinaryMessage(data, generation) {
602
+ if (generation !== void 0 && generation < this.currentGeneration) {
603
+ console.log(`\u{1F5D1}\uFE0F Discarding ghost audio (Gen ${generation} < ${this.currentGeneration})`);
604
+ return;
605
+ }
591
606
  if (this.audioManager) {
592
607
  this.audioManager.playAudio(data);
593
608
  }
@@ -603,7 +618,7 @@ var VoiceAgentClient = class {
603
618
  case "audio":
604
619
  if (msg.data) {
605
620
  const buffer = base64ToUint8Array(msg.data);
606
- this.handleBinaryMessage(buffer);
621
+ this.handleBinaryMessage(buffer, msg.generation);
607
622
  }
608
623
  break;
609
624
  case "transcript":
@@ -622,6 +637,14 @@ var VoiceAgentClient = class {
622
637
  }
623
638
  break;
624
639
  case "status":
640
+ if (msg.data === "thinking") {
641
+ const newGen = msg.generation || 0;
642
+ if (newGen > this.currentGeneration) {
643
+ console.log(`\u{1F9E0} New thought (Gen ${newGen}) - Clearing audio queue`);
644
+ this.currentGeneration = newGen;
645
+ if (this.audioManager) this.audioManager.stopPlayback();
646
+ }
647
+ }
625
648
  if (msg.data === "interrupted" && this.audioManager) {
626
649
  this.audioManager.stopPlayback();
627
650
  }
@@ -643,25 +666,58 @@ var VoiceAgentClient = class {
643
666
  if (this.onError) this.onError(msg.data);
644
667
  console.error(`\u274C Server error: ${msg.data}`);
645
668
  break;
669
+ case "tool_call":
670
+ console.log(`\u{1F6E0}\uFE0F Tool Call: ${msg.name}(${msg.arguments})`);
671
+ break;
646
672
  }
647
673
  } catch (e) {
648
674
  }
649
675
  }
650
- audioListeners = [];
651
- emit(event, data) {
652
- if (event === "audio") {
653
- if (this.onAudioCallback) this.onAudioCallback(data);
654
- this.audioListeners.forEach((l) => l(data));
655
- } else if (event === "visemes") {
656
- if (this.onVisemesCallback) this.onVisemesCallback(data);
657
- this.visemeListeners.forEach((l) => l(data));
676
+ /**
677
+ * Register an event listener (for Python parity)
678
+ */
679
+ on(event, callback) {
680
+ if (!this.listeners[event]) {
681
+ this.listeners[event] = [];
682
+ }
683
+ this.listeners[event].push(callback);
684
+ return this;
685
+ }
686
+ /**
687
+ * Internal emitter for all events
688
+ */
689
+ emit(event, ...args) {
690
+ const legacyMap = {
691
+ "transcription": "onTranscription",
692
+ "response": "onResponse",
693
+ "audio": "onAudioCallback",
694
+ "visemes": "onVisemesCallback",
695
+ "status": "onStatus",
696
+ "error": "onError"
697
+ };
698
+ const legacyKey = legacyMap[event];
699
+ if (legacyKey && this[legacyKey]) {
700
+ try {
701
+ this[legacyKey](...args);
702
+ } catch (e) {
703
+ console.error(`Error in legacy callback ${legacyKey}:`, e);
704
+ }
705
+ }
706
+ if (this.listeners[event]) {
707
+ this.listeners[event].forEach((cb) => {
708
+ try {
709
+ cb(...args);
710
+ } catch (e) {
711
+ console.error(`Error in listener for ${event}:`, e);
712
+ }
713
+ });
658
714
  }
659
715
  }
660
716
  onAudio(callback) {
661
- this.audioListeners.push(callback);
717
+ this.on("audio", callback);
662
718
  }
663
719
  onVisemes(callback) {
664
- this.visemeListeners.push(callback);
720
+ this.on("visemes", callback);
665
721
  }
666
722
  /**
667
723
  * Disconnect from the server
@@ -740,15 +796,28 @@ var TTSClient = class {
740
796
  */
741
797
  synthesize(options) {
742
798
  return new Promise((resolve, reject) => {
799
+ let activityTimeout;
800
+ let ws;
801
+ let startTime;
802
+ let firstByteReceived = false;
803
+ const refreshTimeout = () => {
804
+ if (activityTimeout) clearTimeout(activityTimeout);
805
+ activityTimeout = setTimeout(() => {
806
+ console.log("\u23F1\uFE0F TTS synthesis reached inactivity timeout (2s) - resolving");
807
+ if (ws) ws.close();
808
+ resolve();
809
+ }, 2e3);
810
+ };
743
811
  try {
744
812
  let url = DEFAULT_URLS.TTS;
745
813
  if (this.apiKey) {
746
814
  const separator = url.includes("?") ? "&" : "?";
747
815
  url += `${separator}api_key=${this.apiKey}`;
748
816
  }
749
- const ws = new WebSocket(url);
817
+ ws = new WebSocket(url);
750
818
  ws.binaryType = "arraybuffer";
751
819
  ws.onopen = () => {
820
+ refreshTimeout();
752
821
  const req = {
753
822
  text: options.text,
754
823
  voice: options.voice || "F1" /* F1 */,
@@ -758,9 +827,16 @@ var TTSClient = class {
758
827
  visemes: options.visemes || false
759
828
  };
760
829
  ws.send(JSON.stringify(req));
830
+ startTime = Date.now();
761
831
  };
762
832
  ws.onmessage = async (event) => {
833
+ refreshTimeout();
763
834
  if (event.data instanceof ArrayBuffer) {
835
+ if (!firstByteReceived) {
836
+ const ttfb = Date.now() - startTime;
837
+ if (options.onTTFB) options.onTTFB(ttfb);
838
+ firstByteReceived = true;
839
+ }
764
840
  if (options.onAudio) options.onAudio(new Uint8Array(event.data));
765
841
  } else {
766
842
  try {
@@ -768,18 +844,26 @@ var TTSClient = class {
768
844
  if (Array.isArray(msg) && options.onVisemes) {
769
845
  options.onVisemes(msg);
770
846
  }
847
+ if (msg.type === "eos") {
848
+ if (activityTimeout) clearTimeout(activityTimeout);
849
+ ws.close();
850
+ resolve();
851
+ }
771
852
  } catch (e) {
772
853
  }
773
854
  }
774
855
  };
775
856
  ws.onerror = (err) => {
857
+ if (activityTimeout) clearTimeout(activityTimeout);
776
858
  if (options.onError) options.onError(err);
777
859
  reject(err);
778
860
  };
779
861
  ws.onclose = () => {
862
+ if (activityTimeout) clearTimeout(activityTimeout);
780
863
  resolve();
781
864
  };
782
865
  } catch (err) {
866
+ if (activityTimeout) clearTimeout(activityTimeout);
783
867
  if (options.onError) options.onError(err);
784
868
  reject(err);
785
869
  }
@@ -0,0 +1,94 @@
1
+ import {
2
+ AUDIO_CONFIG
3
+ } from "./chunk-UI24THO7.mjs";
4
+
5
+ // src/node-audio.ts
6
+ var NodeAudioManager = class {
7
+ speaker = null;
8
+ recorder = null;
9
+ recordingStream = null;
10
+ isMuted = false;
11
+ isListening = false;
12
+ constructor() {
13
+ }
14
+ async init() {
15
+ try {
16
+ const Speaker = await import("speaker").catch(() => null);
17
+ if (!Speaker) {
18
+ console.warn('\u26A0\uFE0F Package "speaker" is missing. Hardware output will be disabled.');
19
+ console.warn("\u{1F449} Run: npm install speaker");
20
+ }
21
+ } catch (e) {
22
+ console.error("Error initializing Node audio:", e);
23
+ }
24
+ }
25
+ async startMicrophone(onAudioInput) {
26
+ if (this.isListening) return;
27
+ try {
28
+ const recorder = await import("node-record-lpcm16").catch(() => null);
29
+ if (!recorder) {
30
+ throw new Error('Package "node-record-lpcm16" is missing. Microphone input failed.\n\u{1F449} Run: npm install node-record-lpcm16');
31
+ }
32
+ console.log("\u{1F3A4} Starting microphone (Node.js)...");
33
+ this.recordingStream = recorder.record({
34
+ sampleRate: AUDIO_CONFIG.SAMPLE_RATE,
35
+ threshold: 0,
36
+ verbose: false,
37
+ recordProgram: "sox"
38
+ // default
39
+ });
40
+ this.recordingStream.stream().on("data", (chunk) => {
41
+ if (!this.isMuted && onAudioInput) {
42
+ onAudioInput(new Uint8Array(chunk));
43
+ }
44
+ });
45
+ this.isListening = true;
46
+ } catch (e) {
47
+ console.error("Failed to start microphone:", e.message);
48
+ throw e;
49
+ }
50
+ }
51
+ stopMicrophone() {
52
+ if (this.recordingStream) {
53
+ this.recordingStream.stop();
54
+ this.recordingStream = null;
55
+ }
56
+ this.isListening = false;
57
+ }
58
+ async playAudio(pcm16Data) {
59
+ try {
60
+ if (!this.speaker) {
61
+ const Speaker = (await import("speaker")).default;
62
+ this.speaker = new Speaker({
63
+ channels: AUDIO_CONFIG.CHANNELS,
64
+ bitDepth: 16,
65
+ sampleRate: AUDIO_CONFIG.SPEAKER_SAMPLE_RATE
66
+ });
67
+ }
68
+ this.speaker.write(Buffer.from(pcm16Data));
69
+ } catch (e) {
70
+ }
71
+ }
72
+ stopPlayback() {
73
+ if (this.speaker) {
74
+ this.speaker.end();
75
+ this.speaker = null;
76
+ }
77
+ }
78
+ cleanup() {
79
+ this.stopMicrophone();
80
+ this.stopPlayback();
81
+ }
82
+ isMicMuted() {
83
+ return this.isMuted;
84
+ }
85
+ setMuted(muted) {
86
+ this.isMuted = muted;
87
+ }
88
+ getAmplitude() {
89
+ return 0;
90
+ }
91
+ };
92
+ export {
93
+ NodeAudioManager
94
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lokutor/sdk",
3
- "version": "1.1.11",
3
+ "version": "1.1.13",
4
4
  "description": "JavaScript/TypeScript SDK for Lokutor Real-time Voice AI",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",