@lokutor/sdk 1.1.12 → 1.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-UI24THO7.mjs +44 -0
- package/dist/index.d.mts +35 -3
- package/dist/index.d.ts +35 -3
- package/dist/index.js +269 -50
- package/dist/index.mjs +113 -50
- package/dist/node-audio-5HOWE6MC.mjs +94 -0
- package/package.json +1 -1
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
// src/types.ts
|
|
2
|
+
var VoiceStyle = /* @__PURE__ */ ((VoiceStyle2) => {
|
|
3
|
+
VoiceStyle2["F1"] = "F1";
|
|
4
|
+
VoiceStyle2["F2"] = "F2";
|
|
5
|
+
VoiceStyle2["F3"] = "F3";
|
|
6
|
+
VoiceStyle2["F4"] = "F4";
|
|
7
|
+
VoiceStyle2["F5"] = "F5";
|
|
8
|
+
VoiceStyle2["M1"] = "M1";
|
|
9
|
+
VoiceStyle2["M2"] = "M2";
|
|
10
|
+
VoiceStyle2["M3"] = "M3";
|
|
11
|
+
VoiceStyle2["M4"] = "M4";
|
|
12
|
+
VoiceStyle2["M5"] = "M5";
|
|
13
|
+
return VoiceStyle2;
|
|
14
|
+
})(VoiceStyle || {});
|
|
15
|
+
var Language = /* @__PURE__ */ ((Language2) => {
|
|
16
|
+
Language2["ENGLISH"] = "en";
|
|
17
|
+
Language2["SPANISH"] = "es";
|
|
18
|
+
Language2["FRENCH"] = "fr";
|
|
19
|
+
Language2["PORTUGUESE"] = "pt";
|
|
20
|
+
Language2["KOREAN"] = "ko";
|
|
21
|
+
return Language2;
|
|
22
|
+
})(Language || {});
|
|
23
|
+
var AUDIO_CONFIG = {
|
|
24
|
+
SAMPLE_RATE: 16e3,
|
|
25
|
+
SAMPLE_RATE_INPUT: 16e3,
|
|
26
|
+
SPEAKER_SAMPLE_RATE: 44100,
|
|
27
|
+
SAMPLE_RATE_OUTPUT: 44100,
|
|
28
|
+
CHANNELS: 1,
|
|
29
|
+
CHUNK_DURATION_MS: 20,
|
|
30
|
+
get CHUNK_SIZE() {
|
|
31
|
+
return Math.floor(this.SAMPLE_RATE * this.CHUNK_DURATION_MS / 1e3);
|
|
32
|
+
}
|
|
33
|
+
};
|
|
34
|
+
var DEFAULT_URLS = {
|
|
35
|
+
VOICE_AGENT: "wss://api.lokutor.com/ws/agent",
|
|
36
|
+
TTS: "wss://api.lokutor.com/ws/tts"
|
|
37
|
+
};
|
|
38
|
+
|
|
39
|
+
export {
|
|
40
|
+
VoiceStyle,
|
|
41
|
+
Language,
|
|
42
|
+
AUDIO_CONFIG,
|
|
43
|
+
DEFAULT_URLS
|
|
44
|
+
};
|
package/dist/index.d.mts
CHANGED
|
@@ -28,7 +28,9 @@ declare enum Language {
|
|
|
28
28
|
*/
|
|
29
29
|
declare const AUDIO_CONFIG: {
|
|
30
30
|
SAMPLE_RATE: number;
|
|
31
|
+
SAMPLE_RATE_INPUT: number;
|
|
31
32
|
SPEAKER_SAMPLE_RATE: number;
|
|
33
|
+
SAMPLE_RATE_OUTPUT: number;
|
|
32
34
|
CHANNELS: number;
|
|
33
35
|
CHUNK_DURATION_MS: number;
|
|
34
36
|
readonly CHUNK_SIZE: number;
|
|
@@ -120,6 +122,20 @@ interface ToolCall {
|
|
|
120
122
|
arguments: string;
|
|
121
123
|
}
|
|
122
124
|
|
|
125
|
+
/**
|
|
126
|
+
* Interface for audio hardware management (Browser/Node parity)
|
|
127
|
+
*/
|
|
128
|
+
interface AudioManager {
|
|
129
|
+
init(): Promise<void>;
|
|
130
|
+
startMicrophone(onAudioInput: (pcm16Data: Uint8Array) => void): Promise<void>;
|
|
131
|
+
stopMicrophone(): void;
|
|
132
|
+
playAudio(pcm16Data: Uint8Array): void;
|
|
133
|
+
stopPlayback(): void;
|
|
134
|
+
cleanup(): void;
|
|
135
|
+
isMicMuted(): boolean;
|
|
136
|
+
setMuted(muted: boolean): void;
|
|
137
|
+
getAmplitude(): number;
|
|
138
|
+
}
|
|
123
139
|
/**
|
|
124
140
|
* Main client for Lokutor Voice Agent SDK
|
|
125
141
|
*
|
|
@@ -145,6 +161,7 @@ declare class VoiceAgentClient {
|
|
|
145
161
|
private audioManager;
|
|
146
162
|
private enableAudio;
|
|
147
163
|
private currentGeneration;
|
|
164
|
+
private listeners;
|
|
148
165
|
private isUserDisconnect;
|
|
149
166
|
private reconnecting;
|
|
150
167
|
private reconnectAttempts;
|
|
@@ -160,8 +177,16 @@ declare class VoiceAgentClient {
|
|
|
160
177
|
});
|
|
161
178
|
/**
|
|
162
179
|
* Connect to the Lokutor Voice Agent server
|
|
180
|
+
* @param customAudioManager Optional replacement for the default audio hardware handler
|
|
181
|
+
*/
|
|
182
|
+
connect(customAudioManager?: AudioManager): Promise<boolean>;
|
|
183
|
+
/**
|
|
184
|
+
* The "Golden Path" - Starts a managed session with hardware handled automatically.
|
|
185
|
+
* This is the recommended way to start a conversation in both Browser and Node.js.
|
|
163
186
|
*/
|
|
164
|
-
|
|
187
|
+
startManaged(config?: {
|
|
188
|
+
audioManager?: AudioManager;
|
|
189
|
+
}): Promise<this>;
|
|
165
190
|
/**
|
|
166
191
|
* Send initial configuration to the server
|
|
167
192
|
*/
|
|
@@ -179,7 +204,13 @@ declare class VoiceAgentClient {
|
|
|
179
204
|
* Handle incoming text messages (metadata/transcriptions)
|
|
180
205
|
*/
|
|
181
206
|
private handleTextMessage;
|
|
182
|
-
|
|
207
|
+
/**
|
|
208
|
+
* Register an event listener (for Python parity)
|
|
209
|
+
*/
|
|
210
|
+
on(event: string, callback: Function): this;
|
|
211
|
+
/**
|
|
212
|
+
* Internal emitter for all events
|
|
213
|
+
*/
|
|
183
214
|
private emit;
|
|
184
215
|
onAudio(callback: (data: Uint8Array) => void): void;
|
|
185
216
|
onVisemes(callback: (visemes: Viseme[]) => void): void;
|
|
@@ -236,6 +267,7 @@ declare class TTSClient {
|
|
|
236
267
|
visemes?: boolean;
|
|
237
268
|
onAudio?: (data: Uint8Array) => void;
|
|
238
269
|
onVisemes?: (visemes: any[]) => void;
|
|
270
|
+
onTTFB?: (ms: number) => void;
|
|
239
271
|
onError?: (error: any) => void;
|
|
240
272
|
}): Promise<void>;
|
|
241
273
|
}
|
|
@@ -443,4 +475,4 @@ declare class BrowserAudioManager {
|
|
|
443
475
|
isRecording(): boolean;
|
|
444
476
|
}
|
|
445
477
|
|
|
446
|
-
export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
|
|
478
|
+
export { AUDIO_CONFIG, type AnalyserConfig, type AudioManager, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
|
package/dist/index.d.ts
CHANGED
|
@@ -28,7 +28,9 @@ declare enum Language {
|
|
|
28
28
|
*/
|
|
29
29
|
declare const AUDIO_CONFIG: {
|
|
30
30
|
SAMPLE_RATE: number;
|
|
31
|
+
SAMPLE_RATE_INPUT: number;
|
|
31
32
|
SPEAKER_SAMPLE_RATE: number;
|
|
33
|
+
SAMPLE_RATE_OUTPUT: number;
|
|
32
34
|
CHANNELS: number;
|
|
33
35
|
CHUNK_DURATION_MS: number;
|
|
34
36
|
readonly CHUNK_SIZE: number;
|
|
@@ -120,6 +122,20 @@ interface ToolCall {
|
|
|
120
122
|
arguments: string;
|
|
121
123
|
}
|
|
122
124
|
|
|
125
|
+
/**
|
|
126
|
+
* Interface for audio hardware management (Browser/Node parity)
|
|
127
|
+
*/
|
|
128
|
+
interface AudioManager {
|
|
129
|
+
init(): Promise<void>;
|
|
130
|
+
startMicrophone(onAudioInput: (pcm16Data: Uint8Array) => void): Promise<void>;
|
|
131
|
+
stopMicrophone(): void;
|
|
132
|
+
playAudio(pcm16Data: Uint8Array): void;
|
|
133
|
+
stopPlayback(): void;
|
|
134
|
+
cleanup(): void;
|
|
135
|
+
isMicMuted(): boolean;
|
|
136
|
+
setMuted(muted: boolean): void;
|
|
137
|
+
getAmplitude(): number;
|
|
138
|
+
}
|
|
123
139
|
/**
|
|
124
140
|
* Main client for Lokutor Voice Agent SDK
|
|
125
141
|
*
|
|
@@ -145,6 +161,7 @@ declare class VoiceAgentClient {
|
|
|
145
161
|
private audioManager;
|
|
146
162
|
private enableAudio;
|
|
147
163
|
private currentGeneration;
|
|
164
|
+
private listeners;
|
|
148
165
|
private isUserDisconnect;
|
|
149
166
|
private reconnecting;
|
|
150
167
|
private reconnectAttempts;
|
|
@@ -160,8 +177,16 @@ declare class VoiceAgentClient {
|
|
|
160
177
|
});
|
|
161
178
|
/**
|
|
162
179
|
* Connect to the Lokutor Voice Agent server
|
|
180
|
+
* @param customAudioManager Optional replacement for the default audio hardware handler
|
|
181
|
+
*/
|
|
182
|
+
connect(customAudioManager?: AudioManager): Promise<boolean>;
|
|
183
|
+
/**
|
|
184
|
+
* The "Golden Path" - Starts a managed session with hardware handled automatically.
|
|
185
|
+
* This is the recommended way to start a conversation in both Browser and Node.js.
|
|
163
186
|
*/
|
|
164
|
-
|
|
187
|
+
startManaged(config?: {
|
|
188
|
+
audioManager?: AudioManager;
|
|
189
|
+
}): Promise<this>;
|
|
165
190
|
/**
|
|
166
191
|
* Send initial configuration to the server
|
|
167
192
|
*/
|
|
@@ -179,7 +204,13 @@ declare class VoiceAgentClient {
|
|
|
179
204
|
* Handle incoming text messages (metadata/transcriptions)
|
|
180
205
|
*/
|
|
181
206
|
private handleTextMessage;
|
|
182
|
-
|
|
207
|
+
/**
|
|
208
|
+
* Register an event listener (for Python parity)
|
|
209
|
+
*/
|
|
210
|
+
on(event: string, callback: Function): this;
|
|
211
|
+
/**
|
|
212
|
+
* Internal emitter for all events
|
|
213
|
+
*/
|
|
183
214
|
private emit;
|
|
184
215
|
onAudio(callback: (data: Uint8Array) => void): void;
|
|
185
216
|
onVisemes(callback: (visemes: Viseme[]) => void): void;
|
|
@@ -236,6 +267,7 @@ declare class TTSClient {
|
|
|
236
267
|
visemes?: boolean;
|
|
237
268
|
onAudio?: (data: Uint8Array) => void;
|
|
238
269
|
onVisemes?: (visemes: any[]) => void;
|
|
270
|
+
onTTFB?: (ms: number) => void;
|
|
239
271
|
onError?: (error: any) => void;
|
|
240
272
|
}): Promise<void>;
|
|
241
273
|
}
|
|
@@ -443,4 +475,4 @@ declare class BrowserAudioManager {
|
|
|
443
475
|
isRecording(): boolean;
|
|
444
476
|
}
|
|
445
477
|
|
|
446
|
-
export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
|
|
478
|
+
export { AUDIO_CONFIG, type AnalyserConfig, type AudioManager, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
|
package/dist/index.js
CHANGED
|
@@ -1,8 +1,13 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
+
var __create = Object.create;
|
|
2
3
|
var __defProp = Object.defineProperty;
|
|
3
4
|
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
5
|
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
5
7
|
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
|
+
var __esm = (fn, res) => function __init() {
|
|
9
|
+
return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
|
|
10
|
+
};
|
|
6
11
|
var __export = (target, all) => {
|
|
7
12
|
for (var name in all)
|
|
8
13
|
__defProp(target, name, { get: all[name], enumerable: true });
|
|
@@ -15,8 +20,159 @@ var __copyProps = (to, from, except, desc) => {
|
|
|
15
20
|
}
|
|
16
21
|
return to;
|
|
17
22
|
};
|
|
23
|
+
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
24
|
+
// If the importer is in node compatibility mode or this is not an ESM
|
|
25
|
+
// file that has been converted to a CommonJS file using a Babel-
|
|
26
|
+
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
27
|
+
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
28
|
+
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
29
|
+
mod
|
|
30
|
+
));
|
|
18
31
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
32
|
|
|
33
|
+
// src/types.ts
|
|
34
|
+
var VoiceStyle, Language, AUDIO_CONFIG, DEFAULT_URLS;
|
|
35
|
+
var init_types = __esm({
|
|
36
|
+
"src/types.ts"() {
|
|
37
|
+
"use strict";
|
|
38
|
+
VoiceStyle = /* @__PURE__ */ ((VoiceStyle2) => {
|
|
39
|
+
VoiceStyle2["F1"] = "F1";
|
|
40
|
+
VoiceStyle2["F2"] = "F2";
|
|
41
|
+
VoiceStyle2["F3"] = "F3";
|
|
42
|
+
VoiceStyle2["F4"] = "F4";
|
|
43
|
+
VoiceStyle2["F5"] = "F5";
|
|
44
|
+
VoiceStyle2["M1"] = "M1";
|
|
45
|
+
VoiceStyle2["M2"] = "M2";
|
|
46
|
+
VoiceStyle2["M3"] = "M3";
|
|
47
|
+
VoiceStyle2["M4"] = "M4";
|
|
48
|
+
VoiceStyle2["M5"] = "M5";
|
|
49
|
+
return VoiceStyle2;
|
|
50
|
+
})(VoiceStyle || {});
|
|
51
|
+
Language = /* @__PURE__ */ ((Language2) => {
|
|
52
|
+
Language2["ENGLISH"] = "en";
|
|
53
|
+
Language2["SPANISH"] = "es";
|
|
54
|
+
Language2["FRENCH"] = "fr";
|
|
55
|
+
Language2["PORTUGUESE"] = "pt";
|
|
56
|
+
Language2["KOREAN"] = "ko";
|
|
57
|
+
return Language2;
|
|
58
|
+
})(Language || {});
|
|
59
|
+
AUDIO_CONFIG = {
|
|
60
|
+
SAMPLE_RATE: 16e3,
|
|
61
|
+
SAMPLE_RATE_INPUT: 16e3,
|
|
62
|
+
SPEAKER_SAMPLE_RATE: 44100,
|
|
63
|
+
SAMPLE_RATE_OUTPUT: 44100,
|
|
64
|
+
CHANNELS: 1,
|
|
65
|
+
CHUNK_DURATION_MS: 20,
|
|
66
|
+
get CHUNK_SIZE() {
|
|
67
|
+
return Math.floor(this.SAMPLE_RATE * this.CHUNK_DURATION_MS / 1e3);
|
|
68
|
+
}
|
|
69
|
+
};
|
|
70
|
+
DEFAULT_URLS = {
|
|
71
|
+
VOICE_AGENT: "wss://api.lokutor.com/ws/agent",
|
|
72
|
+
TTS: "wss://api.lokutor.com/ws/tts"
|
|
73
|
+
};
|
|
74
|
+
}
|
|
75
|
+
});
|
|
76
|
+
|
|
77
|
+
// src/node-audio.ts
|
|
78
|
+
var node_audio_exports = {};
|
|
79
|
+
__export(node_audio_exports, {
|
|
80
|
+
NodeAudioManager: () => NodeAudioManager
|
|
81
|
+
});
|
|
82
|
+
var NodeAudioManager;
|
|
83
|
+
var init_node_audio = __esm({
|
|
84
|
+
"src/node-audio.ts"() {
|
|
85
|
+
"use strict";
|
|
86
|
+
init_types();
|
|
87
|
+
NodeAudioManager = class {
|
|
88
|
+
speaker = null;
|
|
89
|
+
recorder = null;
|
|
90
|
+
recordingStream = null;
|
|
91
|
+
isMuted = false;
|
|
92
|
+
isListening = false;
|
|
93
|
+
constructor() {
|
|
94
|
+
}
|
|
95
|
+
async init() {
|
|
96
|
+
try {
|
|
97
|
+
const Speaker = await import("speaker").catch(() => null);
|
|
98
|
+
if (!Speaker) {
|
|
99
|
+
console.warn('\u26A0\uFE0F Package "speaker" is missing. Hardware output will be disabled.');
|
|
100
|
+
console.warn("\u{1F449} Run: npm install speaker");
|
|
101
|
+
}
|
|
102
|
+
} catch (e) {
|
|
103
|
+
console.error("Error initializing Node audio:", e);
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
async startMicrophone(onAudioInput) {
|
|
107
|
+
if (this.isListening) return;
|
|
108
|
+
try {
|
|
109
|
+
const recorder = await import("node-record-lpcm16").catch(() => null);
|
|
110
|
+
if (!recorder) {
|
|
111
|
+
throw new Error('Package "node-record-lpcm16" is missing. Microphone input failed.\n\u{1F449} Run: npm install node-record-lpcm16');
|
|
112
|
+
}
|
|
113
|
+
console.log("\u{1F3A4} Starting microphone (Node.js)...");
|
|
114
|
+
this.recordingStream = recorder.record({
|
|
115
|
+
sampleRate: AUDIO_CONFIG.SAMPLE_RATE,
|
|
116
|
+
threshold: 0,
|
|
117
|
+
verbose: false,
|
|
118
|
+
recordProgram: "sox"
|
|
119
|
+
// default
|
|
120
|
+
});
|
|
121
|
+
this.recordingStream.stream().on("data", (chunk) => {
|
|
122
|
+
if (!this.isMuted && onAudioInput) {
|
|
123
|
+
onAudioInput(new Uint8Array(chunk));
|
|
124
|
+
}
|
|
125
|
+
});
|
|
126
|
+
this.isListening = true;
|
|
127
|
+
} catch (e) {
|
|
128
|
+
console.error("Failed to start microphone:", e.message);
|
|
129
|
+
throw e;
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
stopMicrophone() {
|
|
133
|
+
if (this.recordingStream) {
|
|
134
|
+
this.recordingStream.stop();
|
|
135
|
+
this.recordingStream = null;
|
|
136
|
+
}
|
|
137
|
+
this.isListening = false;
|
|
138
|
+
}
|
|
139
|
+
async playAudio(pcm16Data) {
|
|
140
|
+
try {
|
|
141
|
+
if (!this.speaker) {
|
|
142
|
+
const Speaker = (await import("speaker")).default;
|
|
143
|
+
this.speaker = new Speaker({
|
|
144
|
+
channels: AUDIO_CONFIG.CHANNELS,
|
|
145
|
+
bitDepth: 16,
|
|
146
|
+
sampleRate: AUDIO_CONFIG.SPEAKER_SAMPLE_RATE
|
|
147
|
+
});
|
|
148
|
+
}
|
|
149
|
+
this.speaker.write(Buffer.from(pcm16Data));
|
|
150
|
+
} catch (e) {
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
stopPlayback() {
|
|
154
|
+
if (this.speaker) {
|
|
155
|
+
this.speaker.end();
|
|
156
|
+
this.speaker = null;
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
cleanup() {
|
|
160
|
+
this.stopMicrophone();
|
|
161
|
+
this.stopPlayback();
|
|
162
|
+
}
|
|
163
|
+
isMicMuted() {
|
|
164
|
+
return this.isMuted;
|
|
165
|
+
}
|
|
166
|
+
setMuted(muted) {
|
|
167
|
+
this.isMuted = muted;
|
|
168
|
+
}
|
|
169
|
+
getAmplitude() {
|
|
170
|
+
return 0;
|
|
171
|
+
}
|
|
172
|
+
};
|
|
173
|
+
}
|
|
174
|
+
});
|
|
175
|
+
|
|
20
176
|
// src/index.ts
|
|
21
177
|
var index_exports = {};
|
|
22
178
|
__export(index_exports, {
|
|
@@ -41,42 +197,13 @@ __export(index_exports, {
|
|
|
41
197
|
simpleTTS: () => simpleTTS
|
|
42
198
|
});
|
|
43
199
|
module.exports = __toCommonJS(index_exports);
|
|
200
|
+
init_types();
|
|
44
201
|
|
|
45
|
-
// src/
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
VoiceStyle2["F4"] = "F4";
|
|
51
|
-
VoiceStyle2["F5"] = "F5";
|
|
52
|
-
VoiceStyle2["M1"] = "M1";
|
|
53
|
-
VoiceStyle2["M2"] = "M2";
|
|
54
|
-
VoiceStyle2["M3"] = "M3";
|
|
55
|
-
VoiceStyle2["M4"] = "M4";
|
|
56
|
-
VoiceStyle2["M5"] = "M5";
|
|
57
|
-
return VoiceStyle2;
|
|
58
|
-
})(VoiceStyle || {});
|
|
59
|
-
var Language = /* @__PURE__ */ ((Language2) => {
|
|
60
|
-
Language2["ENGLISH"] = "en";
|
|
61
|
-
Language2["SPANISH"] = "es";
|
|
62
|
-
Language2["FRENCH"] = "fr";
|
|
63
|
-
Language2["PORTUGUESE"] = "pt";
|
|
64
|
-
Language2["KOREAN"] = "ko";
|
|
65
|
-
return Language2;
|
|
66
|
-
})(Language || {});
|
|
67
|
-
var AUDIO_CONFIG = {
|
|
68
|
-
SAMPLE_RATE: 16e3,
|
|
69
|
-
SPEAKER_SAMPLE_RATE: 44100,
|
|
70
|
-
CHANNELS: 1,
|
|
71
|
-
CHUNK_DURATION_MS: 20,
|
|
72
|
-
get CHUNK_SIZE() {
|
|
73
|
-
return Math.floor(this.SAMPLE_RATE * this.CHUNK_DURATION_MS / 1e3);
|
|
74
|
-
}
|
|
75
|
-
};
|
|
76
|
-
var DEFAULT_URLS = {
|
|
77
|
-
VOICE_AGENT: "wss://api.lokutor.com/ws/agent",
|
|
78
|
-
TTS: "wss://api.lokutor.com/ws/tts"
|
|
79
|
-
};
|
|
202
|
+
// src/client.ts
|
|
203
|
+
init_types();
|
|
204
|
+
|
|
205
|
+
// src/browser-audio.ts
|
|
206
|
+
init_types();
|
|
80
207
|
|
|
81
208
|
// src/audio-utils.ts
|
|
82
209
|
function pcm16ToFloat32(int16Data) {
|
|
@@ -521,6 +648,7 @@ var VoiceAgentClient = class {
|
|
|
521
648
|
audioManager = null;
|
|
522
649
|
enableAudio = false;
|
|
523
650
|
currentGeneration = 0;
|
|
651
|
+
listeners = {};
|
|
524
652
|
// Connection resilience
|
|
525
653
|
isUserDisconnect = false;
|
|
526
654
|
reconnecting = false;
|
|
@@ -543,14 +671,19 @@ var VoiceAgentClient = class {
|
|
|
543
671
|
}
|
|
544
672
|
/**
|
|
545
673
|
* Connect to the Lokutor Voice Agent server
|
|
674
|
+
* @param customAudioManager Optional replacement for the default audio hardware handler
|
|
546
675
|
*/
|
|
547
|
-
async connect() {
|
|
676
|
+
async connect(customAudioManager) {
|
|
548
677
|
this.isUserDisconnect = false;
|
|
549
|
-
if (this.enableAudio) {
|
|
550
|
-
if (
|
|
678
|
+
if (this.enableAudio || customAudioManager) {
|
|
679
|
+
if (customAudioManager) {
|
|
680
|
+
this.audioManager = customAudioManager;
|
|
681
|
+
} else if (!this.audioManager && typeof window !== "undefined") {
|
|
551
682
|
this.audioManager = new BrowserAudioManager();
|
|
552
683
|
}
|
|
553
|
-
|
|
684
|
+
if (this.audioManager) {
|
|
685
|
+
await this.audioManager.init();
|
|
686
|
+
}
|
|
554
687
|
}
|
|
555
688
|
return new Promise((resolve, reject) => {
|
|
556
689
|
try {
|
|
@@ -611,6 +744,34 @@ var VoiceAgentClient = class {
|
|
|
611
744
|
}
|
|
612
745
|
});
|
|
613
746
|
}
|
|
747
|
+
/**
|
|
748
|
+
* The "Golden Path" - Starts a managed session with hardware handled automatically.
|
|
749
|
+
* This is the recommended way to start a conversation in both Browser and Node.js.
|
|
750
|
+
*/
|
|
751
|
+
async startManaged(config) {
|
|
752
|
+
this.enableAudio = true;
|
|
753
|
+
if (config?.audioManager) {
|
|
754
|
+
this.audioManager = config.audioManager;
|
|
755
|
+
} else if (!this.audioManager) {
|
|
756
|
+
if (typeof window !== "undefined") {
|
|
757
|
+
this.audioManager = new BrowserAudioManager();
|
|
758
|
+
} else {
|
|
759
|
+
try {
|
|
760
|
+
const { NodeAudioManager: NodeAudioManager2 } = await Promise.resolve().then(() => (init_node_audio(), node_audio_exports));
|
|
761
|
+
this.audioManager = new NodeAudioManager2();
|
|
762
|
+
} catch (e) {
|
|
763
|
+
console.error('\u274C Failed to load NodeAudioManager. Please ensure "speaker" and "node-record-lpcm16" are installed.');
|
|
764
|
+
}
|
|
765
|
+
}
|
|
766
|
+
}
|
|
767
|
+
await this.connect();
|
|
768
|
+
if (this.audioManager && this.isConnected) {
|
|
769
|
+
await this.audioManager.startMicrophone((data) => {
|
|
770
|
+
this.sendAudio(data);
|
|
771
|
+
});
|
|
772
|
+
}
|
|
773
|
+
return this;
|
|
774
|
+
}
|
|
614
775
|
/**
|
|
615
776
|
* Send initial configuration to the server
|
|
616
777
|
*/
|
|
@@ -712,21 +873,51 @@ var VoiceAgentClient = class {
|
|
|
712
873
|
} catch (e) {
|
|
713
874
|
}
|
|
714
875
|
}
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
876
|
+
/**
|
|
877
|
+
* Register an event listener (for Python parity)
|
|
878
|
+
*/
|
|
879
|
+
on(event, callback) {
|
|
880
|
+
if (!this.listeners[event]) {
|
|
881
|
+
this.listeners[event] = [];
|
|
882
|
+
}
|
|
883
|
+
this.listeners[event].push(callback);
|
|
884
|
+
return this;
|
|
885
|
+
}
|
|
886
|
+
/**
|
|
887
|
+
* Internal emitter for all events
|
|
888
|
+
*/
|
|
889
|
+
emit(event, ...args) {
|
|
890
|
+
const legacyMap = {
|
|
891
|
+
"transcription": "onTranscription",
|
|
892
|
+
"response": "onResponse",
|
|
893
|
+
"audio": "onAudioCallback",
|
|
894
|
+
"visemes": "onVisemesCallback",
|
|
895
|
+
"status": "onStatus",
|
|
896
|
+
"error": "onError"
|
|
897
|
+
};
|
|
898
|
+
const legacyKey = legacyMap[event];
|
|
899
|
+
if (legacyKey && this[legacyKey]) {
|
|
900
|
+
try {
|
|
901
|
+
this[legacyKey](...args);
|
|
902
|
+
} catch (e) {
|
|
903
|
+
console.error(`Error in legacy callback ${legacyKey}:`, e);
|
|
904
|
+
}
|
|
905
|
+
}
|
|
906
|
+
if (this.listeners[event]) {
|
|
907
|
+
this.listeners[event].forEach((cb) => {
|
|
908
|
+
try {
|
|
909
|
+
cb(...args);
|
|
910
|
+
} catch (e) {
|
|
911
|
+
console.error(`Error in listener for ${event}:`, e);
|
|
912
|
+
}
|
|
913
|
+
});
|
|
723
914
|
}
|
|
724
915
|
}
|
|
725
916
|
onAudio(callback) {
|
|
726
|
-
this.
|
|
917
|
+
this.on("audio", callback);
|
|
727
918
|
}
|
|
728
919
|
onVisemes(callback) {
|
|
729
|
-
this.
|
|
920
|
+
this.on("visemes", callback);
|
|
730
921
|
}
|
|
731
922
|
/**
|
|
732
923
|
* Disconnect from the server
|
|
@@ -805,15 +996,28 @@ var TTSClient = class {
|
|
|
805
996
|
*/
|
|
806
997
|
synthesize(options) {
|
|
807
998
|
return new Promise((resolve, reject) => {
|
|
999
|
+
let activityTimeout;
|
|
1000
|
+
let ws;
|
|
1001
|
+
let startTime;
|
|
1002
|
+
let firstByteReceived = false;
|
|
1003
|
+
const refreshTimeout = () => {
|
|
1004
|
+
if (activityTimeout) clearTimeout(activityTimeout);
|
|
1005
|
+
activityTimeout = setTimeout(() => {
|
|
1006
|
+
console.log("\u23F1\uFE0F TTS synthesis reached inactivity timeout (2s) - resolving");
|
|
1007
|
+
if (ws) ws.close();
|
|
1008
|
+
resolve();
|
|
1009
|
+
}, 2e3);
|
|
1010
|
+
};
|
|
808
1011
|
try {
|
|
809
1012
|
let url = DEFAULT_URLS.TTS;
|
|
810
1013
|
if (this.apiKey) {
|
|
811
1014
|
const separator = url.includes("?") ? "&" : "?";
|
|
812
1015
|
url += `${separator}api_key=${this.apiKey}`;
|
|
813
1016
|
}
|
|
814
|
-
|
|
1017
|
+
ws = new WebSocket(url);
|
|
815
1018
|
ws.binaryType = "arraybuffer";
|
|
816
1019
|
ws.onopen = () => {
|
|
1020
|
+
refreshTimeout();
|
|
817
1021
|
const req = {
|
|
818
1022
|
text: options.text,
|
|
819
1023
|
voice: options.voice || "F1" /* F1 */,
|
|
@@ -823,9 +1027,16 @@ var TTSClient = class {
|
|
|
823
1027
|
visemes: options.visemes || false
|
|
824
1028
|
};
|
|
825
1029
|
ws.send(JSON.stringify(req));
|
|
1030
|
+
startTime = Date.now();
|
|
826
1031
|
};
|
|
827
1032
|
ws.onmessage = async (event) => {
|
|
1033
|
+
refreshTimeout();
|
|
828
1034
|
if (event.data instanceof ArrayBuffer) {
|
|
1035
|
+
if (!firstByteReceived) {
|
|
1036
|
+
const ttfb = Date.now() - startTime;
|
|
1037
|
+
if (options.onTTFB) options.onTTFB(ttfb);
|
|
1038
|
+
firstByteReceived = true;
|
|
1039
|
+
}
|
|
829
1040
|
if (options.onAudio) options.onAudio(new Uint8Array(event.data));
|
|
830
1041
|
} else {
|
|
831
1042
|
try {
|
|
@@ -833,18 +1044,26 @@ var TTSClient = class {
|
|
|
833
1044
|
if (Array.isArray(msg) && options.onVisemes) {
|
|
834
1045
|
options.onVisemes(msg);
|
|
835
1046
|
}
|
|
1047
|
+
if (msg.type === "eos") {
|
|
1048
|
+
if (activityTimeout) clearTimeout(activityTimeout);
|
|
1049
|
+
ws.close();
|
|
1050
|
+
resolve();
|
|
1051
|
+
}
|
|
836
1052
|
} catch (e) {
|
|
837
1053
|
}
|
|
838
1054
|
}
|
|
839
1055
|
};
|
|
840
1056
|
ws.onerror = (err) => {
|
|
1057
|
+
if (activityTimeout) clearTimeout(activityTimeout);
|
|
841
1058
|
if (options.onError) options.onError(err);
|
|
842
1059
|
reject(err);
|
|
843
1060
|
};
|
|
844
1061
|
ws.onclose = () => {
|
|
1062
|
+
if (activityTimeout) clearTimeout(activityTimeout);
|
|
845
1063
|
resolve();
|
|
846
1064
|
};
|
|
847
1065
|
} catch (err) {
|
|
1066
|
+
if (activityTimeout) clearTimeout(activityTimeout);
|
|
848
1067
|
if (options.onError) options.onError(err);
|
|
849
1068
|
reject(err);
|
|
850
1069
|
}
|
package/dist/index.mjs
CHANGED
|
@@ -1,38 +1,9 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
VoiceStyle2["F5"] = "F5";
|
|
8
|
-
VoiceStyle2["M1"] = "M1";
|
|
9
|
-
VoiceStyle2["M2"] = "M2";
|
|
10
|
-
VoiceStyle2["M3"] = "M3";
|
|
11
|
-
VoiceStyle2["M4"] = "M4";
|
|
12
|
-
VoiceStyle2["M5"] = "M5";
|
|
13
|
-
return VoiceStyle2;
|
|
14
|
-
})(VoiceStyle || {});
|
|
15
|
-
var Language = /* @__PURE__ */ ((Language2) => {
|
|
16
|
-
Language2["ENGLISH"] = "en";
|
|
17
|
-
Language2["SPANISH"] = "es";
|
|
18
|
-
Language2["FRENCH"] = "fr";
|
|
19
|
-
Language2["PORTUGUESE"] = "pt";
|
|
20
|
-
Language2["KOREAN"] = "ko";
|
|
21
|
-
return Language2;
|
|
22
|
-
})(Language || {});
|
|
23
|
-
var AUDIO_CONFIG = {
|
|
24
|
-
SAMPLE_RATE: 16e3,
|
|
25
|
-
SPEAKER_SAMPLE_RATE: 44100,
|
|
26
|
-
CHANNELS: 1,
|
|
27
|
-
CHUNK_DURATION_MS: 20,
|
|
28
|
-
get CHUNK_SIZE() {
|
|
29
|
-
return Math.floor(this.SAMPLE_RATE * this.CHUNK_DURATION_MS / 1e3);
|
|
30
|
-
}
|
|
31
|
-
};
|
|
32
|
-
var DEFAULT_URLS = {
|
|
33
|
-
VOICE_AGENT: "wss://api.lokutor.com/ws/agent",
|
|
34
|
-
TTS: "wss://api.lokutor.com/ws/tts"
|
|
35
|
-
};
|
|
1
|
+
import {
|
|
2
|
+
AUDIO_CONFIG,
|
|
3
|
+
DEFAULT_URLS,
|
|
4
|
+
Language,
|
|
5
|
+
VoiceStyle
|
|
6
|
+
} from "./chunk-UI24THO7.mjs";
|
|
36
7
|
|
|
37
8
|
// src/audio-utils.ts
|
|
38
9
|
function pcm16ToFloat32(int16Data) {
|
|
@@ -477,6 +448,7 @@ var VoiceAgentClient = class {
|
|
|
477
448
|
audioManager = null;
|
|
478
449
|
enableAudio = false;
|
|
479
450
|
currentGeneration = 0;
|
|
451
|
+
listeners = {};
|
|
480
452
|
// Connection resilience
|
|
481
453
|
isUserDisconnect = false;
|
|
482
454
|
reconnecting = false;
|
|
@@ -499,14 +471,19 @@ var VoiceAgentClient = class {
|
|
|
499
471
|
}
|
|
500
472
|
/**
|
|
501
473
|
* Connect to the Lokutor Voice Agent server
|
|
474
|
+
* @param customAudioManager Optional replacement for the default audio hardware handler
|
|
502
475
|
*/
|
|
503
|
-
async connect() {
|
|
476
|
+
async connect(customAudioManager) {
|
|
504
477
|
this.isUserDisconnect = false;
|
|
505
|
-
if (this.enableAudio) {
|
|
506
|
-
if (
|
|
478
|
+
if (this.enableAudio || customAudioManager) {
|
|
479
|
+
if (customAudioManager) {
|
|
480
|
+
this.audioManager = customAudioManager;
|
|
481
|
+
} else if (!this.audioManager && typeof window !== "undefined") {
|
|
507
482
|
this.audioManager = new BrowserAudioManager();
|
|
508
483
|
}
|
|
509
|
-
|
|
484
|
+
if (this.audioManager) {
|
|
485
|
+
await this.audioManager.init();
|
|
486
|
+
}
|
|
510
487
|
}
|
|
511
488
|
return new Promise((resolve, reject) => {
|
|
512
489
|
try {
|
|
@@ -567,6 +544,34 @@ var VoiceAgentClient = class {
|
|
|
567
544
|
}
|
|
568
545
|
});
|
|
569
546
|
}
|
|
547
|
+
/**
|
|
548
|
+
* The "Golden Path" - Starts a managed session with hardware handled automatically.
|
|
549
|
+
* This is the recommended way to start a conversation in both Browser and Node.js.
|
|
550
|
+
*/
|
|
551
|
+
async startManaged(config) {
|
|
552
|
+
this.enableAudio = true;
|
|
553
|
+
if (config?.audioManager) {
|
|
554
|
+
this.audioManager = config.audioManager;
|
|
555
|
+
} else if (!this.audioManager) {
|
|
556
|
+
if (typeof window !== "undefined") {
|
|
557
|
+
this.audioManager = new BrowserAudioManager();
|
|
558
|
+
} else {
|
|
559
|
+
try {
|
|
560
|
+
const { NodeAudioManager } = await import("./node-audio-5HOWE6MC.mjs");
|
|
561
|
+
this.audioManager = new NodeAudioManager();
|
|
562
|
+
} catch (e) {
|
|
563
|
+
console.error('\u274C Failed to load NodeAudioManager. Please ensure "speaker" and "node-record-lpcm16" are installed.');
|
|
564
|
+
}
|
|
565
|
+
}
|
|
566
|
+
}
|
|
567
|
+
await this.connect();
|
|
568
|
+
if (this.audioManager && this.isConnected) {
|
|
569
|
+
await this.audioManager.startMicrophone((data) => {
|
|
570
|
+
this.sendAudio(data);
|
|
571
|
+
});
|
|
572
|
+
}
|
|
573
|
+
return this;
|
|
574
|
+
}
|
|
570
575
|
/**
|
|
571
576
|
* Send initial configuration to the server
|
|
572
577
|
*/
|
|
@@ -668,21 +673,51 @@ var VoiceAgentClient = class {
|
|
|
668
673
|
} catch (e) {
|
|
669
674
|
}
|
|
670
675
|
}
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
676
|
+
/**
|
|
677
|
+
* Register an event listener (for Python parity)
|
|
678
|
+
*/
|
|
679
|
+
on(event, callback) {
|
|
680
|
+
if (!this.listeners[event]) {
|
|
681
|
+
this.listeners[event] = [];
|
|
682
|
+
}
|
|
683
|
+
this.listeners[event].push(callback);
|
|
684
|
+
return this;
|
|
685
|
+
}
|
|
686
|
+
/**
|
|
687
|
+
* Internal emitter for all events
|
|
688
|
+
*/
|
|
689
|
+
emit(event, ...args) {
|
|
690
|
+
const legacyMap = {
|
|
691
|
+
"transcription": "onTranscription",
|
|
692
|
+
"response": "onResponse",
|
|
693
|
+
"audio": "onAudioCallback",
|
|
694
|
+
"visemes": "onVisemesCallback",
|
|
695
|
+
"status": "onStatus",
|
|
696
|
+
"error": "onError"
|
|
697
|
+
};
|
|
698
|
+
const legacyKey = legacyMap[event];
|
|
699
|
+
if (legacyKey && this[legacyKey]) {
|
|
700
|
+
try {
|
|
701
|
+
this[legacyKey](...args);
|
|
702
|
+
} catch (e) {
|
|
703
|
+
console.error(`Error in legacy callback ${legacyKey}:`, e);
|
|
704
|
+
}
|
|
705
|
+
}
|
|
706
|
+
if (this.listeners[event]) {
|
|
707
|
+
this.listeners[event].forEach((cb) => {
|
|
708
|
+
try {
|
|
709
|
+
cb(...args);
|
|
710
|
+
} catch (e) {
|
|
711
|
+
console.error(`Error in listener for ${event}:`, e);
|
|
712
|
+
}
|
|
713
|
+
});
|
|
679
714
|
}
|
|
680
715
|
}
|
|
681
716
|
onAudio(callback) {
|
|
682
|
-
this.
|
|
717
|
+
this.on("audio", callback);
|
|
683
718
|
}
|
|
684
719
|
onVisemes(callback) {
|
|
685
|
-
this.
|
|
720
|
+
this.on("visemes", callback);
|
|
686
721
|
}
|
|
687
722
|
/**
|
|
688
723
|
* Disconnect from the server
|
|
@@ -761,15 +796,28 @@ var TTSClient = class {
|
|
|
761
796
|
*/
|
|
762
797
|
synthesize(options) {
|
|
763
798
|
return new Promise((resolve, reject) => {
|
|
799
|
+
let activityTimeout;
|
|
800
|
+
let ws;
|
|
801
|
+
let startTime;
|
|
802
|
+
let firstByteReceived = false;
|
|
803
|
+
const refreshTimeout = () => {
|
|
804
|
+
if (activityTimeout) clearTimeout(activityTimeout);
|
|
805
|
+
activityTimeout = setTimeout(() => {
|
|
806
|
+
console.log("\u23F1\uFE0F TTS synthesis reached inactivity timeout (2s) - resolving");
|
|
807
|
+
if (ws) ws.close();
|
|
808
|
+
resolve();
|
|
809
|
+
}, 2e3);
|
|
810
|
+
};
|
|
764
811
|
try {
|
|
765
812
|
let url = DEFAULT_URLS.TTS;
|
|
766
813
|
if (this.apiKey) {
|
|
767
814
|
const separator = url.includes("?") ? "&" : "?";
|
|
768
815
|
url += `${separator}api_key=${this.apiKey}`;
|
|
769
816
|
}
|
|
770
|
-
|
|
817
|
+
ws = new WebSocket(url);
|
|
771
818
|
ws.binaryType = "arraybuffer";
|
|
772
819
|
ws.onopen = () => {
|
|
820
|
+
refreshTimeout();
|
|
773
821
|
const req = {
|
|
774
822
|
text: options.text,
|
|
775
823
|
voice: options.voice || "F1" /* F1 */,
|
|
@@ -779,9 +827,16 @@ var TTSClient = class {
|
|
|
779
827
|
visemes: options.visemes || false
|
|
780
828
|
};
|
|
781
829
|
ws.send(JSON.stringify(req));
|
|
830
|
+
startTime = Date.now();
|
|
782
831
|
};
|
|
783
832
|
ws.onmessage = async (event) => {
|
|
833
|
+
refreshTimeout();
|
|
784
834
|
if (event.data instanceof ArrayBuffer) {
|
|
835
|
+
if (!firstByteReceived) {
|
|
836
|
+
const ttfb = Date.now() - startTime;
|
|
837
|
+
if (options.onTTFB) options.onTTFB(ttfb);
|
|
838
|
+
firstByteReceived = true;
|
|
839
|
+
}
|
|
785
840
|
if (options.onAudio) options.onAudio(new Uint8Array(event.data));
|
|
786
841
|
} else {
|
|
787
842
|
try {
|
|
@@ -789,18 +844,26 @@ var TTSClient = class {
|
|
|
789
844
|
if (Array.isArray(msg) && options.onVisemes) {
|
|
790
845
|
options.onVisemes(msg);
|
|
791
846
|
}
|
|
847
|
+
if (msg.type === "eos") {
|
|
848
|
+
if (activityTimeout) clearTimeout(activityTimeout);
|
|
849
|
+
ws.close();
|
|
850
|
+
resolve();
|
|
851
|
+
}
|
|
792
852
|
} catch (e) {
|
|
793
853
|
}
|
|
794
854
|
}
|
|
795
855
|
};
|
|
796
856
|
ws.onerror = (err) => {
|
|
857
|
+
if (activityTimeout) clearTimeout(activityTimeout);
|
|
797
858
|
if (options.onError) options.onError(err);
|
|
798
859
|
reject(err);
|
|
799
860
|
};
|
|
800
861
|
ws.onclose = () => {
|
|
862
|
+
if (activityTimeout) clearTimeout(activityTimeout);
|
|
801
863
|
resolve();
|
|
802
864
|
};
|
|
803
865
|
} catch (err) {
|
|
866
|
+
if (activityTimeout) clearTimeout(activityTimeout);
|
|
804
867
|
if (options.onError) options.onError(err);
|
|
805
868
|
reject(err);
|
|
806
869
|
}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import {
|
|
2
|
+
AUDIO_CONFIG
|
|
3
|
+
} from "./chunk-UI24THO7.mjs";
|
|
4
|
+
|
|
5
|
+
// src/node-audio.ts
|
|
6
|
+
var NodeAudioManager = class {
|
|
7
|
+
speaker = null;
|
|
8
|
+
recorder = null;
|
|
9
|
+
recordingStream = null;
|
|
10
|
+
isMuted = false;
|
|
11
|
+
isListening = false;
|
|
12
|
+
constructor() {
|
|
13
|
+
}
|
|
14
|
+
async init() {
|
|
15
|
+
try {
|
|
16
|
+
const Speaker = await import("speaker").catch(() => null);
|
|
17
|
+
if (!Speaker) {
|
|
18
|
+
console.warn('\u26A0\uFE0F Package "speaker" is missing. Hardware output will be disabled.');
|
|
19
|
+
console.warn("\u{1F449} Run: npm install speaker");
|
|
20
|
+
}
|
|
21
|
+
} catch (e) {
|
|
22
|
+
console.error("Error initializing Node audio:", e);
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
async startMicrophone(onAudioInput) {
|
|
26
|
+
if (this.isListening) return;
|
|
27
|
+
try {
|
|
28
|
+
const recorder = await import("node-record-lpcm16").catch(() => null);
|
|
29
|
+
if (!recorder) {
|
|
30
|
+
throw new Error('Package "node-record-lpcm16" is missing. Microphone input failed.\n\u{1F449} Run: npm install node-record-lpcm16');
|
|
31
|
+
}
|
|
32
|
+
console.log("\u{1F3A4} Starting microphone (Node.js)...");
|
|
33
|
+
this.recordingStream = recorder.record({
|
|
34
|
+
sampleRate: AUDIO_CONFIG.SAMPLE_RATE,
|
|
35
|
+
threshold: 0,
|
|
36
|
+
verbose: false,
|
|
37
|
+
recordProgram: "sox"
|
|
38
|
+
// default
|
|
39
|
+
});
|
|
40
|
+
this.recordingStream.stream().on("data", (chunk) => {
|
|
41
|
+
if (!this.isMuted && onAudioInput) {
|
|
42
|
+
onAudioInput(new Uint8Array(chunk));
|
|
43
|
+
}
|
|
44
|
+
});
|
|
45
|
+
this.isListening = true;
|
|
46
|
+
} catch (e) {
|
|
47
|
+
console.error("Failed to start microphone:", e.message);
|
|
48
|
+
throw e;
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
stopMicrophone() {
|
|
52
|
+
if (this.recordingStream) {
|
|
53
|
+
this.recordingStream.stop();
|
|
54
|
+
this.recordingStream = null;
|
|
55
|
+
}
|
|
56
|
+
this.isListening = false;
|
|
57
|
+
}
|
|
58
|
+
async playAudio(pcm16Data) {
|
|
59
|
+
try {
|
|
60
|
+
if (!this.speaker) {
|
|
61
|
+
const Speaker = (await import("speaker")).default;
|
|
62
|
+
this.speaker = new Speaker({
|
|
63
|
+
channels: AUDIO_CONFIG.CHANNELS,
|
|
64
|
+
bitDepth: 16,
|
|
65
|
+
sampleRate: AUDIO_CONFIG.SPEAKER_SAMPLE_RATE
|
|
66
|
+
});
|
|
67
|
+
}
|
|
68
|
+
this.speaker.write(Buffer.from(pcm16Data));
|
|
69
|
+
} catch (e) {
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
stopPlayback() {
|
|
73
|
+
if (this.speaker) {
|
|
74
|
+
this.speaker.end();
|
|
75
|
+
this.speaker = null;
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
cleanup() {
|
|
79
|
+
this.stopMicrophone();
|
|
80
|
+
this.stopPlayback();
|
|
81
|
+
}
|
|
82
|
+
isMicMuted() {
|
|
83
|
+
return this.isMuted;
|
|
84
|
+
}
|
|
85
|
+
setMuted(muted) {
|
|
86
|
+
this.isMuted = muted;
|
|
87
|
+
}
|
|
88
|
+
getAmplitude() {
|
|
89
|
+
return 0;
|
|
90
|
+
}
|
|
91
|
+
};
|
|
92
|
+
export {
|
|
93
|
+
NodeAudioManager
|
|
94
|
+
};
|