@micdrop/server 1.7.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +232 -232
- package/dist/index.d.mts +150 -53
- package/dist/index.d.ts +150 -53
- package/dist/index.js +460 -220
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +444 -215
- package/dist/index.mjs.map +1 -1
- package/package.json +7 -3
package/dist/index.d.mts
CHANGED
|
@@ -1,86 +1,183 @@
|
|
|
1
|
-
import
|
|
1
|
+
import EventEmitter from 'eventemitter3';
|
|
2
|
+
import { Readable, PassThrough } from 'stream';
|
|
3
|
+
import WebSocket, { WebSocket as WebSocket$1 } from 'ws';
|
|
2
4
|
|
|
3
|
-
declare
|
|
5
|
+
declare class Logger {
|
|
6
|
+
private readonly name;
|
|
7
|
+
constructor(name: string);
|
|
8
|
+
log(...message: any[]): void;
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
declare const MIME_TYPE_TO_EXTENSION: {
|
|
12
|
+
readonly 'audio/wav': "wav";
|
|
13
|
+
readonly 'audio/ogg': "ogg";
|
|
14
|
+
readonly 'audio/mpeg': "mp3";
|
|
15
|
+
readonly 'audio/webm': "webm";
|
|
16
|
+
readonly 'audio/mp4': "mp4";
|
|
17
|
+
readonly 'audio/flac': "flac";
|
|
18
|
+
};
|
|
19
|
+
interface STTEvents {
|
|
20
|
+
Transcript: [string];
|
|
21
|
+
}
|
|
22
|
+
declare abstract class STT extends EventEmitter<STTEvents> {
|
|
23
|
+
protected mimeType?: keyof typeof MIME_TYPE_TO_EXTENSION;
|
|
24
|
+
logger?: Logger;
|
|
25
|
+
transcribe(audioStream: Readable): void;
|
|
26
|
+
protected log(...message: any[]): void;
|
|
27
|
+
destroy(): void;
|
|
28
|
+
protected get extension(): string;
|
|
29
|
+
private detectMimeType;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* Abstract class for STT, converting stream to file before transcribing
|
|
34
|
+
*/
|
|
35
|
+
declare abstract class FileSTT extends STT {
|
|
36
|
+
abstract transcribeFile(file: File): Promise<string>;
|
|
37
|
+
transcribe(audioStream: Readable): void;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
declare class MockSTT extends FileSTT {
|
|
41
|
+
private i;
|
|
42
|
+
transcribeFile(file: File): Promise<string>;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
declare abstract class TTS {
|
|
46
|
+
logger?: Logger;
|
|
47
|
+
abstract speak(textStream: Readable): Readable;
|
|
48
|
+
abstract cancel(): void;
|
|
49
|
+
protected log(...message: any[]): void;
|
|
50
|
+
destroy(): void;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
declare class MockTTS extends TTS {
|
|
54
|
+
private audioFilePaths;
|
|
55
|
+
constructor(audioFilePaths: string[]);
|
|
56
|
+
speak(textStream: Readable): PassThrough;
|
|
57
|
+
cancel(): void;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
declare enum MicdropClientCommands {
|
|
4
61
|
StartSpeaking = "StartSpeaking",
|
|
5
62
|
StopSpeaking = "StopSpeaking",
|
|
6
63
|
Mute = "Mute"
|
|
7
64
|
}
|
|
8
|
-
declare enum
|
|
65
|
+
declare enum MicdropServerCommands {
|
|
9
66
|
Message = "Message",
|
|
10
67
|
CancelLastAssistantMessage = "CancelLastAssistantMessage",
|
|
11
68
|
CancelLastUserMessage = "CancelLastUserMessage",
|
|
12
69
|
SkipAnswer = "SkipAnswer",
|
|
13
|
-
EnableSpeakerStreaming = "EnableSpeakerStreaming",
|
|
14
70
|
EndCall = "EndCall"
|
|
15
71
|
}
|
|
16
|
-
interface
|
|
17
|
-
systemPrompt: string;
|
|
72
|
+
interface MicdropConfig {
|
|
18
73
|
firstMessage?: string;
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
}
|
|
28
|
-
interface CallSummary {
|
|
29
|
-
conversation: Conversation;
|
|
74
|
+
generateFirstMessage?: boolean;
|
|
75
|
+
agent: Agent;
|
|
76
|
+
stt: STT;
|
|
77
|
+
tts: TTS;
|
|
78
|
+
onEnd?(call: MicdropCallSummary): void;
|
|
79
|
+
}
|
|
80
|
+
interface MicdropCallSummary {
|
|
81
|
+
conversation: MicdropConversation;
|
|
30
82
|
duration: number;
|
|
31
83
|
}
|
|
32
|
-
type
|
|
33
|
-
type
|
|
34
|
-
endCall?: boolean;
|
|
35
|
-
cancelLastUserMessage?: boolean;
|
|
36
|
-
skipAnswer?: boolean;
|
|
37
|
-
};
|
|
38
|
-
type AnswerMetadata = {
|
|
84
|
+
type MicdropConversation = MicdropConversationMessage[];
|
|
85
|
+
type MicdropAnswerMetadata = {
|
|
39
86
|
[key: string]: any;
|
|
40
87
|
};
|
|
41
|
-
interface
|
|
88
|
+
interface MicdropConversationMessage<Data extends MicdropAnswerMetadata = MicdropAnswerMetadata> {
|
|
42
89
|
role: 'system' | 'user' | 'assistant';
|
|
43
90
|
content: string;
|
|
44
|
-
commands?: AnswerCommands;
|
|
45
91
|
metadata?: Data;
|
|
46
92
|
}
|
|
93
|
+
type DeepPartial<T> = T extends object ? {
|
|
94
|
+
[P in keyof T]?: DeepPartial<T[P]>;
|
|
95
|
+
} : T;
|
|
47
96
|
|
|
48
|
-
interface
|
|
49
|
-
|
|
97
|
+
interface AgentOptions {
|
|
98
|
+
systemPrompt: string;
|
|
50
99
|
}
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
100
|
+
interface AgentEvents {
|
|
101
|
+
Message: [MicdropConversationMessage];
|
|
102
|
+
CancelLastUserMessage: [];
|
|
103
|
+
CancelLastAssistantMessage: [];
|
|
104
|
+
SkipAnswer: [];
|
|
105
|
+
EndCall: [];
|
|
106
|
+
}
|
|
107
|
+
interface AgentAnswerReturn {
|
|
108
|
+
message: Promise<string>;
|
|
109
|
+
stream: Readable;
|
|
110
|
+
}
|
|
111
|
+
interface TextPromise {
|
|
112
|
+
promise: Promise<string>;
|
|
113
|
+
resolve: (value: string) => void;
|
|
114
|
+
reject: (reason?: any) => void;
|
|
115
|
+
}
|
|
116
|
+
declare abstract class Agent<Options extends AgentOptions = AgentOptions> extends EventEmitter {
|
|
117
|
+
protected options: Options;
|
|
118
|
+
logger?: Logger;
|
|
119
|
+
conversation: MicdropConversation;
|
|
120
|
+
constructor(options: Options);
|
|
121
|
+
abstract answer(): AgentAnswerReturn;
|
|
122
|
+
abstract cancel(): void;
|
|
123
|
+
addUserMessage(text: string): void;
|
|
124
|
+
addAssistantMessage(text: string): void;
|
|
125
|
+
protected addMessage(role: 'user' | 'assistant' | 'system', text: string): void;
|
|
126
|
+
protected endCall(): void;
|
|
127
|
+
protected cancelLastUserMessage(): void;
|
|
128
|
+
protected cancelLastAssistantMessage(): void;
|
|
129
|
+
protected skipAnswer(): void;
|
|
130
|
+
protected createTextPromise(): TextPromise;
|
|
131
|
+
protected log(...message: any[]): void;
|
|
132
|
+
destroy(): void;
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
declare class MockAgent extends Agent {
|
|
136
|
+
private i;
|
|
137
|
+
constructor();
|
|
138
|
+
answer(): {
|
|
139
|
+
message: Promise<string>;
|
|
140
|
+
stream: PassThrough;
|
|
141
|
+
};
|
|
142
|
+
cancel(): void;
|
|
71
143
|
}
|
|
72
144
|
|
|
73
|
-
declare
|
|
145
|
+
declare function convertToPCM(audioStream: Readable, sampleRate?: number, bitDepth?: number): PassThrough;
|
|
146
|
+
declare function convertToOpus(audioStream: Readable, sampleRate?: number): PassThrough;
|
|
147
|
+
declare function convertPCMToOpus(audioStream: Readable, sampleRate?: number): PassThrough;
|
|
148
|
+
|
|
149
|
+
declare enum MicdropErrorCode {
|
|
74
150
|
BadRequest = 4400,
|
|
75
151
|
Unauthorized = 4401,
|
|
76
152
|
NotFound = 4404
|
|
77
153
|
}
|
|
78
|
-
declare class
|
|
154
|
+
declare class MicdropError extends Error {
|
|
79
155
|
code: number;
|
|
80
156
|
constructor(code: number, message: string);
|
|
81
157
|
}
|
|
82
|
-
declare function handleError(socket: WebSocket
|
|
158
|
+
declare function handleError(socket: WebSocket, error: unknown): void;
|
|
159
|
+
|
|
160
|
+
declare class MicdropServer {
|
|
161
|
+
socket: WebSocket$1 | null;
|
|
162
|
+
config: MicdropConfig | null;
|
|
163
|
+
logger?: Logger;
|
|
164
|
+
private startTime;
|
|
165
|
+
private currentUserStream?;
|
|
166
|
+
constructor(socket: WebSocket$1, config: MicdropConfig);
|
|
167
|
+
private log;
|
|
168
|
+
private cancel;
|
|
169
|
+
private onClose;
|
|
170
|
+
private onMessage;
|
|
171
|
+
private onMute;
|
|
172
|
+
private onStartSpeaking;
|
|
173
|
+
private onStopSpeaking;
|
|
174
|
+
private onTranscript;
|
|
175
|
+
private sendFirstMessage;
|
|
176
|
+
private answer;
|
|
177
|
+
private speak;
|
|
178
|
+
private sendAudio;
|
|
179
|
+
}
|
|
83
180
|
|
|
84
|
-
declare function waitForParams<CallParams>(socket: WebSocket, validate: (params: any) => CallParams): Promise<CallParams>;
|
|
181
|
+
declare function waitForParams<CallParams>(socket: WebSocket$1, validate: (params: any) => CallParams): Promise<CallParams>;
|
|
85
182
|
|
|
86
|
-
export { type
|
|
183
|
+
export { Agent, type AgentAnswerReturn, type AgentEvents, type AgentOptions, type DeepPartial, FileSTT, Logger, type MicdropAnswerMetadata, type MicdropCallSummary, MicdropClientCommands, type MicdropConfig, type MicdropConversation, type MicdropConversationMessage, MicdropError, MicdropErrorCode, MicdropServer, MicdropServerCommands, MockAgent, MockSTT, MockTTS, STT, type STTEvents, TTS, type TextPromise, convertPCMToOpus, convertToOpus, convertToPCM, handleError, waitForParams };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,86 +1,183 @@
|
|
|
1
|
-
import
|
|
1
|
+
import EventEmitter from 'eventemitter3';
|
|
2
|
+
import { Readable, PassThrough } from 'stream';
|
|
3
|
+
import WebSocket, { WebSocket as WebSocket$1 } from 'ws';
|
|
2
4
|
|
|
3
|
-
declare
|
|
5
|
+
declare class Logger {
|
|
6
|
+
private readonly name;
|
|
7
|
+
constructor(name: string);
|
|
8
|
+
log(...message: any[]): void;
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
declare const MIME_TYPE_TO_EXTENSION: {
|
|
12
|
+
readonly 'audio/wav': "wav";
|
|
13
|
+
readonly 'audio/ogg': "ogg";
|
|
14
|
+
readonly 'audio/mpeg': "mp3";
|
|
15
|
+
readonly 'audio/webm': "webm";
|
|
16
|
+
readonly 'audio/mp4': "mp4";
|
|
17
|
+
readonly 'audio/flac': "flac";
|
|
18
|
+
};
|
|
19
|
+
interface STTEvents {
|
|
20
|
+
Transcript: [string];
|
|
21
|
+
}
|
|
22
|
+
declare abstract class STT extends EventEmitter<STTEvents> {
|
|
23
|
+
protected mimeType?: keyof typeof MIME_TYPE_TO_EXTENSION;
|
|
24
|
+
logger?: Logger;
|
|
25
|
+
transcribe(audioStream: Readable): void;
|
|
26
|
+
protected log(...message: any[]): void;
|
|
27
|
+
destroy(): void;
|
|
28
|
+
protected get extension(): string;
|
|
29
|
+
private detectMimeType;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* Abstract class for STT, converting stream to file before transcribing
|
|
34
|
+
*/
|
|
35
|
+
declare abstract class FileSTT extends STT {
|
|
36
|
+
abstract transcribeFile(file: File): Promise<string>;
|
|
37
|
+
transcribe(audioStream: Readable): void;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
declare class MockSTT extends FileSTT {
|
|
41
|
+
private i;
|
|
42
|
+
transcribeFile(file: File): Promise<string>;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
declare abstract class TTS {
|
|
46
|
+
logger?: Logger;
|
|
47
|
+
abstract speak(textStream: Readable): Readable;
|
|
48
|
+
abstract cancel(): void;
|
|
49
|
+
protected log(...message: any[]): void;
|
|
50
|
+
destroy(): void;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
declare class MockTTS extends TTS {
|
|
54
|
+
private audioFilePaths;
|
|
55
|
+
constructor(audioFilePaths: string[]);
|
|
56
|
+
speak(textStream: Readable): PassThrough;
|
|
57
|
+
cancel(): void;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
declare enum MicdropClientCommands {
|
|
4
61
|
StartSpeaking = "StartSpeaking",
|
|
5
62
|
StopSpeaking = "StopSpeaking",
|
|
6
63
|
Mute = "Mute"
|
|
7
64
|
}
|
|
8
|
-
declare enum
|
|
65
|
+
declare enum MicdropServerCommands {
|
|
9
66
|
Message = "Message",
|
|
10
67
|
CancelLastAssistantMessage = "CancelLastAssistantMessage",
|
|
11
68
|
CancelLastUserMessage = "CancelLastUserMessage",
|
|
12
69
|
SkipAnswer = "SkipAnswer",
|
|
13
|
-
EnableSpeakerStreaming = "EnableSpeakerStreaming",
|
|
14
70
|
EndCall = "EndCall"
|
|
15
71
|
}
|
|
16
|
-
interface
|
|
17
|
-
systemPrompt: string;
|
|
72
|
+
interface MicdropConfig {
|
|
18
73
|
firstMessage?: string;
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
}
|
|
28
|
-
interface CallSummary {
|
|
29
|
-
conversation: Conversation;
|
|
74
|
+
generateFirstMessage?: boolean;
|
|
75
|
+
agent: Agent;
|
|
76
|
+
stt: STT;
|
|
77
|
+
tts: TTS;
|
|
78
|
+
onEnd?(call: MicdropCallSummary): void;
|
|
79
|
+
}
|
|
80
|
+
interface MicdropCallSummary {
|
|
81
|
+
conversation: MicdropConversation;
|
|
30
82
|
duration: number;
|
|
31
83
|
}
|
|
32
|
-
type
|
|
33
|
-
type
|
|
34
|
-
endCall?: boolean;
|
|
35
|
-
cancelLastUserMessage?: boolean;
|
|
36
|
-
skipAnswer?: boolean;
|
|
37
|
-
};
|
|
38
|
-
type AnswerMetadata = {
|
|
84
|
+
type MicdropConversation = MicdropConversationMessage[];
|
|
85
|
+
type MicdropAnswerMetadata = {
|
|
39
86
|
[key: string]: any;
|
|
40
87
|
};
|
|
41
|
-
interface
|
|
88
|
+
interface MicdropConversationMessage<Data extends MicdropAnswerMetadata = MicdropAnswerMetadata> {
|
|
42
89
|
role: 'system' | 'user' | 'assistant';
|
|
43
90
|
content: string;
|
|
44
|
-
commands?: AnswerCommands;
|
|
45
91
|
metadata?: Data;
|
|
46
92
|
}
|
|
93
|
+
type DeepPartial<T> = T extends object ? {
|
|
94
|
+
[P in keyof T]?: DeepPartial<T[P]>;
|
|
95
|
+
} : T;
|
|
47
96
|
|
|
48
|
-
interface
|
|
49
|
-
|
|
97
|
+
interface AgentOptions {
|
|
98
|
+
systemPrompt: string;
|
|
50
99
|
}
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
100
|
+
interface AgentEvents {
|
|
101
|
+
Message: [MicdropConversationMessage];
|
|
102
|
+
CancelLastUserMessage: [];
|
|
103
|
+
CancelLastAssistantMessage: [];
|
|
104
|
+
SkipAnswer: [];
|
|
105
|
+
EndCall: [];
|
|
106
|
+
}
|
|
107
|
+
interface AgentAnswerReturn {
|
|
108
|
+
message: Promise<string>;
|
|
109
|
+
stream: Readable;
|
|
110
|
+
}
|
|
111
|
+
interface TextPromise {
|
|
112
|
+
promise: Promise<string>;
|
|
113
|
+
resolve: (value: string) => void;
|
|
114
|
+
reject: (reason?: any) => void;
|
|
115
|
+
}
|
|
116
|
+
declare abstract class Agent<Options extends AgentOptions = AgentOptions> extends EventEmitter {
|
|
117
|
+
protected options: Options;
|
|
118
|
+
logger?: Logger;
|
|
119
|
+
conversation: MicdropConversation;
|
|
120
|
+
constructor(options: Options);
|
|
121
|
+
abstract answer(): AgentAnswerReturn;
|
|
122
|
+
abstract cancel(): void;
|
|
123
|
+
addUserMessage(text: string): void;
|
|
124
|
+
addAssistantMessage(text: string): void;
|
|
125
|
+
protected addMessage(role: 'user' | 'assistant' | 'system', text: string): void;
|
|
126
|
+
protected endCall(): void;
|
|
127
|
+
protected cancelLastUserMessage(): void;
|
|
128
|
+
protected cancelLastAssistantMessage(): void;
|
|
129
|
+
protected skipAnswer(): void;
|
|
130
|
+
protected createTextPromise(): TextPromise;
|
|
131
|
+
protected log(...message: any[]): void;
|
|
132
|
+
destroy(): void;
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
declare class MockAgent extends Agent {
|
|
136
|
+
private i;
|
|
137
|
+
constructor();
|
|
138
|
+
answer(): {
|
|
139
|
+
message: Promise<string>;
|
|
140
|
+
stream: PassThrough;
|
|
141
|
+
};
|
|
142
|
+
cancel(): void;
|
|
71
143
|
}
|
|
72
144
|
|
|
73
|
-
declare
|
|
145
|
+
declare function convertToPCM(audioStream: Readable, sampleRate?: number, bitDepth?: number): PassThrough;
|
|
146
|
+
declare function convertToOpus(audioStream: Readable, sampleRate?: number): PassThrough;
|
|
147
|
+
declare function convertPCMToOpus(audioStream: Readable, sampleRate?: number): PassThrough;
|
|
148
|
+
|
|
149
|
+
declare enum MicdropErrorCode {
|
|
74
150
|
BadRequest = 4400,
|
|
75
151
|
Unauthorized = 4401,
|
|
76
152
|
NotFound = 4404
|
|
77
153
|
}
|
|
78
|
-
declare class
|
|
154
|
+
declare class MicdropError extends Error {
|
|
79
155
|
code: number;
|
|
80
156
|
constructor(code: number, message: string);
|
|
81
157
|
}
|
|
82
|
-
declare function handleError(socket: WebSocket
|
|
158
|
+
declare function handleError(socket: WebSocket, error: unknown): void;
|
|
159
|
+
|
|
160
|
+
declare class MicdropServer {
|
|
161
|
+
socket: WebSocket$1 | null;
|
|
162
|
+
config: MicdropConfig | null;
|
|
163
|
+
logger?: Logger;
|
|
164
|
+
private startTime;
|
|
165
|
+
private currentUserStream?;
|
|
166
|
+
constructor(socket: WebSocket$1, config: MicdropConfig);
|
|
167
|
+
private log;
|
|
168
|
+
private cancel;
|
|
169
|
+
private onClose;
|
|
170
|
+
private onMessage;
|
|
171
|
+
private onMute;
|
|
172
|
+
private onStartSpeaking;
|
|
173
|
+
private onStopSpeaking;
|
|
174
|
+
private onTranscript;
|
|
175
|
+
private sendFirstMessage;
|
|
176
|
+
private answer;
|
|
177
|
+
private speak;
|
|
178
|
+
private sendAudio;
|
|
179
|
+
}
|
|
83
180
|
|
|
84
|
-
declare function waitForParams<CallParams>(socket: WebSocket, validate: (params: any) => CallParams): Promise<CallParams>;
|
|
181
|
+
declare function waitForParams<CallParams>(socket: WebSocket$1, validate: (params: any) => CallParams): Promise<CallParams>;
|
|
85
182
|
|
|
86
|
-
export { type
|
|
183
|
+
export { Agent, type AgentAnswerReturn, type AgentEvents, type AgentOptions, type DeepPartial, FileSTT, Logger, type MicdropAnswerMetadata, type MicdropCallSummary, MicdropClientCommands, type MicdropConfig, type MicdropConversation, type MicdropConversationMessage, MicdropError, MicdropErrorCode, MicdropServer, MicdropServerCommands, MockAgent, MockSTT, MockTTS, STT, type STTEvents, TTS, type TextPromise, convertPCMToOpus, convertToOpus, convertToPCM, handleError, waitForParams };
|