@micdrop/server 1.7.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -1,86 +1,183 @@
1
- import WebSocket$1, { WebSocket } from 'ws';
1
+ import EventEmitter from 'eventemitter3';
2
+ import { Readable, PassThrough } from 'stream';
3
+ import WebSocket, { WebSocket as WebSocket$1 } from 'ws';
2
4
 
3
- declare enum CallClientCommands {
5
+ declare class Logger {
6
+ private readonly name;
7
+ constructor(name: string);
8
+ log(...message: any[]): void;
9
+ }
10
+
11
+ declare const MIME_TYPE_TO_EXTENSION: {
12
+ readonly 'audio/wav': "wav";
13
+ readonly 'audio/ogg': "ogg";
14
+ readonly 'audio/mpeg': "mp3";
15
+ readonly 'audio/webm': "webm";
16
+ readonly 'audio/mp4': "mp4";
17
+ readonly 'audio/flac': "flac";
18
+ };
19
+ interface STTEvents {
20
+ Transcript: [string];
21
+ }
22
+ declare abstract class STT extends EventEmitter<STTEvents> {
23
+ protected mimeType?: keyof typeof MIME_TYPE_TO_EXTENSION;
24
+ logger?: Logger;
25
+ transcribe(audioStream: Readable): void;
26
+ protected log(...message: any[]): void;
27
+ destroy(): void;
28
+ protected get extension(): string;
29
+ private detectMimeType;
30
+ }
31
+
32
+ /**
33
+ * Abstract class for STT, converting stream to file before transcribing
34
+ */
35
+ declare abstract class FileSTT extends STT {
36
+ abstract transcribeFile(file: File): Promise<string>;
37
+ transcribe(audioStream: Readable): void;
38
+ }
39
+
40
+ declare class MockSTT extends FileSTT {
41
+ private i;
42
+ transcribeFile(file: File): Promise<string>;
43
+ }
44
+
45
+ declare abstract class TTS {
46
+ logger?: Logger;
47
+ abstract speak(textStream: Readable): Readable;
48
+ abstract cancel(): void;
49
+ protected log(...message: any[]): void;
50
+ destroy(): void;
51
+ }
52
+
53
+ declare class MockTTS extends TTS {
54
+ private audioFilePaths;
55
+ constructor(audioFilePaths: string[]);
56
+ speak(textStream: Readable): PassThrough;
57
+ cancel(): void;
58
+ }
59
+
60
+ declare enum MicdropClientCommands {
4
61
  StartSpeaking = "StartSpeaking",
5
62
  StopSpeaking = "StopSpeaking",
6
63
  Mute = "Mute"
7
64
  }
8
- declare enum CallServerCommands {
65
+ declare enum MicdropServerCommands {
9
66
  Message = "Message",
10
67
  CancelLastAssistantMessage = "CancelLastAssistantMessage",
11
68
  CancelLastUserMessage = "CancelLastUserMessage",
12
69
  SkipAnswer = "SkipAnswer",
13
- EnableSpeakerStreaming = "EnableSpeakerStreaming",
14
70
  EndCall = "EndCall"
15
71
  }
16
- interface CallConfig {
17
- systemPrompt: string;
72
+ interface MicdropConfig {
18
73
  firstMessage?: string;
19
- debugLog?: boolean;
20
- debugSaveSpeech?: boolean;
21
- disableTTS?: boolean;
22
- generateAnswer(conversation: Conversation): Promise<string | ConversationMessage>;
23
- speech2Text(audioBlob: Blob, prevMessage?: string): Promise<string>;
24
- text2Speech(text: string): Promise<ArrayBuffer | NodeJS.ReadableStream>;
25
- onMessage?(message: ConversationMessage): void;
26
- onEnd?(call: CallSummary): void;
27
- }
28
- interface CallSummary {
29
- conversation: Conversation;
74
+ generateFirstMessage?: boolean;
75
+ agent: Agent;
76
+ stt: STT;
77
+ tts: TTS;
78
+ onEnd?(call: MicdropCallSummary): void;
79
+ }
80
+ interface MicdropCallSummary {
81
+ conversation: MicdropConversation;
30
82
  duration: number;
31
83
  }
32
- type Conversation = ConversationMessage[];
33
- type AnswerCommands = {
34
- endCall?: boolean;
35
- cancelLastUserMessage?: boolean;
36
- skipAnswer?: boolean;
37
- };
38
- type AnswerMetadata = {
84
+ type MicdropConversation = MicdropConversationMessage[];
85
+ type MicdropAnswerMetadata = {
39
86
  [key: string]: any;
40
87
  };
41
- interface ConversationMessage<Data extends AnswerMetadata = AnswerMetadata> {
88
+ interface MicdropConversationMessage<Data extends MicdropAnswerMetadata = MicdropAnswerMetadata> {
42
89
  role: 'system' | 'user' | 'assistant';
43
90
  content: string;
44
- commands?: AnswerCommands;
45
91
  metadata?: Data;
46
92
  }
93
+ type DeepPartial<T> = T extends object ? {
94
+ [P in keyof T]?: DeepPartial<T[P]>;
95
+ } : T;
47
96
 
48
- interface Processing {
49
- aborted: boolean;
97
+ interface AgentOptions {
98
+ systemPrompt: string;
50
99
  }
51
- declare class CallServer {
52
- socket: WebSocket | null;
53
- config: CallConfig | null;
54
- private startTime;
55
- private lastDebug;
56
- private processing?;
57
- private isSpeaking;
58
- private chunks;
59
- private conversation;
60
- private speakerStreamingEnabled;
61
- constructor(socket: WebSocket, config: CallConfig);
62
- resetConversation(conversation: Conversation): void;
63
- private abortProcessing;
64
- private addMessage;
65
- private sendAudio;
66
- private onClose;
67
- private onMessage;
68
- private onStopSpeaking;
69
- answer(message: string | ConversationMessage, processing?: Processing): Promise<void>;
70
- private log;
100
+ interface AgentEvents {
101
+ Message: [MicdropConversationMessage];
102
+ CancelLastUserMessage: [];
103
+ CancelLastAssistantMessage: [];
104
+ SkipAnswer: [];
105
+ EndCall: [];
106
+ }
107
+ interface AgentAnswerReturn {
108
+ message: Promise<string>;
109
+ stream: Readable;
110
+ }
111
+ interface TextPromise {
112
+ promise: Promise<string>;
113
+ resolve: (value: string) => void;
114
+ reject: (reason?: any) => void;
115
+ }
116
+ declare abstract class Agent<Options extends AgentOptions = AgentOptions> extends EventEmitter {
117
+ protected options: Options;
118
+ logger?: Logger;
119
+ conversation: MicdropConversation;
120
+ constructor(options: Options);
121
+ abstract answer(): AgentAnswerReturn;
122
+ abstract cancel(): void;
123
+ addUserMessage(text: string): void;
124
+ addAssistantMessage(text: string): void;
125
+ protected addMessage(role: 'user' | 'assistant' | 'system', text: string): void;
126
+ protected endCall(): void;
127
+ protected cancelLastUserMessage(): void;
128
+ protected cancelLastAssistantMessage(): void;
129
+ protected skipAnswer(): void;
130
+ protected createTextPromise(): TextPromise;
131
+ protected log(...message: any[]): void;
132
+ destroy(): void;
133
+ }
134
+
135
+ declare class MockAgent extends Agent {
136
+ private i;
137
+ constructor();
138
+ answer(): {
139
+ message: Promise<string>;
140
+ stream: PassThrough;
141
+ };
142
+ cancel(): void;
71
143
  }
72
144
 
73
- declare enum CallErrorCode {
145
+ declare function convertToPCM(audioStream: Readable, sampleRate?: number, bitDepth?: number): PassThrough;
146
+ declare function convertToOpus(audioStream: Readable, sampleRate?: number): PassThrough;
147
+ declare function convertPCMToOpus(audioStream: Readable, sampleRate?: number): PassThrough;
148
+
149
+ declare enum MicdropErrorCode {
74
150
  BadRequest = 4400,
75
151
  Unauthorized = 4401,
76
152
  NotFound = 4404
77
153
  }
78
- declare class CallError extends Error {
154
+ declare class MicdropError extends Error {
79
155
  code: number;
80
156
  constructor(code: number, message: string);
81
157
  }
82
- declare function handleError(socket: WebSocket$1, error: unknown): void;
158
+ declare function handleError(socket: WebSocket, error: unknown): void;
159
+
160
+ declare class MicdropServer {
161
+ socket: WebSocket$1 | null;
162
+ config: MicdropConfig | null;
163
+ logger?: Logger;
164
+ private startTime;
165
+ private currentUserStream?;
166
+ constructor(socket: WebSocket$1, config: MicdropConfig);
167
+ private log;
168
+ private cancel;
169
+ private onClose;
170
+ private onMessage;
171
+ private onMute;
172
+ private onStartSpeaking;
173
+ private onStopSpeaking;
174
+ private onTranscript;
175
+ private sendFirstMessage;
176
+ private answer;
177
+ private speak;
178
+ private sendAudio;
179
+ }
83
180
 
84
- declare function waitForParams<CallParams>(socket: WebSocket, validate: (params: any) => CallParams): Promise<CallParams>;
181
+ declare function waitForParams<CallParams>(socket: WebSocket$1, validate: (params: any) => CallParams): Promise<CallParams>;
85
182
 
86
- export { type AnswerCommands, type AnswerMetadata, CallClientCommands, type CallConfig, CallError, CallErrorCode, CallServer, CallServerCommands, type CallSummary, type Conversation, type ConversationMessage, handleError, waitForParams };
183
+ export { Agent, type AgentAnswerReturn, type AgentEvents, type AgentOptions, type DeepPartial, FileSTT, Logger, type MicdropAnswerMetadata, type MicdropCallSummary, MicdropClientCommands, type MicdropConfig, type MicdropConversation, type MicdropConversationMessage, MicdropError, MicdropErrorCode, MicdropServer, MicdropServerCommands, MockAgent, MockSTT, MockTTS, STT, type STTEvents, TTS, type TextPromise, convertPCMToOpus, convertToOpus, convertToPCM, handleError, waitForParams };
package/dist/index.d.ts CHANGED
@@ -1,86 +1,183 @@
1
- import WebSocket$1, { WebSocket } from 'ws';
1
+ import EventEmitter from 'eventemitter3';
2
+ import { Readable, PassThrough } from 'stream';
3
+ import WebSocket, { WebSocket as WebSocket$1 } from 'ws';
2
4
 
3
- declare enum CallClientCommands {
5
+ declare class Logger {
6
+ private readonly name;
7
+ constructor(name: string);
8
+ log(...message: any[]): void;
9
+ }
10
+
11
+ declare const MIME_TYPE_TO_EXTENSION: {
12
+ readonly 'audio/wav': "wav";
13
+ readonly 'audio/ogg': "ogg";
14
+ readonly 'audio/mpeg': "mp3";
15
+ readonly 'audio/webm': "webm";
16
+ readonly 'audio/mp4': "mp4";
17
+ readonly 'audio/flac': "flac";
18
+ };
19
+ interface STTEvents {
20
+ Transcript: [string];
21
+ }
22
+ declare abstract class STT extends EventEmitter<STTEvents> {
23
+ protected mimeType?: keyof typeof MIME_TYPE_TO_EXTENSION;
24
+ logger?: Logger;
25
+ transcribe(audioStream: Readable): void;
26
+ protected log(...message: any[]): void;
27
+ destroy(): void;
28
+ protected get extension(): string;
29
+ private detectMimeType;
30
+ }
31
+
32
+ /**
33
+ * Abstract class for STT, converting stream to file before transcribing
34
+ */
35
+ declare abstract class FileSTT extends STT {
36
+ abstract transcribeFile(file: File): Promise<string>;
37
+ transcribe(audioStream: Readable): void;
38
+ }
39
+
40
+ declare class MockSTT extends FileSTT {
41
+ private i;
42
+ transcribeFile(file: File): Promise<string>;
43
+ }
44
+
45
+ declare abstract class TTS {
46
+ logger?: Logger;
47
+ abstract speak(textStream: Readable): Readable;
48
+ abstract cancel(): void;
49
+ protected log(...message: any[]): void;
50
+ destroy(): void;
51
+ }
52
+
53
+ declare class MockTTS extends TTS {
54
+ private audioFilePaths;
55
+ constructor(audioFilePaths: string[]);
56
+ speak(textStream: Readable): PassThrough;
57
+ cancel(): void;
58
+ }
59
+
60
+ declare enum MicdropClientCommands {
4
61
  StartSpeaking = "StartSpeaking",
5
62
  StopSpeaking = "StopSpeaking",
6
63
  Mute = "Mute"
7
64
  }
8
- declare enum CallServerCommands {
65
+ declare enum MicdropServerCommands {
9
66
  Message = "Message",
10
67
  CancelLastAssistantMessage = "CancelLastAssistantMessage",
11
68
  CancelLastUserMessage = "CancelLastUserMessage",
12
69
  SkipAnswer = "SkipAnswer",
13
- EnableSpeakerStreaming = "EnableSpeakerStreaming",
14
70
  EndCall = "EndCall"
15
71
  }
16
- interface CallConfig {
17
- systemPrompt: string;
72
+ interface MicdropConfig {
18
73
  firstMessage?: string;
19
- debugLog?: boolean;
20
- debugSaveSpeech?: boolean;
21
- disableTTS?: boolean;
22
- generateAnswer(conversation: Conversation): Promise<string | ConversationMessage>;
23
- speech2Text(audioBlob: Blob, prevMessage?: string): Promise<string>;
24
- text2Speech(text: string): Promise<ArrayBuffer | NodeJS.ReadableStream>;
25
- onMessage?(message: ConversationMessage): void;
26
- onEnd?(call: CallSummary): void;
27
- }
28
- interface CallSummary {
29
- conversation: Conversation;
74
+ generateFirstMessage?: boolean;
75
+ agent: Agent;
76
+ stt: STT;
77
+ tts: TTS;
78
+ onEnd?(call: MicdropCallSummary): void;
79
+ }
80
+ interface MicdropCallSummary {
81
+ conversation: MicdropConversation;
30
82
  duration: number;
31
83
  }
32
- type Conversation = ConversationMessage[];
33
- type AnswerCommands = {
34
- endCall?: boolean;
35
- cancelLastUserMessage?: boolean;
36
- skipAnswer?: boolean;
37
- };
38
- type AnswerMetadata = {
84
+ type MicdropConversation = MicdropConversationMessage[];
85
+ type MicdropAnswerMetadata = {
39
86
  [key: string]: any;
40
87
  };
41
- interface ConversationMessage<Data extends AnswerMetadata = AnswerMetadata> {
88
+ interface MicdropConversationMessage<Data extends MicdropAnswerMetadata = MicdropAnswerMetadata> {
42
89
  role: 'system' | 'user' | 'assistant';
43
90
  content: string;
44
- commands?: AnswerCommands;
45
91
  metadata?: Data;
46
92
  }
93
+ type DeepPartial<T> = T extends object ? {
94
+ [P in keyof T]?: DeepPartial<T[P]>;
95
+ } : T;
47
96
 
48
- interface Processing {
49
- aborted: boolean;
97
+ interface AgentOptions {
98
+ systemPrompt: string;
50
99
  }
51
- declare class CallServer {
52
- socket: WebSocket | null;
53
- config: CallConfig | null;
54
- private startTime;
55
- private lastDebug;
56
- private processing?;
57
- private isSpeaking;
58
- private chunks;
59
- private conversation;
60
- private speakerStreamingEnabled;
61
- constructor(socket: WebSocket, config: CallConfig);
62
- resetConversation(conversation: Conversation): void;
63
- private abortProcessing;
64
- private addMessage;
65
- private sendAudio;
66
- private onClose;
67
- private onMessage;
68
- private onStopSpeaking;
69
- answer(message: string | ConversationMessage, processing?: Processing): Promise<void>;
70
- private log;
100
+ interface AgentEvents {
101
+ Message: [MicdropConversationMessage];
102
+ CancelLastUserMessage: [];
103
+ CancelLastAssistantMessage: [];
104
+ SkipAnswer: [];
105
+ EndCall: [];
106
+ }
107
+ interface AgentAnswerReturn {
108
+ message: Promise<string>;
109
+ stream: Readable;
110
+ }
111
+ interface TextPromise {
112
+ promise: Promise<string>;
113
+ resolve: (value: string) => void;
114
+ reject: (reason?: any) => void;
115
+ }
116
+ declare abstract class Agent<Options extends AgentOptions = AgentOptions> extends EventEmitter {
117
+ protected options: Options;
118
+ logger?: Logger;
119
+ conversation: MicdropConversation;
120
+ constructor(options: Options);
121
+ abstract answer(): AgentAnswerReturn;
122
+ abstract cancel(): void;
123
+ addUserMessage(text: string): void;
124
+ addAssistantMessage(text: string): void;
125
+ protected addMessage(role: 'user' | 'assistant' | 'system', text: string): void;
126
+ protected endCall(): void;
127
+ protected cancelLastUserMessage(): void;
128
+ protected cancelLastAssistantMessage(): void;
129
+ protected skipAnswer(): void;
130
+ protected createTextPromise(): TextPromise;
131
+ protected log(...message: any[]): void;
132
+ destroy(): void;
133
+ }
134
+
135
+ declare class MockAgent extends Agent {
136
+ private i;
137
+ constructor();
138
+ answer(): {
139
+ message: Promise<string>;
140
+ stream: PassThrough;
141
+ };
142
+ cancel(): void;
71
143
  }
72
144
 
73
- declare enum CallErrorCode {
145
+ declare function convertToPCM(audioStream: Readable, sampleRate?: number, bitDepth?: number): PassThrough;
146
+ declare function convertToOpus(audioStream: Readable, sampleRate?: number): PassThrough;
147
+ declare function convertPCMToOpus(audioStream: Readable, sampleRate?: number): PassThrough;
148
+
149
+ declare enum MicdropErrorCode {
74
150
  BadRequest = 4400,
75
151
  Unauthorized = 4401,
76
152
  NotFound = 4404
77
153
  }
78
- declare class CallError extends Error {
154
+ declare class MicdropError extends Error {
79
155
  code: number;
80
156
  constructor(code: number, message: string);
81
157
  }
82
- declare function handleError(socket: WebSocket$1, error: unknown): void;
158
+ declare function handleError(socket: WebSocket, error: unknown): void;
159
+
160
+ declare class MicdropServer {
161
+ socket: WebSocket$1 | null;
162
+ config: MicdropConfig | null;
163
+ logger?: Logger;
164
+ private startTime;
165
+ private currentUserStream?;
166
+ constructor(socket: WebSocket$1, config: MicdropConfig);
167
+ private log;
168
+ private cancel;
169
+ private onClose;
170
+ private onMessage;
171
+ private onMute;
172
+ private onStartSpeaking;
173
+ private onStopSpeaking;
174
+ private onTranscript;
175
+ private sendFirstMessage;
176
+ private answer;
177
+ private speak;
178
+ private sendAudio;
179
+ }
83
180
 
84
- declare function waitForParams<CallParams>(socket: WebSocket, validate: (params: any) => CallParams): Promise<CallParams>;
181
+ declare function waitForParams<CallParams>(socket: WebSocket$1, validate: (params: any) => CallParams): Promise<CallParams>;
85
182
 
86
- export { type AnswerCommands, type AnswerMetadata, CallClientCommands, type CallConfig, CallError, CallErrorCode, CallServer, CallServerCommands, type CallSummary, type Conversation, type ConversationMessage, handleError, waitForParams };
183
+ export { Agent, type AgentAnswerReturn, type AgentEvents, type AgentOptions, type DeepPartial, FileSTT, Logger, type MicdropAnswerMetadata, type MicdropCallSummary, MicdropClientCommands, type MicdropConfig, type MicdropConversation, type MicdropConversationMessage, MicdropError, MicdropErrorCode, MicdropServer, MicdropServerCommands, MockAgent, MockSTT, MockTTS, STT, type STTEvents, TTS, type TextPromise, convertPCMToOpus, convertToOpus, convertToPCM, handleError, waitForParams };