@micdrop/server 2.0.6 → 2.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +126 -63
- package/dist/index.d.ts +126 -63
- package/dist/index.js +154 -16
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +148 -16
- package/dist/index.mjs.map +1 -1
- package/package.json +3 -2
package/dist/index.d.mts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { EventEmitter } from 'eventemitter3';
|
|
2
2
|
import { Readable, PassThrough } from 'stream';
|
|
3
|
+
import { z } from 'zod';
|
|
3
4
|
import WebSocket, { WebSocket as WebSocket$1 } from 'ws';
|
|
4
5
|
|
|
5
6
|
declare class Logger {
|
|
@@ -8,55 +9,6 @@ declare class Logger {
|
|
|
8
9
|
log(...message: any[]): void;
|
|
9
10
|
}
|
|
10
11
|
|
|
11
|
-
declare const MIME_TYPE_TO_EXTENSION: {
|
|
12
|
-
readonly 'audio/wav': "wav";
|
|
13
|
-
readonly 'audio/ogg': "ogg";
|
|
14
|
-
readonly 'audio/mpeg': "mp3";
|
|
15
|
-
readonly 'audio/webm': "webm";
|
|
16
|
-
readonly 'audio/mp4': "mp4";
|
|
17
|
-
readonly 'audio/flac': "flac";
|
|
18
|
-
};
|
|
19
|
-
interface STTEvents {
|
|
20
|
-
Transcript: [string];
|
|
21
|
-
}
|
|
22
|
-
declare abstract class STT extends EventEmitter<STTEvents> {
|
|
23
|
-
protected mimeType?: keyof typeof MIME_TYPE_TO_EXTENSION;
|
|
24
|
-
logger?: Logger;
|
|
25
|
-
transcribe(audioStream: Readable): void;
|
|
26
|
-
protected log(...message: any[]): void;
|
|
27
|
-
destroy(): void;
|
|
28
|
-
protected get extension(): string;
|
|
29
|
-
private detectMimeType;
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
/**
|
|
33
|
-
* Abstract class for STT, converting stream to file before transcribing
|
|
34
|
-
*/
|
|
35
|
-
declare abstract class FileSTT extends STT {
|
|
36
|
-
abstract transcribeFile(file: File): Promise<string>;
|
|
37
|
-
transcribe(audioStream: Readable): void;
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
declare class MockSTT extends FileSTT {
|
|
41
|
-
private i;
|
|
42
|
-
transcribeFile(file: File): Promise<string>;
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
declare abstract class TTS {
|
|
46
|
-
logger?: Logger;
|
|
47
|
-
abstract speak(textStream: Readable): Readable;
|
|
48
|
-
abstract cancel(): void;
|
|
49
|
-
protected log(...message: any[]): void;
|
|
50
|
-
destroy(): void;
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
declare class MockTTS extends TTS {
|
|
54
|
-
private audioFilePaths;
|
|
55
|
-
constructor(audioFilePaths: string[]);
|
|
56
|
-
speak(textStream: Readable): PassThrough;
|
|
57
|
-
cancel(): void;
|
|
58
|
-
}
|
|
59
|
-
|
|
60
12
|
declare enum MicdropClientCommands {
|
|
61
13
|
StartSpeaking = "StartSpeaking",
|
|
62
14
|
StopSpeaking = "StopSpeaking",
|
|
@@ -64,25 +16,17 @@ declare enum MicdropClientCommands {
|
|
|
64
16
|
}
|
|
65
17
|
declare enum MicdropServerCommands {
|
|
66
18
|
Message = "Message",
|
|
67
|
-
CancelLastAssistantMessage = "CancelLastAssistantMessage",
|
|
68
19
|
CancelLastUserMessage = "CancelLastUserMessage",
|
|
69
20
|
SkipAnswer = "SkipAnswer",
|
|
70
21
|
EndCall = "EndCall",
|
|
71
22
|
ToolCall = "ToolCall"
|
|
72
23
|
}
|
|
73
|
-
interface MicdropConfig {
|
|
74
|
-
firstMessage?: string;
|
|
75
|
-
generateFirstMessage?: boolean;
|
|
76
|
-
agent: Agent;
|
|
77
|
-
stt: STT;
|
|
78
|
-
tts: TTS;
|
|
79
|
-
onEnd?(call: MicdropCallSummary): void;
|
|
80
|
-
}
|
|
81
24
|
interface MicdropCallSummary {
|
|
82
25
|
conversation: MicdropConversation;
|
|
83
26
|
duration: number;
|
|
84
27
|
}
|
|
85
|
-
type
|
|
28
|
+
type MicdropConversationItem = MicdropConversationMessage | MicdropConversationToolCall | MicdropConversationToolResult;
|
|
29
|
+
type MicdropConversation = Array<MicdropConversationItem>;
|
|
86
30
|
type MicdropAnswerMetadata = {
|
|
87
31
|
[key: string]: any;
|
|
88
32
|
};
|
|
@@ -91,6 +35,18 @@ interface MicdropConversationMessage<Data extends MicdropAnswerMetadata = Micdro
|
|
|
91
35
|
content: string;
|
|
92
36
|
metadata?: Data;
|
|
93
37
|
}
|
|
38
|
+
interface MicdropConversationToolCall {
|
|
39
|
+
role: 'tool_call';
|
|
40
|
+
toolCallId: string;
|
|
41
|
+
toolName: string;
|
|
42
|
+
parameters: string;
|
|
43
|
+
}
|
|
44
|
+
interface MicdropConversationToolResult {
|
|
45
|
+
role: 'tool_result';
|
|
46
|
+
toolCallId: string;
|
|
47
|
+
toolName: string;
|
|
48
|
+
output: string;
|
|
49
|
+
}
|
|
94
50
|
interface MicdropToolCall {
|
|
95
51
|
name: string;
|
|
96
52
|
parameters: any;
|
|
@@ -100,31 +56,80 @@ type DeepPartial<T> = T extends object ? {
|
|
|
100
56
|
[P in keyof T]?: DeepPartial<T[P]>;
|
|
101
57
|
} : T;
|
|
102
58
|
|
|
59
|
+
interface Tool<Schema extends z.ZodObject = z.ZodObject> {
|
|
60
|
+
name: string;
|
|
61
|
+
description: string;
|
|
62
|
+
inputSchema?: Schema;
|
|
63
|
+
execute?: (input: z.infer<Schema>) => any | Promise<any>;
|
|
64
|
+
skipAnswer?: boolean;
|
|
65
|
+
emitOutput?: boolean;
|
|
66
|
+
}
|
|
67
|
+
declare const AUTO_END_CALL_TOOL_NAME = "end_call";
|
|
68
|
+
declare const AUTO_END_CALL_PROMPT = "Call this tool only if user asks to end the call";
|
|
69
|
+
declare const AUTO_SEMANTIC_TURN_TOOL_NAME = "semantic_turn";
|
|
70
|
+
declare const AUTO_SEMANTIC_TURN_PROMPT = "Call this tool only if last user message is obviously an incomplete sentence that you need to wait for the end before answering";
|
|
71
|
+
declare const AUTO_IGNORE_USER_NOISE_TOOL_NAME = "ignore_user_noise";
|
|
72
|
+
declare const AUTO_IGNORE_USER_NOISE_PROMPT = "Call this tool only if last user message is just an interjection or a sound that expresses emotion, hesitation, or reaction (ex: \"Uh\", \"Ahem\", \"Hmm\", \"Ah\") but doesn't carry any clear meaning like agreeing, refusing, or commanding";
|
|
73
|
+
|
|
103
74
|
interface AgentOptions {
|
|
104
75
|
systemPrompt: string;
|
|
76
|
+
autoEndCall?: boolean | string;
|
|
77
|
+
autoSemanticTurn?: boolean | string;
|
|
78
|
+
autoIgnoreUserNoise?: boolean | string;
|
|
79
|
+
extract?: ExtractJsonOptions | ExtractTagOptions;
|
|
105
80
|
}
|
|
106
81
|
interface AgentEvents {
|
|
107
|
-
Message: [
|
|
82
|
+
Message: [MicdropConversationItem];
|
|
108
83
|
CancelLastUserMessage: [];
|
|
109
|
-
CancelLastAssistantMessage: [];
|
|
110
84
|
SkipAnswer: [];
|
|
111
85
|
EndCall: [];
|
|
112
86
|
ToolCall: [MicdropToolCall];
|
|
113
87
|
}
|
|
88
|
+
interface ExtractOptions {
|
|
89
|
+
callback?: (value: string) => void;
|
|
90
|
+
saveInMetadata?: boolean;
|
|
91
|
+
}
|
|
92
|
+
interface ExtractJsonOptions extends ExtractOptions {
|
|
93
|
+
json: true;
|
|
94
|
+
callback?: (value: any) => void;
|
|
95
|
+
}
|
|
96
|
+
interface ExtractTagOptions extends ExtractOptions {
|
|
97
|
+
startTag: string;
|
|
98
|
+
endTag: string;
|
|
99
|
+
}
|
|
114
100
|
declare abstract class Agent<Options extends AgentOptions = AgentOptions> extends EventEmitter<AgentEvents> {
|
|
115
101
|
protected options: Options;
|
|
116
102
|
logger?: Logger;
|
|
117
103
|
conversation: MicdropConversation;
|
|
104
|
+
protected tools: Tool[];
|
|
118
105
|
constructor(options: Options);
|
|
119
106
|
abstract answer(): Readable;
|
|
120
107
|
abstract cancel(): void;
|
|
121
108
|
addUserMessage(text: string, metadata?: MicdropAnswerMetadata): void;
|
|
122
109
|
addAssistantMessage(text: string, metadata?: MicdropAnswerMetadata): void;
|
|
110
|
+
addTool<Schema extends z.ZodObject>(tool: Tool<Schema>): void;
|
|
111
|
+
removeTool(name: string): void;
|
|
112
|
+
getTool(name: string): Tool | undefined;
|
|
123
113
|
protected addMessage(role: 'user' | 'assistant' | 'system', text: string, metadata?: MicdropAnswerMetadata): void;
|
|
114
|
+
protected addToolMessage(message: MicdropConversationToolCall | MicdropConversationToolResult): void;
|
|
124
115
|
protected endCall(): void;
|
|
125
116
|
protected cancelLastUserMessage(): void;
|
|
126
|
-
protected cancelLastAssistantMessage(): void;
|
|
127
117
|
protected skipAnswer(): void;
|
|
118
|
+
protected getDefaultTools(): Tool<z.ZodObject<z.core.$ZodLooseShape, z.core.$strip>>[];
|
|
119
|
+
protected executeTool(toolCall: MicdropConversationToolCall): Promise<{
|
|
120
|
+
output: any;
|
|
121
|
+
skipAnswer: boolean | undefined;
|
|
122
|
+
} | {
|
|
123
|
+
output: {
|
|
124
|
+
error: any;
|
|
125
|
+
};
|
|
126
|
+
skipAnswer?: undefined;
|
|
127
|
+
}>;
|
|
128
|
+
protected getExtractOptions(): ExtractTagOptions | undefined;
|
|
129
|
+
protected extract(message: string): {
|
|
130
|
+
message: string;
|
|
131
|
+
metadata: MicdropAnswerMetadata | undefined;
|
|
132
|
+
};
|
|
128
133
|
protected log(...message: any[]): void;
|
|
129
134
|
destroy(): void;
|
|
130
135
|
}
|
|
@@ -151,11 +156,69 @@ declare class MicdropError extends Error {
|
|
|
151
156
|
}
|
|
152
157
|
declare function handleError(socket: WebSocket, error: unknown): void;
|
|
153
158
|
|
|
159
|
+
declare const MIME_TYPE_TO_EXTENSION: {
|
|
160
|
+
readonly 'audio/wav': "wav";
|
|
161
|
+
readonly 'audio/ogg': "ogg";
|
|
162
|
+
readonly 'audio/mpeg': "mp3";
|
|
163
|
+
readonly 'audio/webm': "webm";
|
|
164
|
+
readonly 'audio/mp4': "mp4";
|
|
165
|
+
readonly 'audio/flac': "flac";
|
|
166
|
+
};
|
|
167
|
+
interface STTEvents {
|
|
168
|
+
Transcript: [string];
|
|
169
|
+
}
|
|
170
|
+
declare abstract class STT extends EventEmitter<STTEvents> {
|
|
171
|
+
protected mimeType?: keyof typeof MIME_TYPE_TO_EXTENSION;
|
|
172
|
+
logger?: Logger;
|
|
173
|
+
transcribe(audioStream: Readable): void;
|
|
174
|
+
protected log(...message: any[]): void;
|
|
175
|
+
destroy(): void;
|
|
176
|
+
protected get extension(): string;
|
|
177
|
+
private detectMimeType;
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
/**
|
|
181
|
+
* Abstract class for STT, converting stream to file before transcribing
|
|
182
|
+
*/
|
|
183
|
+
declare abstract class FileSTT extends STT {
|
|
184
|
+
abstract transcribeFile(file: File): Promise<string>;
|
|
185
|
+
transcribe(audioStream: Readable): void;
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
declare class MockSTT extends FileSTT {
|
|
189
|
+
private i;
|
|
190
|
+
transcribeFile(file: File): Promise<string>;
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
declare abstract class TTS {
|
|
194
|
+
logger?: Logger;
|
|
195
|
+
abstract speak(textStream: Readable): Readable;
|
|
196
|
+
abstract cancel(): void;
|
|
197
|
+
protected log(...message: any[]): void;
|
|
198
|
+
destroy(): void;
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
declare class MockTTS extends TTS {
|
|
202
|
+
private audioFilePaths;
|
|
203
|
+
constructor(audioFilePaths: string[]);
|
|
204
|
+
speak(textStream: Readable): PassThrough;
|
|
205
|
+
cancel(): void;
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
interface MicdropConfig {
|
|
209
|
+
firstMessage?: string;
|
|
210
|
+
generateFirstMessage?: boolean;
|
|
211
|
+
agent: Agent;
|
|
212
|
+
stt: STT;
|
|
213
|
+
tts: TTS;
|
|
214
|
+
onEnd?(call: MicdropCallSummary): void;
|
|
215
|
+
}
|
|
154
216
|
declare class MicdropServer {
|
|
155
217
|
socket: WebSocket$1 | null;
|
|
156
218
|
config: MicdropConfig | null;
|
|
157
219
|
logger?: Logger;
|
|
158
220
|
private startTime;
|
|
221
|
+
private lastMessageSpeeched?;
|
|
159
222
|
private currentUserStream?;
|
|
160
223
|
constructor(socket: WebSocket$1, config: MicdropConfig);
|
|
161
224
|
private log;
|
|
@@ -174,4 +237,4 @@ declare class MicdropServer {
|
|
|
174
237
|
|
|
175
238
|
declare function waitForParams<CallParams>(socket: WebSocket$1, validate: (params: any) => CallParams): Promise<CallParams>;
|
|
176
239
|
|
|
177
|
-
export { Agent, type AgentEvents, type AgentOptions, type DeepPartial, FileSTT, Logger, type MicdropAnswerMetadata, type MicdropCallSummary, MicdropClientCommands, type MicdropConfig, type MicdropConversation, type MicdropConversationMessage, MicdropError, MicdropErrorCode, MicdropServer, MicdropServerCommands, type MicdropToolCall, MockAgent, MockSTT, MockTTS, STT, type STTEvents, TTS, convertPCMToOpus, convertToOpus, convertToPCM, handleError, waitForParams };
|
|
240
|
+
export { AUTO_END_CALL_PROMPT, AUTO_END_CALL_TOOL_NAME, AUTO_IGNORE_USER_NOISE_PROMPT, AUTO_IGNORE_USER_NOISE_TOOL_NAME, AUTO_SEMANTIC_TURN_PROMPT, AUTO_SEMANTIC_TURN_TOOL_NAME, Agent, type AgentEvents, type AgentOptions, type DeepPartial, type ExtractJsonOptions, type ExtractOptions, type ExtractTagOptions, FileSTT, Logger, type MicdropAnswerMetadata, type MicdropCallSummary, MicdropClientCommands, type MicdropConfig, type MicdropConversation, type MicdropConversationItem, type MicdropConversationMessage, type MicdropConversationToolCall, type MicdropConversationToolResult, MicdropError, MicdropErrorCode, MicdropServer, MicdropServerCommands, type MicdropToolCall, MockAgent, MockSTT, MockTTS, STT, type STTEvents, TTS, type Tool, convertPCMToOpus, convertToOpus, convertToPCM, handleError, waitForParams };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { EventEmitter } from 'eventemitter3';
|
|
2
2
|
import { Readable, PassThrough } from 'stream';
|
|
3
|
+
import { z } from 'zod';
|
|
3
4
|
import WebSocket, { WebSocket as WebSocket$1 } from 'ws';
|
|
4
5
|
|
|
5
6
|
declare class Logger {
|
|
@@ -8,55 +9,6 @@ declare class Logger {
|
|
|
8
9
|
log(...message: any[]): void;
|
|
9
10
|
}
|
|
10
11
|
|
|
11
|
-
declare const MIME_TYPE_TO_EXTENSION: {
|
|
12
|
-
readonly 'audio/wav': "wav";
|
|
13
|
-
readonly 'audio/ogg': "ogg";
|
|
14
|
-
readonly 'audio/mpeg': "mp3";
|
|
15
|
-
readonly 'audio/webm': "webm";
|
|
16
|
-
readonly 'audio/mp4': "mp4";
|
|
17
|
-
readonly 'audio/flac': "flac";
|
|
18
|
-
};
|
|
19
|
-
interface STTEvents {
|
|
20
|
-
Transcript: [string];
|
|
21
|
-
}
|
|
22
|
-
declare abstract class STT extends EventEmitter<STTEvents> {
|
|
23
|
-
protected mimeType?: keyof typeof MIME_TYPE_TO_EXTENSION;
|
|
24
|
-
logger?: Logger;
|
|
25
|
-
transcribe(audioStream: Readable): void;
|
|
26
|
-
protected log(...message: any[]): void;
|
|
27
|
-
destroy(): void;
|
|
28
|
-
protected get extension(): string;
|
|
29
|
-
private detectMimeType;
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
/**
|
|
33
|
-
* Abstract class for STT, converting stream to file before transcribing
|
|
34
|
-
*/
|
|
35
|
-
declare abstract class FileSTT extends STT {
|
|
36
|
-
abstract transcribeFile(file: File): Promise<string>;
|
|
37
|
-
transcribe(audioStream: Readable): void;
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
declare class MockSTT extends FileSTT {
|
|
41
|
-
private i;
|
|
42
|
-
transcribeFile(file: File): Promise<string>;
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
declare abstract class TTS {
|
|
46
|
-
logger?: Logger;
|
|
47
|
-
abstract speak(textStream: Readable): Readable;
|
|
48
|
-
abstract cancel(): void;
|
|
49
|
-
protected log(...message: any[]): void;
|
|
50
|
-
destroy(): void;
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
declare class MockTTS extends TTS {
|
|
54
|
-
private audioFilePaths;
|
|
55
|
-
constructor(audioFilePaths: string[]);
|
|
56
|
-
speak(textStream: Readable): PassThrough;
|
|
57
|
-
cancel(): void;
|
|
58
|
-
}
|
|
59
|
-
|
|
60
12
|
declare enum MicdropClientCommands {
|
|
61
13
|
StartSpeaking = "StartSpeaking",
|
|
62
14
|
StopSpeaking = "StopSpeaking",
|
|
@@ -64,25 +16,17 @@ declare enum MicdropClientCommands {
|
|
|
64
16
|
}
|
|
65
17
|
declare enum MicdropServerCommands {
|
|
66
18
|
Message = "Message",
|
|
67
|
-
CancelLastAssistantMessage = "CancelLastAssistantMessage",
|
|
68
19
|
CancelLastUserMessage = "CancelLastUserMessage",
|
|
69
20
|
SkipAnswer = "SkipAnswer",
|
|
70
21
|
EndCall = "EndCall",
|
|
71
22
|
ToolCall = "ToolCall"
|
|
72
23
|
}
|
|
73
|
-
interface MicdropConfig {
|
|
74
|
-
firstMessage?: string;
|
|
75
|
-
generateFirstMessage?: boolean;
|
|
76
|
-
agent: Agent;
|
|
77
|
-
stt: STT;
|
|
78
|
-
tts: TTS;
|
|
79
|
-
onEnd?(call: MicdropCallSummary): void;
|
|
80
|
-
}
|
|
81
24
|
interface MicdropCallSummary {
|
|
82
25
|
conversation: MicdropConversation;
|
|
83
26
|
duration: number;
|
|
84
27
|
}
|
|
85
|
-
type
|
|
28
|
+
type MicdropConversationItem = MicdropConversationMessage | MicdropConversationToolCall | MicdropConversationToolResult;
|
|
29
|
+
type MicdropConversation = Array<MicdropConversationItem>;
|
|
86
30
|
type MicdropAnswerMetadata = {
|
|
87
31
|
[key: string]: any;
|
|
88
32
|
};
|
|
@@ -91,6 +35,18 @@ interface MicdropConversationMessage<Data extends MicdropAnswerMetadata = Micdro
|
|
|
91
35
|
content: string;
|
|
92
36
|
metadata?: Data;
|
|
93
37
|
}
|
|
38
|
+
interface MicdropConversationToolCall {
|
|
39
|
+
role: 'tool_call';
|
|
40
|
+
toolCallId: string;
|
|
41
|
+
toolName: string;
|
|
42
|
+
parameters: string;
|
|
43
|
+
}
|
|
44
|
+
interface MicdropConversationToolResult {
|
|
45
|
+
role: 'tool_result';
|
|
46
|
+
toolCallId: string;
|
|
47
|
+
toolName: string;
|
|
48
|
+
output: string;
|
|
49
|
+
}
|
|
94
50
|
interface MicdropToolCall {
|
|
95
51
|
name: string;
|
|
96
52
|
parameters: any;
|
|
@@ -100,31 +56,80 @@ type DeepPartial<T> = T extends object ? {
|
|
|
100
56
|
[P in keyof T]?: DeepPartial<T[P]>;
|
|
101
57
|
} : T;
|
|
102
58
|
|
|
59
|
+
interface Tool<Schema extends z.ZodObject = z.ZodObject> {
|
|
60
|
+
name: string;
|
|
61
|
+
description: string;
|
|
62
|
+
inputSchema?: Schema;
|
|
63
|
+
execute?: (input: z.infer<Schema>) => any | Promise<any>;
|
|
64
|
+
skipAnswer?: boolean;
|
|
65
|
+
emitOutput?: boolean;
|
|
66
|
+
}
|
|
67
|
+
declare const AUTO_END_CALL_TOOL_NAME = "end_call";
|
|
68
|
+
declare const AUTO_END_CALL_PROMPT = "Call this tool only if user asks to end the call";
|
|
69
|
+
declare const AUTO_SEMANTIC_TURN_TOOL_NAME = "semantic_turn";
|
|
70
|
+
declare const AUTO_SEMANTIC_TURN_PROMPT = "Call this tool only if last user message is obviously an incomplete sentence that you need to wait for the end before answering";
|
|
71
|
+
declare const AUTO_IGNORE_USER_NOISE_TOOL_NAME = "ignore_user_noise";
|
|
72
|
+
declare const AUTO_IGNORE_USER_NOISE_PROMPT = "Call this tool only if last user message is just an interjection or a sound that expresses emotion, hesitation, or reaction (ex: \"Uh\", \"Ahem\", \"Hmm\", \"Ah\") but doesn't carry any clear meaning like agreeing, refusing, or commanding";
|
|
73
|
+
|
|
103
74
|
interface AgentOptions {
|
|
104
75
|
systemPrompt: string;
|
|
76
|
+
autoEndCall?: boolean | string;
|
|
77
|
+
autoSemanticTurn?: boolean | string;
|
|
78
|
+
autoIgnoreUserNoise?: boolean | string;
|
|
79
|
+
extract?: ExtractJsonOptions | ExtractTagOptions;
|
|
105
80
|
}
|
|
106
81
|
interface AgentEvents {
|
|
107
|
-
Message: [
|
|
82
|
+
Message: [MicdropConversationItem];
|
|
108
83
|
CancelLastUserMessage: [];
|
|
109
|
-
CancelLastAssistantMessage: [];
|
|
110
84
|
SkipAnswer: [];
|
|
111
85
|
EndCall: [];
|
|
112
86
|
ToolCall: [MicdropToolCall];
|
|
113
87
|
}
|
|
88
|
+
interface ExtractOptions {
|
|
89
|
+
callback?: (value: string) => void;
|
|
90
|
+
saveInMetadata?: boolean;
|
|
91
|
+
}
|
|
92
|
+
interface ExtractJsonOptions extends ExtractOptions {
|
|
93
|
+
json: true;
|
|
94
|
+
callback?: (value: any) => void;
|
|
95
|
+
}
|
|
96
|
+
interface ExtractTagOptions extends ExtractOptions {
|
|
97
|
+
startTag: string;
|
|
98
|
+
endTag: string;
|
|
99
|
+
}
|
|
114
100
|
declare abstract class Agent<Options extends AgentOptions = AgentOptions> extends EventEmitter<AgentEvents> {
|
|
115
101
|
protected options: Options;
|
|
116
102
|
logger?: Logger;
|
|
117
103
|
conversation: MicdropConversation;
|
|
104
|
+
protected tools: Tool[];
|
|
118
105
|
constructor(options: Options);
|
|
119
106
|
abstract answer(): Readable;
|
|
120
107
|
abstract cancel(): void;
|
|
121
108
|
addUserMessage(text: string, metadata?: MicdropAnswerMetadata): void;
|
|
122
109
|
addAssistantMessage(text: string, metadata?: MicdropAnswerMetadata): void;
|
|
110
|
+
addTool<Schema extends z.ZodObject>(tool: Tool<Schema>): void;
|
|
111
|
+
removeTool(name: string): void;
|
|
112
|
+
getTool(name: string): Tool | undefined;
|
|
123
113
|
protected addMessage(role: 'user' | 'assistant' | 'system', text: string, metadata?: MicdropAnswerMetadata): void;
|
|
114
|
+
protected addToolMessage(message: MicdropConversationToolCall | MicdropConversationToolResult): void;
|
|
124
115
|
protected endCall(): void;
|
|
125
116
|
protected cancelLastUserMessage(): void;
|
|
126
|
-
protected cancelLastAssistantMessage(): void;
|
|
127
117
|
protected skipAnswer(): void;
|
|
118
|
+
protected getDefaultTools(): Tool<z.ZodObject<z.core.$ZodLooseShape, z.core.$strip>>[];
|
|
119
|
+
protected executeTool(toolCall: MicdropConversationToolCall): Promise<{
|
|
120
|
+
output: any;
|
|
121
|
+
skipAnswer: boolean | undefined;
|
|
122
|
+
} | {
|
|
123
|
+
output: {
|
|
124
|
+
error: any;
|
|
125
|
+
};
|
|
126
|
+
skipAnswer?: undefined;
|
|
127
|
+
}>;
|
|
128
|
+
protected getExtractOptions(): ExtractTagOptions | undefined;
|
|
129
|
+
protected extract(message: string): {
|
|
130
|
+
message: string;
|
|
131
|
+
metadata: MicdropAnswerMetadata | undefined;
|
|
132
|
+
};
|
|
128
133
|
protected log(...message: any[]): void;
|
|
129
134
|
destroy(): void;
|
|
130
135
|
}
|
|
@@ -151,11 +156,69 @@ declare class MicdropError extends Error {
|
|
|
151
156
|
}
|
|
152
157
|
declare function handleError(socket: WebSocket, error: unknown): void;
|
|
153
158
|
|
|
159
|
+
declare const MIME_TYPE_TO_EXTENSION: {
|
|
160
|
+
readonly 'audio/wav': "wav";
|
|
161
|
+
readonly 'audio/ogg': "ogg";
|
|
162
|
+
readonly 'audio/mpeg': "mp3";
|
|
163
|
+
readonly 'audio/webm': "webm";
|
|
164
|
+
readonly 'audio/mp4': "mp4";
|
|
165
|
+
readonly 'audio/flac': "flac";
|
|
166
|
+
};
|
|
167
|
+
interface STTEvents {
|
|
168
|
+
Transcript: [string];
|
|
169
|
+
}
|
|
170
|
+
declare abstract class STT extends EventEmitter<STTEvents> {
|
|
171
|
+
protected mimeType?: keyof typeof MIME_TYPE_TO_EXTENSION;
|
|
172
|
+
logger?: Logger;
|
|
173
|
+
transcribe(audioStream: Readable): void;
|
|
174
|
+
protected log(...message: any[]): void;
|
|
175
|
+
destroy(): void;
|
|
176
|
+
protected get extension(): string;
|
|
177
|
+
private detectMimeType;
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
/**
|
|
181
|
+
* Abstract class for STT, converting stream to file before transcribing
|
|
182
|
+
*/
|
|
183
|
+
declare abstract class FileSTT extends STT {
|
|
184
|
+
abstract transcribeFile(file: File): Promise<string>;
|
|
185
|
+
transcribe(audioStream: Readable): void;
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
declare class MockSTT extends FileSTT {
|
|
189
|
+
private i;
|
|
190
|
+
transcribeFile(file: File): Promise<string>;
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
declare abstract class TTS {
|
|
194
|
+
logger?: Logger;
|
|
195
|
+
abstract speak(textStream: Readable): Readable;
|
|
196
|
+
abstract cancel(): void;
|
|
197
|
+
protected log(...message: any[]): void;
|
|
198
|
+
destroy(): void;
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
declare class MockTTS extends TTS {
|
|
202
|
+
private audioFilePaths;
|
|
203
|
+
constructor(audioFilePaths: string[]);
|
|
204
|
+
speak(textStream: Readable): PassThrough;
|
|
205
|
+
cancel(): void;
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
interface MicdropConfig {
|
|
209
|
+
firstMessage?: string;
|
|
210
|
+
generateFirstMessage?: boolean;
|
|
211
|
+
agent: Agent;
|
|
212
|
+
stt: STT;
|
|
213
|
+
tts: TTS;
|
|
214
|
+
onEnd?(call: MicdropCallSummary): void;
|
|
215
|
+
}
|
|
154
216
|
declare class MicdropServer {
|
|
155
217
|
socket: WebSocket$1 | null;
|
|
156
218
|
config: MicdropConfig | null;
|
|
157
219
|
logger?: Logger;
|
|
158
220
|
private startTime;
|
|
221
|
+
private lastMessageSpeeched?;
|
|
159
222
|
private currentUserStream?;
|
|
160
223
|
constructor(socket: WebSocket$1, config: MicdropConfig);
|
|
161
224
|
private log;
|
|
@@ -174,4 +237,4 @@ declare class MicdropServer {
|
|
|
174
237
|
|
|
175
238
|
declare function waitForParams<CallParams>(socket: WebSocket$1, validate: (params: any) => CallParams): Promise<CallParams>;
|
|
176
239
|
|
|
177
|
-
export { Agent, type AgentEvents, type AgentOptions, type DeepPartial, FileSTT, Logger, type MicdropAnswerMetadata, type MicdropCallSummary, MicdropClientCommands, type MicdropConfig, type MicdropConversation, type MicdropConversationMessage, MicdropError, MicdropErrorCode, MicdropServer, MicdropServerCommands, type MicdropToolCall, MockAgent, MockSTT, MockTTS, STT, type STTEvents, TTS, convertPCMToOpus, convertToOpus, convertToPCM, handleError, waitForParams };
|
|
240
|
+
export { AUTO_END_CALL_PROMPT, AUTO_END_CALL_TOOL_NAME, AUTO_IGNORE_USER_NOISE_PROMPT, AUTO_IGNORE_USER_NOISE_TOOL_NAME, AUTO_SEMANTIC_TURN_PROMPT, AUTO_SEMANTIC_TURN_TOOL_NAME, Agent, type AgentEvents, type AgentOptions, type DeepPartial, type ExtractJsonOptions, type ExtractOptions, type ExtractTagOptions, FileSTT, Logger, type MicdropAnswerMetadata, type MicdropCallSummary, MicdropClientCommands, type MicdropConfig, type MicdropConversation, type MicdropConversationItem, type MicdropConversationMessage, type MicdropConversationToolCall, type MicdropConversationToolResult, MicdropError, MicdropErrorCode, MicdropServer, MicdropServerCommands, type MicdropToolCall, MockAgent, MockSTT, MockTTS, STT, type STTEvents, TTS, type Tool, convertPCMToOpus, convertToOpus, convertToPCM, handleError, waitForParams };
|