npm - @micdrop/server - Versions diffs - 2.0.6 → 2.0.8 - Mend

@micdrop/server 2.0.6 → 2.0.8

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (7)

package/dist/index.d.mts CHANGED Viewed

@@ -1,5 +1,6 @@
 import { EventEmitter } from 'eventemitter3';
 import { Readable, PassThrough } from 'stream';
+import { z } from 'zod';
 import WebSocket, { WebSocket as WebSocket$1 } from 'ws';
 declare class Logger {
@@ -8,55 +9,6 @@ declare class Logger {
     log(...message: any[]): void;
 }
-declare const MIME_TYPE_TO_EXTENSION: {
-    readonly 'audio/wav': "wav";
-    readonly 'audio/ogg': "ogg";
-    readonly 'audio/mpeg': "mp3";
-    readonly 'audio/webm': "webm";
-    readonly 'audio/mp4': "mp4";
-    readonly 'audio/flac': "flac";
-};
-interface STTEvents {
-    Transcript: [string];
-}
-declare abstract class STT extends EventEmitter<STTEvents> {
-    protected mimeType?: keyof typeof MIME_TYPE_TO_EXTENSION;
-    logger?: Logger;
-    transcribe(audioStream: Readable): void;
-    protected log(...message: any[]): void;
-    destroy(): void;
-    protected get extension(): string;
-    private detectMimeType;
-}
-/**
- * Abstract class for STT, converting stream to file before transcribing
- */
-declare abstract class FileSTT extends STT {
-    abstract transcribeFile(file: File): Promise<string>;
-    transcribe(audioStream: Readable): void;
-}
-declare class MockSTT extends FileSTT {
-    private i;
-    transcribeFile(file: File): Promise<string>;
-}
-declare abstract class TTS {
-    logger?: Logger;
-    abstract speak(textStream: Readable): Readable;
-    abstract cancel(): void;
-    protected log(...message: any[]): void;
-    destroy(): void;
-}
-declare class MockTTS extends TTS {
-    private audioFilePaths;
-    constructor(audioFilePaths: string[]);
-    speak(textStream: Readable): PassThrough;
-    cancel(): void;
-}
 declare enum MicdropClientCommands {
     StartSpeaking = "StartSpeaking",
     StopSpeaking = "StopSpeaking",
@@ -64,25 +16,17 @@ declare enum MicdropClientCommands {
 }
 declare enum MicdropServerCommands {
     Message = "Message",
-    CancelLastAssistantMessage = "CancelLastAssistantMessage",
     CancelLastUserMessage = "CancelLastUserMessage",
     SkipAnswer = "SkipAnswer",
     EndCall = "EndCall",
     ToolCall = "ToolCall"
 }
-interface MicdropConfig {
-    firstMessage?: string;
-    generateFirstMessage?: boolean;
-    agent: Agent;
-    stt: STT;
-    tts: TTS;
-    onEnd?(call: MicdropCallSummary): void;
-}
 interface MicdropCallSummary {
     conversation: MicdropConversation;
     duration: number;
 }
-type MicdropConversation = MicdropConversationMessage[];
+type MicdropConversationItem = MicdropConversationMessage | MicdropConversationToolCall | MicdropConversationToolResult;
+type MicdropConversation = Array<MicdropConversationItem>;
 type MicdropAnswerMetadata = {
     [key: string]: any;
 };
@@ -91,6 +35,18 @@ interface MicdropConversationMessage<Data extends MicdropAnswerMetadata = Micdro
     content: string;
     metadata?: Data;
 }
+interface MicdropConversationToolCall {
+    role: 'tool_call';
+    toolCallId: string;
+    toolName: string;
+    parameters: string;
+}
+interface MicdropConversationToolResult {
+    role: 'tool_result';
+    toolCallId: string;
+    toolName: string;
+    output: string;
+}
 interface MicdropToolCall {
     name: string;
     parameters: any;
@@ -100,31 +56,80 @@ type DeepPartial<T> = T extends object ? {
     [P in keyof T]?: DeepPartial<T[P]>;
 } : T;
+interface Tool<Schema extends z.ZodObject = z.ZodObject> {
+    name: string;
+    description: string;
+    inputSchema?: Schema;
+    execute?: (input: z.infer<Schema>) => any | Promise<any>;
+    skipAnswer?: boolean;
+    emitOutput?: boolean;
+}
+declare const AUTO_END_CALL_TOOL_NAME = "end_call";
+declare const AUTO_END_CALL_PROMPT = "Call this tool only if user asks to end the call";
+declare const AUTO_SEMANTIC_TURN_TOOL_NAME = "semantic_turn";
+declare const AUTO_SEMANTIC_TURN_PROMPT = "Call this tool only if last user message is obviously an incomplete sentence that you need to wait for the end before answering";
+declare const AUTO_IGNORE_USER_NOISE_TOOL_NAME = "ignore_user_noise";
+declare const AUTO_IGNORE_USER_NOISE_PROMPT = "Call this tool only if last user message is just an interjection or a sound that expresses emotion, hesitation, or reaction (ex: \"Uh\", \"Ahem\", \"Hmm\", \"Ah\") but doesn't carry any clear meaning like agreeing, refusing, or commanding";
 interface AgentOptions {
     systemPrompt: string;
+    autoEndCall?: boolean | string;
+    autoSemanticTurn?: boolean | string;
+    autoIgnoreUserNoise?: boolean | string;
+    extract?: ExtractJsonOptions | ExtractTagOptions;
 }
 interface AgentEvents {
-    Message: [MicdropConversationMessage];
+    Message: [MicdropConversationItem];
     CancelLastUserMessage: [];
-    CancelLastAssistantMessage: [];
     SkipAnswer: [];
     EndCall: [];
     ToolCall: [MicdropToolCall];
 }
+interface ExtractOptions {
+    callback?: (value: string) => void;
+    saveInMetadata?: boolean;
+}
+interface ExtractJsonOptions extends ExtractOptions {
+    json: true;
+    callback?: (value: any) => void;
+}
+interface ExtractTagOptions extends ExtractOptions {
+    startTag: string;
+    endTag: string;
+}
 declare abstract class Agent<Options extends AgentOptions = AgentOptions> extends EventEmitter<AgentEvents> {
     protected options: Options;
     logger?: Logger;
     conversation: MicdropConversation;
+    protected tools: Tool[];
     constructor(options: Options);
     abstract answer(): Readable;
     abstract cancel(): void;
     addUserMessage(text: string, metadata?: MicdropAnswerMetadata): void;
     addAssistantMessage(text: string, metadata?: MicdropAnswerMetadata): void;
+    addTool<Schema extends z.ZodObject>(tool: Tool<Schema>): void;
+    removeTool(name: string): void;
+    getTool(name: string): Tool | undefined;
     protected addMessage(role: 'user' | 'assistant' | 'system', text: string, metadata?: MicdropAnswerMetadata): void;
+    protected addToolMessage(message: MicdropConversationToolCall | MicdropConversationToolResult): void;
     protected endCall(): void;
     protected cancelLastUserMessage(): void;
-    protected cancelLastAssistantMessage(): void;
     protected skipAnswer(): void;
+    protected getDefaultTools(): Tool<z.ZodObject<z.core.$ZodLooseShape, z.core.$strip>>[];
+    protected executeTool(toolCall: MicdropConversationToolCall): Promise<{
+        output: any;
+        skipAnswer: boolean | undefined;
+    } | {
+        output: {
+            error: any;
+        };
+        skipAnswer?: undefined;
+    }>;
+    protected getExtractOptions(): ExtractTagOptions | undefined;
+    protected extract(message: string): {
+        message: string;
+        metadata: MicdropAnswerMetadata | undefined;
+    };
     protected log(...message: any[]): void;
     destroy(): void;
 }
@@ -151,11 +156,69 @@ declare class MicdropError extends Error {
 }
 declare function handleError(socket: WebSocket, error: unknown): void;
+declare const MIME_TYPE_TO_EXTENSION: {
+    readonly 'audio/wav': "wav";
+    readonly 'audio/ogg': "ogg";
+    readonly 'audio/mpeg': "mp3";
+    readonly 'audio/webm': "webm";
+    readonly 'audio/mp4': "mp4";
+    readonly 'audio/flac': "flac";
+};
+interface STTEvents {
+    Transcript: [string];
+}
+declare abstract class STT extends EventEmitter<STTEvents> {
+    protected mimeType?: keyof typeof MIME_TYPE_TO_EXTENSION;
+    logger?: Logger;
+    transcribe(audioStream: Readable): void;
+    protected log(...message: any[]): void;
+    destroy(): void;
+    protected get extension(): string;
+    private detectMimeType;
+}
+/**
+ * Abstract class for STT, converting stream to file before transcribing
+ */
+declare abstract class FileSTT extends STT {
+    abstract transcribeFile(file: File): Promise<string>;
+    transcribe(audioStream: Readable): void;
+}
+declare class MockSTT extends FileSTT {
+    private i;
+    transcribeFile(file: File): Promise<string>;
+}
+declare abstract class TTS {
+    logger?: Logger;
+    abstract speak(textStream: Readable): Readable;
+    abstract cancel(): void;
+    protected log(...message: any[]): void;
+    destroy(): void;
+}
+declare class MockTTS extends TTS {
+    private audioFilePaths;
+    constructor(audioFilePaths: string[]);
+    speak(textStream: Readable): PassThrough;
+    cancel(): void;
+}
+interface MicdropConfig {
+    firstMessage?: string;
+    generateFirstMessage?: boolean;
+    agent: Agent;
+    stt: STT;
+    tts: TTS;
+    onEnd?(call: MicdropCallSummary): void;
+}
 declare class MicdropServer {
     socket: WebSocket$1 | null;
     config: MicdropConfig | null;
     logger?: Logger;
     private startTime;
+    private lastMessageSpeeched?;
     private currentUserStream?;
     constructor(socket: WebSocket$1, config: MicdropConfig);
     private log;
@@ -174,4 +237,4 @@ declare class MicdropServer {
 declare function waitForParams<CallParams>(socket: WebSocket$1, validate: (params: any) => CallParams): Promise<CallParams>;
-export { Agent, type AgentEvents, type AgentOptions, type DeepPartial, FileSTT, Logger, type MicdropAnswerMetadata, type MicdropCallSummary, MicdropClientCommands, type MicdropConfig, type MicdropConversation, type MicdropConversationMessage, MicdropError, MicdropErrorCode, MicdropServer, MicdropServerCommands, type MicdropToolCall, MockAgent, MockSTT, MockTTS, STT, type STTEvents, TTS, convertPCMToOpus, convertToOpus, convertToPCM, handleError, waitForParams };
+export { AUTO_END_CALL_PROMPT, AUTO_END_CALL_TOOL_NAME, AUTO_IGNORE_USER_NOISE_PROMPT, AUTO_IGNORE_USER_NOISE_TOOL_NAME, AUTO_SEMANTIC_TURN_PROMPT, AUTO_SEMANTIC_TURN_TOOL_NAME, Agent, type AgentEvents, type AgentOptions, type DeepPartial, type ExtractJsonOptions, type ExtractOptions, type ExtractTagOptions, FileSTT, Logger, type MicdropAnswerMetadata, type MicdropCallSummary, MicdropClientCommands, type MicdropConfig, type MicdropConversation, type MicdropConversationItem, type MicdropConversationMessage, type MicdropConversationToolCall, type MicdropConversationToolResult, MicdropError, MicdropErrorCode, MicdropServer, MicdropServerCommands, type MicdropToolCall, MockAgent, MockSTT, MockTTS, STT, type STTEvents, TTS, type Tool, convertPCMToOpus, convertToOpus, convertToPCM, handleError, waitForParams };

package/dist/index.d.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 import { EventEmitter } from 'eventemitter3';
 import { Readable, PassThrough } from 'stream';
+import { z } from 'zod';
 import WebSocket, { WebSocket as WebSocket$1 } from 'ws';
 declare class Logger {
@@ -8,55 +9,6 @@ declare class Logger {
     log(...message: any[]): void;
 }
-declare const MIME_TYPE_TO_EXTENSION: {
-    readonly 'audio/wav': "wav";
-    readonly 'audio/ogg': "ogg";
-    readonly 'audio/mpeg': "mp3";
-    readonly 'audio/webm': "webm";
-    readonly 'audio/mp4': "mp4";
-    readonly 'audio/flac': "flac";
-};
-interface STTEvents {
-    Transcript: [string];
-}
-declare abstract class STT extends EventEmitter<STTEvents> {
-    protected mimeType?: keyof typeof MIME_TYPE_TO_EXTENSION;
-    logger?: Logger;
-    transcribe(audioStream: Readable): void;
-    protected log(...message: any[]): void;
-    destroy(): void;
-    protected get extension(): string;
-    private detectMimeType;
-}
-/**
- * Abstract class for STT, converting stream to file before transcribing
- */
-declare abstract class FileSTT extends STT {
-    abstract transcribeFile(file: File): Promise<string>;
-    transcribe(audioStream: Readable): void;
-}
-declare class MockSTT extends FileSTT {
-    private i;
-    transcribeFile(file: File): Promise<string>;
-}
-declare abstract class TTS {
-    logger?: Logger;
-    abstract speak(textStream: Readable): Readable;
-    abstract cancel(): void;
-    protected log(...message: any[]): void;
-    destroy(): void;
-}
-declare class MockTTS extends TTS {
-    private audioFilePaths;
-    constructor(audioFilePaths: string[]);
-    speak(textStream: Readable): PassThrough;
-    cancel(): void;
-}
 declare enum MicdropClientCommands {
     StartSpeaking = "StartSpeaking",
     StopSpeaking = "StopSpeaking",
@@ -64,25 +16,17 @@ declare enum MicdropClientCommands {
 }
 declare enum MicdropServerCommands {
     Message = "Message",
-    CancelLastAssistantMessage = "CancelLastAssistantMessage",
     CancelLastUserMessage = "CancelLastUserMessage",
     SkipAnswer = "SkipAnswer",
     EndCall = "EndCall",
     ToolCall = "ToolCall"
 }
-interface MicdropConfig {
-    firstMessage?: string;
-    generateFirstMessage?: boolean;
-    agent: Agent;
-    stt: STT;
-    tts: TTS;
-    onEnd?(call: MicdropCallSummary): void;
-}
 interface MicdropCallSummary {
     conversation: MicdropConversation;
     duration: number;
 }
-type MicdropConversation = MicdropConversationMessage[];
+type MicdropConversationItem = MicdropConversationMessage | MicdropConversationToolCall | MicdropConversationToolResult;
+type MicdropConversation = Array<MicdropConversationItem>;
 type MicdropAnswerMetadata = {
     [key: string]: any;
 };
@@ -91,6 +35,18 @@ interface MicdropConversationMessage<Data extends MicdropAnswerMetadata = Micdro
     content: string;
     metadata?: Data;
 }
+interface MicdropConversationToolCall {
+    role: 'tool_call';
+    toolCallId: string;
+    toolName: string;
+    parameters: string;
+}
+interface MicdropConversationToolResult {
+    role: 'tool_result';
+    toolCallId: string;
+    toolName: string;
+    output: string;
+}
 interface MicdropToolCall {
     name: string;
     parameters: any;
@@ -100,31 +56,80 @@ type DeepPartial<T> = T extends object ? {
     [P in keyof T]?: DeepPartial<T[P]>;
 } : T;
+interface Tool<Schema extends z.ZodObject = z.ZodObject> {
+    name: string;
+    description: string;
+    inputSchema?: Schema;
+    execute?: (input: z.infer<Schema>) => any | Promise<any>;
+    skipAnswer?: boolean;
+    emitOutput?: boolean;
+}
+declare const AUTO_END_CALL_TOOL_NAME = "end_call";
+declare const AUTO_END_CALL_PROMPT = "Call this tool only if user asks to end the call";
+declare const AUTO_SEMANTIC_TURN_TOOL_NAME = "semantic_turn";
+declare const AUTO_SEMANTIC_TURN_PROMPT = "Call this tool only if last user message is obviously an incomplete sentence that you need to wait for the end before answering";
+declare const AUTO_IGNORE_USER_NOISE_TOOL_NAME = "ignore_user_noise";
+declare const AUTO_IGNORE_USER_NOISE_PROMPT = "Call this tool only if last user message is just an interjection or a sound that expresses emotion, hesitation, or reaction (ex: \"Uh\", \"Ahem\", \"Hmm\", \"Ah\") but doesn't carry any clear meaning like agreeing, refusing, or commanding";
 interface AgentOptions {
     systemPrompt: string;
+    autoEndCall?: boolean | string;
+    autoSemanticTurn?: boolean | string;
+    autoIgnoreUserNoise?: boolean | string;
+    extract?: ExtractJsonOptions | ExtractTagOptions;
 }
 interface AgentEvents {
-    Message: [MicdropConversationMessage];
+    Message: [MicdropConversationItem];
     CancelLastUserMessage: [];
-    CancelLastAssistantMessage: [];
     SkipAnswer: [];
     EndCall: [];
     ToolCall: [MicdropToolCall];
 }
+interface ExtractOptions {
+    callback?: (value: string) => void;
+    saveInMetadata?: boolean;
+}
+interface ExtractJsonOptions extends ExtractOptions {
+    json: true;
+    callback?: (value: any) => void;
+}
+interface ExtractTagOptions extends ExtractOptions {
+    startTag: string;
+    endTag: string;
+}
 declare abstract class Agent<Options extends AgentOptions = AgentOptions> extends EventEmitter<AgentEvents> {
     protected options: Options;
     logger?: Logger;
     conversation: MicdropConversation;
+    protected tools: Tool[];
     constructor(options: Options);
     abstract answer(): Readable;
     abstract cancel(): void;
     addUserMessage(text: string, metadata?: MicdropAnswerMetadata): void;
     addAssistantMessage(text: string, metadata?: MicdropAnswerMetadata): void;
+    addTool<Schema extends z.ZodObject>(tool: Tool<Schema>): void;
+    removeTool(name: string): void;
+    getTool(name: string): Tool | undefined;
     protected addMessage(role: 'user' | 'assistant' | 'system', text: string, metadata?: MicdropAnswerMetadata): void;
+    protected addToolMessage(message: MicdropConversationToolCall | MicdropConversationToolResult): void;
     protected endCall(): void;
     protected cancelLastUserMessage(): void;
-    protected cancelLastAssistantMessage(): void;
     protected skipAnswer(): void;
+    protected getDefaultTools(): Tool<z.ZodObject<z.core.$ZodLooseShape, z.core.$strip>>[];
+    protected executeTool(toolCall: MicdropConversationToolCall): Promise<{
+        output: any;
+        skipAnswer: boolean | undefined;
+    } | {
+        output: {
+            error: any;
+        };
+        skipAnswer?: undefined;
+    }>;
+    protected getExtractOptions(): ExtractTagOptions | undefined;
+    protected extract(message: string): {
+        message: string;
+        metadata: MicdropAnswerMetadata | undefined;
+    };
     protected log(...message: any[]): void;
     destroy(): void;
 }
@@ -151,11 +156,69 @@ declare class MicdropError extends Error {
 }
 declare function handleError(socket: WebSocket, error: unknown): void;
+declare const MIME_TYPE_TO_EXTENSION: {
+    readonly 'audio/wav': "wav";
+    readonly 'audio/ogg': "ogg";
+    readonly 'audio/mpeg': "mp3";
+    readonly 'audio/webm': "webm";
+    readonly 'audio/mp4': "mp4";
+    readonly 'audio/flac': "flac";
+};
+interface STTEvents {
+    Transcript: [string];
+}
+declare abstract class STT extends EventEmitter<STTEvents> {
+    protected mimeType?: keyof typeof MIME_TYPE_TO_EXTENSION;
+    logger?: Logger;
+    transcribe(audioStream: Readable): void;
+    protected log(...message: any[]): void;
+    destroy(): void;
+    protected get extension(): string;
+    private detectMimeType;
+}
+/**
+ * Abstract class for STT, converting stream to file before transcribing
+ */
+declare abstract class FileSTT extends STT {
+    abstract transcribeFile(file: File): Promise<string>;
+    transcribe(audioStream: Readable): void;
+}
+declare class MockSTT extends FileSTT {
+    private i;
+    transcribeFile(file: File): Promise<string>;
+}
+declare abstract class TTS {
+    logger?: Logger;
+    abstract speak(textStream: Readable): Readable;
+    abstract cancel(): void;
+    protected log(...message: any[]): void;
+    destroy(): void;
+}
+declare class MockTTS extends TTS {
+    private audioFilePaths;
+    constructor(audioFilePaths: string[]);
+    speak(textStream: Readable): PassThrough;
+    cancel(): void;
+}
+interface MicdropConfig {
+    firstMessage?: string;
+    generateFirstMessage?: boolean;
+    agent: Agent;
+    stt: STT;
+    tts: TTS;
+    onEnd?(call: MicdropCallSummary): void;
+}
 declare class MicdropServer {
     socket: WebSocket$1 | null;
     config: MicdropConfig | null;
     logger?: Logger;
     private startTime;
+    private lastMessageSpeeched?;
     private currentUserStream?;
     constructor(socket: WebSocket$1, config: MicdropConfig);
     private log;
@@ -174,4 +237,4 @@ declare class MicdropServer {
 declare function waitForParams<CallParams>(socket: WebSocket$1, validate: (params: any) => CallParams): Promise<CallParams>;
-export { Agent, type AgentEvents, type AgentOptions, type DeepPartial, FileSTT, Logger, type MicdropAnswerMetadata, type MicdropCallSummary, MicdropClientCommands, type MicdropConfig, type MicdropConversation, type MicdropConversationMessage, MicdropError, MicdropErrorCode, MicdropServer, MicdropServerCommands, type MicdropToolCall, MockAgent, MockSTT, MockTTS, STT, type STTEvents, TTS, convertPCMToOpus, convertToOpus, convertToPCM, handleError, waitForParams };
+export { AUTO_END_CALL_PROMPT, AUTO_END_CALL_TOOL_NAME, AUTO_IGNORE_USER_NOISE_PROMPT, AUTO_IGNORE_USER_NOISE_TOOL_NAME, AUTO_SEMANTIC_TURN_PROMPT, AUTO_SEMANTIC_TURN_TOOL_NAME, Agent, type AgentEvents, type AgentOptions, type DeepPartial, type ExtractJsonOptions, type ExtractOptions, type ExtractTagOptions, FileSTT, Logger, type MicdropAnswerMetadata, type MicdropCallSummary, MicdropClientCommands, type MicdropConfig, type MicdropConversation, type MicdropConversationItem, type MicdropConversationMessage, type MicdropConversationToolCall, type MicdropConversationToolResult, MicdropError, MicdropErrorCode, MicdropServer, MicdropServerCommands, type MicdropToolCall, MockAgent, MockSTT, MockTTS, STT, type STTEvents, TTS, type Tool, convertPCMToOpus, convertToOpus, convertToPCM, handleError, waitForParams };