npm - @micdrop/server - Versions diffs - 1.7.1 → 2.0.0 - Mend

@micdrop/server 1.7.1 → 2.0.0

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/dist/index.d.mts CHANGED Viewed

@@ -1,86 +1,183 @@
-import WebSocket$1, { WebSocket } from 'ws';
+import EventEmitter from 'eventemitter3';
+import { Readable, PassThrough } from 'stream';
+import WebSocket, { WebSocket as WebSocket$1 } from 'ws';
-declare enum CallClientCommands {
+declare class Logger {
+    private readonly name;
+    constructor(name: string);
+    log(...message: any[]): void;
+}
+declare const MIME_TYPE_TO_EXTENSION: {
+    readonly 'audio/wav': "wav";
+    readonly 'audio/ogg': "ogg";
+    readonly 'audio/mpeg': "mp3";
+    readonly 'audio/webm': "webm";
+    readonly 'audio/mp4': "mp4";
+    readonly 'audio/flac': "flac";
+};
+interface STTEvents {
+    Transcript: [string];
+}
+declare abstract class STT extends EventEmitter<STTEvents> {
+    protected mimeType?: keyof typeof MIME_TYPE_TO_EXTENSION;
+    logger?: Logger;
+    transcribe(audioStream: Readable): void;
+    protected log(...message: any[]): void;
+    destroy(): void;
+    protected get extension(): string;
+    private detectMimeType;
+}
+/**
+ * Abstract class for STT, converting stream to file before transcribing
+ */
+declare abstract class FileSTT extends STT {
+    abstract transcribeFile(file: File): Promise<string>;
+    transcribe(audioStream: Readable): void;
+}
+declare class MockSTT extends FileSTT {
+    private i;
+    transcribeFile(file: File): Promise<string>;
+}
+declare abstract class TTS {
+    logger?: Logger;
+    abstract speak(textStream: Readable): Readable;
+    abstract cancel(): void;
+    protected log(...message: any[]): void;
+    destroy(): void;
+}
+declare class MockTTS extends TTS {
+    private audioFilePaths;
+    constructor(audioFilePaths: string[]);
+    speak(textStream: Readable): PassThrough;
+    cancel(): void;
+}
+declare enum MicdropClientCommands {
     StartSpeaking = "StartSpeaking",
     StopSpeaking = "StopSpeaking",
     Mute = "Mute"
 }
-declare enum CallServerCommands {
+declare enum MicdropServerCommands {
     Message = "Message",
     CancelLastAssistantMessage = "CancelLastAssistantMessage",
     CancelLastUserMessage = "CancelLastUserMessage",
     SkipAnswer = "SkipAnswer",
-    EnableSpeakerStreaming = "EnableSpeakerStreaming",
     EndCall = "EndCall"
 }
-interface CallConfig {
-    systemPrompt: string;
+interface MicdropConfig {
     firstMessage?: string;
-    debugLog?: boolean;
-    debugSaveSpeech?: boolean;
-    disableTTS?: boolean;
-    generateAnswer(conversation: Conversation): Promise<string | ConversationMessage>;
-    speech2Text(audioBlob: Blob, prevMessage?: string): Promise<string>;
-    text2Speech(text: string): Promise<ArrayBuffer | NodeJS.ReadableStream>;
-    onMessage?(message: ConversationMessage): void;
-    onEnd?(call: CallSummary): void;
-}
-interface CallSummary {
-    conversation: Conversation;
+    generateFirstMessage?: boolean;
+    agent: Agent;
+    stt: STT;
+    tts: TTS;
+    onEnd?(call: MicdropCallSummary): void;
+}
+interface MicdropCallSummary {
+    conversation: MicdropConversation;
     duration: number;
 }
-type Conversation = ConversationMessage[];
-type AnswerCommands = {
-    endCall?: boolean;
-    cancelLastUserMessage?: boolean;
-    skipAnswer?: boolean;
-};
-type AnswerMetadata = {
+type MicdropConversation = MicdropConversationMessage[];
+type MicdropAnswerMetadata = {
     [key: string]: any;
 };
-interface ConversationMessage<Data extends AnswerMetadata = AnswerMetadata> {
+interface MicdropConversationMessage<Data extends MicdropAnswerMetadata = MicdropAnswerMetadata> {
     role: 'system' | 'user' | 'assistant';
     content: string;
-    commands?: AnswerCommands;
     metadata?: Data;
 }
+type DeepPartial<T> = T extends object ? {
+    [P in keyof T]?: DeepPartial<T[P]>;
+} : T;
-interface Processing {
-    aborted: boolean;
+interface AgentOptions {
+    systemPrompt: string;
 }
-declare class CallServer {
-    socket: WebSocket | null;
-    config: CallConfig | null;
-    private startTime;
-    private lastDebug;
-    private processing?;
-    private isSpeaking;
-    private chunks;
-    private conversation;
-    private speakerStreamingEnabled;
-    constructor(socket: WebSocket, config: CallConfig);
-    resetConversation(conversation: Conversation): void;
-    private abortProcessing;
-    private addMessage;
-    private sendAudio;
-    private onClose;
-    private onMessage;
-    private onStopSpeaking;
-    answer(message: string | ConversationMessage, processing?: Processing): Promise<void>;
-    private log;
+interface AgentEvents {
+    Message: [MicdropConversationMessage];
+    CancelLastUserMessage: [];
+    CancelLastAssistantMessage: [];
+    SkipAnswer: [];
+    EndCall: [];
+}
+interface AgentAnswerReturn {
+    message: Promise<string>;
+    stream: Readable;
+}
+interface TextPromise {
+    promise: Promise<string>;
+    resolve: (value: string) => void;
+    reject: (reason?: any) => void;
+}
+declare abstract class Agent<Options extends AgentOptions = AgentOptions> extends EventEmitter {
+    protected options: Options;
+    logger?: Logger;
+    conversation: MicdropConversation;
+    constructor(options: Options);
+    abstract answer(): AgentAnswerReturn;
+    abstract cancel(): void;
+    addUserMessage(text: string): void;
+    addAssistantMessage(text: string): void;
+    protected addMessage(role: 'user' | 'assistant' | 'system', text: string): void;
+    protected endCall(): void;
+    protected cancelLastUserMessage(): void;
+    protected cancelLastAssistantMessage(): void;
+    protected skipAnswer(): void;
+    protected createTextPromise(): TextPromise;
+    protected log(...message: any[]): void;
+    destroy(): void;
+}
+declare class MockAgent extends Agent {
+    private i;
+    constructor();
+    answer(): {
+        message: Promise<string>;
+        stream: PassThrough;
+    };
+    cancel(): void;
 }
-declare enum CallErrorCode {
+declare function convertToPCM(audioStream: Readable, sampleRate?: number, bitDepth?: number): PassThrough;
+declare function convertToOpus(audioStream: Readable, sampleRate?: number): PassThrough;
+declare function convertPCMToOpus(audioStream: Readable, sampleRate?: number): PassThrough;
+declare enum MicdropErrorCode {
     BadRequest = 4400,
     Unauthorized = 4401,
     NotFound = 4404
 }
-declare class CallError extends Error {
+declare class MicdropError extends Error {
     code: number;
     constructor(code: number, message: string);
 }
-declare function handleError(socket: WebSocket$1, error: unknown): void;
+declare function handleError(socket: WebSocket, error: unknown): void;
+declare class MicdropServer {
+    socket: WebSocket$1 | null;
+    config: MicdropConfig | null;
+    logger?: Logger;
+    private startTime;
+    private currentUserStream?;
+    constructor(socket: WebSocket$1, config: MicdropConfig);
+    private log;
+    private cancel;
+    private onClose;
+    private onMessage;
+    private onMute;
+    private onStartSpeaking;
+    private onStopSpeaking;
+    private onTranscript;
+    private sendFirstMessage;
+    private answer;
+    private speak;
+    private sendAudio;
+}
-declare function waitForParams<CallParams>(socket: WebSocket, validate: (params: any) => CallParams): Promise<CallParams>;
+declare function waitForParams<CallParams>(socket: WebSocket$1, validate: (params: any) => CallParams): Promise<CallParams>;
-export { type AnswerCommands, type AnswerMetadata, CallClientCommands, type CallConfig, CallError, CallErrorCode, CallServer, CallServerCommands, type CallSummary, type Conversation, type ConversationMessage, handleError, waitForParams };
+export { Agent, type AgentAnswerReturn, type AgentEvents, type AgentOptions, type DeepPartial, FileSTT, Logger, type MicdropAnswerMetadata, type MicdropCallSummary, MicdropClientCommands, type MicdropConfig, type MicdropConversation, type MicdropConversationMessage, MicdropError, MicdropErrorCode, MicdropServer, MicdropServerCommands, MockAgent, MockSTT, MockTTS, STT, type STTEvents, TTS, type TextPromise, convertPCMToOpus, convertToOpus, convertToPCM, handleError, waitForParams };

package/dist/index.d.ts CHANGED Viewed

@@ -1,86 +1,183 @@
-import WebSocket$1, { WebSocket } from 'ws';
+import EventEmitter from 'eventemitter3';
+import { Readable, PassThrough } from 'stream';
+import WebSocket, { WebSocket as WebSocket$1 } from 'ws';
-declare enum CallClientCommands {
+declare class Logger {
+    private readonly name;
+    constructor(name: string);
+    log(...message: any[]): void;
+}
+declare const MIME_TYPE_TO_EXTENSION: {
+    readonly 'audio/wav': "wav";
+    readonly 'audio/ogg': "ogg";
+    readonly 'audio/mpeg': "mp3";
+    readonly 'audio/webm': "webm";
+    readonly 'audio/mp4': "mp4";
+    readonly 'audio/flac': "flac";
+};
+interface STTEvents {
+    Transcript: [string];
+}
+declare abstract class STT extends EventEmitter<STTEvents> {
+    protected mimeType?: keyof typeof MIME_TYPE_TO_EXTENSION;
+    logger?: Logger;
+    transcribe(audioStream: Readable): void;
+    protected log(...message: any[]): void;
+    destroy(): void;
+    protected get extension(): string;
+    private detectMimeType;
+}
+/**
+ * Abstract class for STT, converting stream to file before transcribing
+ */
+declare abstract class FileSTT extends STT {
+    abstract transcribeFile(file: File): Promise<string>;
+    transcribe(audioStream: Readable): void;
+}
+declare class MockSTT extends FileSTT {
+    private i;
+    transcribeFile(file: File): Promise<string>;
+}
+declare abstract class TTS {
+    logger?: Logger;
+    abstract speak(textStream: Readable): Readable;
+    abstract cancel(): void;
+    protected log(...message: any[]): void;
+    destroy(): void;
+}
+declare class MockTTS extends TTS {
+    private audioFilePaths;
+    constructor(audioFilePaths: string[]);
+    speak(textStream: Readable): PassThrough;
+    cancel(): void;
+}
+declare enum MicdropClientCommands {
     StartSpeaking = "StartSpeaking",
     StopSpeaking = "StopSpeaking",
     Mute = "Mute"
 }
-declare enum CallServerCommands {
+declare enum MicdropServerCommands {
     Message = "Message",
     CancelLastAssistantMessage = "CancelLastAssistantMessage",
     CancelLastUserMessage = "CancelLastUserMessage",
     SkipAnswer = "SkipAnswer",
-    EnableSpeakerStreaming = "EnableSpeakerStreaming",
     EndCall = "EndCall"
 }
-interface CallConfig {
-    systemPrompt: string;
+interface MicdropConfig {
     firstMessage?: string;
-    debugLog?: boolean;
-    debugSaveSpeech?: boolean;
-    disableTTS?: boolean;
-    generateAnswer(conversation: Conversation): Promise<string | ConversationMessage>;
-    speech2Text(audioBlob: Blob, prevMessage?: string): Promise<string>;
-    text2Speech(text: string): Promise<ArrayBuffer | NodeJS.ReadableStream>;
-    onMessage?(message: ConversationMessage): void;
-    onEnd?(call: CallSummary): void;
-}
-interface CallSummary {
-    conversation: Conversation;
+    generateFirstMessage?: boolean;
+    agent: Agent;
+    stt: STT;
+    tts: TTS;
+    onEnd?(call: MicdropCallSummary): void;
+}
+interface MicdropCallSummary {
+    conversation: MicdropConversation;
     duration: number;
 }
-type Conversation = ConversationMessage[];
-type AnswerCommands = {
-    endCall?: boolean;
-    cancelLastUserMessage?: boolean;
-    skipAnswer?: boolean;
-};
-type AnswerMetadata = {
+type MicdropConversation = MicdropConversationMessage[];
+type MicdropAnswerMetadata = {
     [key: string]: any;
 };
-interface ConversationMessage<Data extends AnswerMetadata = AnswerMetadata> {
+interface MicdropConversationMessage<Data extends MicdropAnswerMetadata = MicdropAnswerMetadata> {
     role: 'system' | 'user' | 'assistant';
     content: string;
-    commands?: AnswerCommands;
     metadata?: Data;
 }
+type DeepPartial<T> = T extends object ? {
+    [P in keyof T]?: DeepPartial<T[P]>;
+} : T;
-interface Processing {
-    aborted: boolean;
+interface AgentOptions {
+    systemPrompt: string;
 }
-declare class CallServer {
-    socket: WebSocket | null;
-    config: CallConfig | null;
-    private startTime;
-    private lastDebug;
-    private processing?;
-    private isSpeaking;
-    private chunks;
-    private conversation;
-    private speakerStreamingEnabled;
-    constructor(socket: WebSocket, config: CallConfig);
-    resetConversation(conversation: Conversation): void;
-    private abortProcessing;
-    private addMessage;
-    private sendAudio;
-    private onClose;
-    private onMessage;
-    private onStopSpeaking;
-    answer(message: string | ConversationMessage, processing?: Processing): Promise<void>;
-    private log;
+interface AgentEvents {
+    Message: [MicdropConversationMessage];
+    CancelLastUserMessage: [];
+    CancelLastAssistantMessage: [];
+    SkipAnswer: [];
+    EndCall: [];
+}
+interface AgentAnswerReturn {
+    message: Promise<string>;
+    stream: Readable;
+}
+interface TextPromise {
+    promise: Promise<string>;
+    resolve: (value: string) => void;
+    reject: (reason?: any) => void;
+}
+declare abstract class Agent<Options extends AgentOptions = AgentOptions> extends EventEmitter {
+    protected options: Options;
+    logger?: Logger;
+    conversation: MicdropConversation;
+    constructor(options: Options);
+    abstract answer(): AgentAnswerReturn;
+    abstract cancel(): void;
+    addUserMessage(text: string): void;
+    addAssistantMessage(text: string): void;
+    protected addMessage(role: 'user' | 'assistant' | 'system', text: string): void;
+    protected endCall(): void;
+    protected cancelLastUserMessage(): void;
+    protected cancelLastAssistantMessage(): void;
+    protected skipAnswer(): void;
+    protected createTextPromise(): TextPromise;
+    protected log(...message: any[]): void;
+    destroy(): void;
+}
+declare class MockAgent extends Agent {
+    private i;
+    constructor();
+    answer(): {
+        message: Promise<string>;
+        stream: PassThrough;
+    };
+    cancel(): void;
 }
-declare enum CallErrorCode {
+declare function convertToPCM(audioStream: Readable, sampleRate?: number, bitDepth?: number): PassThrough;
+declare function convertToOpus(audioStream: Readable, sampleRate?: number): PassThrough;
+declare function convertPCMToOpus(audioStream: Readable, sampleRate?: number): PassThrough;
+declare enum MicdropErrorCode {
     BadRequest = 4400,
     Unauthorized = 4401,
     NotFound = 4404
 }
-declare class CallError extends Error {
+declare class MicdropError extends Error {
     code: number;
     constructor(code: number, message: string);
 }
-declare function handleError(socket: WebSocket$1, error: unknown): void;
+declare function handleError(socket: WebSocket, error: unknown): void;
+declare class MicdropServer {
+    socket: WebSocket$1 | null;
+    config: MicdropConfig | null;
+    logger?: Logger;
+    private startTime;
+    private currentUserStream?;
+    constructor(socket: WebSocket$1, config: MicdropConfig);
+    private log;
+    private cancel;
+    private onClose;
+    private onMessage;
+    private onMute;
+    private onStartSpeaking;
+    private onStopSpeaking;
+    private onTranscript;
+    private sendFirstMessage;
+    private answer;
+    private speak;
+    private sendAudio;
+}
-declare function waitForParams<CallParams>(socket: WebSocket, validate: (params: any) => CallParams): Promise<CallParams>;
+declare function waitForParams<CallParams>(socket: WebSocket$1, validate: (params: any) => CallParams): Promise<CallParams>;
-export { type AnswerCommands, type AnswerMetadata, CallClientCommands, type CallConfig, CallError, CallErrorCode, CallServer, CallServerCommands, type CallSummary, type Conversation, type ConversationMessage, handleError, waitForParams };
+export { Agent, type AgentAnswerReturn, type AgentEvents, type AgentOptions, type DeepPartial, FileSTT, Logger, type MicdropAnswerMetadata, type MicdropCallSummary, MicdropClientCommands, type MicdropConfig, type MicdropConversation, type MicdropConversationMessage, MicdropError, MicdropErrorCode, MicdropServer, MicdropServerCommands, MockAgent, MockSTT, MockTTS, STT, type STTEvents, TTS, type TextPromise, convertPCMToOpus, convertToOpus, convertToPCM, handleError, waitForParams };