voicepilot-speech-engine 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
+ export { SpeechToText } from './stt.js';
2
+ export { TextToSpeech } from './tts.js';
3
+ export { AudioStreamManager } from './stream.js';
4
+ export type { STTConfig, TranscriptionResult, TranscriptionCallback, ErrorCallback } from './stt.js';
5
+ export type { TTSConfig, SpeechOptions } from './tts.js';
6
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACxC,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACxC,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAEjD,YAAY,EAAE,SAAS,EAAE,mBAAmB,EAAE,qBAAqB,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACrG,YAAY,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC"}
package/dist/index.js ADDED
@@ -0,0 +1,3 @@
1
+ export { SpeechToText } from './stt.js';
2
+ export { TextToSpeech } from './tts.js';
3
+ export { AudioStreamManager } from './stream.js';
@@ -0,0 +1,11 @@
1
/**
 * Browser microphone capture built on the MediaRecorder API.
 * Record with startRecording()/stopRecording(); the latter resolves
 * with the captured audio as a single Blob.
 */
export declare class AudioStreamManager {
    private mediaRecorder?;
    private audioChunks;
    private stream?;
    /** Requests microphone access and begins buffering audio chunks. */
    startRecording(): Promise<void>;
    /** Stops the active recording and resolves with the captured audio. */
    stopRecording(): Promise<Blob>;
    /** True while a recording is in progress. */
    isRecording(): boolean;
    private cleanup;
    /** Probes microphone permission by briefly opening, then closing, a stream. */
    getMicrophonePermission(): Promise<boolean>;
}
11
+ //# sourceMappingURL=stream.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"stream.d.ts","sourceRoot":"","sources":["../src/stream.ts"],"names":[],"mappings":"AAAA,qBAAa,kBAAkB;IAC7B,OAAO,CAAC,aAAa,CAAC,CAAgB;IACtC,OAAO,CAAC,WAAW,CAAc;IACjC,OAAO,CAAC,MAAM,CAAC,CAAc;IAEvB,cAAc,IAAI,OAAO,CAAC,IAAI,CAAC;IAoBrC,aAAa,IAAI,OAAO,CAAC,IAAI,CAAC;IAiB9B,WAAW,IAAI,OAAO;IAItB,OAAO,CAAC,OAAO;IAST,uBAAuB,IAAI,OAAO,CAAC,OAAO,CAAC;CASlD"}
package/dist/stream.js ADDED
@@ -0,0 +1,57 @@
1
/**
 * Captures microphone audio in the browser via the MediaRecorder API.
 *
 * Typical flow: startRecording() ... stopRecording(), which resolves with
 * a single `audio/webm` Blob containing everything captured in between.
 */
export class AudioStreamManager {
    mediaRecorder;
    audioChunks = [];
    stream;

    /**
     * Requests microphone access and starts recording.
     * Safe to call while a recording is active: the previous stream is
     * cleaned up first instead of being orphaned with live tracks.
     * @throws {Error} when microphone access is denied or unavailable.
     */
    async startRecording() {
        // Restarting while active would orphan the previous MediaStream and
        // leave its tracks live (mic indicator stays on) — clean up first.
        if (this.mediaRecorder) {
            this.cleanup();
        }
        try {
            this.stream = await navigator.mediaDevices.getUserMedia({ audio: true });
            this.mediaRecorder = new MediaRecorder(this.stream);
            this.audioChunks = [];
            this.mediaRecorder.ondataavailable = (event) => {
                if (event.data.size > 0) {
                    this.audioChunks.push(event.data);
                }
            };
            // Emit a data chunk every 100 ms so long recordings accumulate
            // incrementally instead of in one large final buffer.
            this.mediaRecorder.start(100);
        }
        catch (error) {
            // Release any tracks acquired before the failure (e.g. when the
            // MediaRecorder constructor throws after getUserMedia succeeded).
            this.cleanup();
            console.error('Failed to start recording:', error);
            throw new Error('Microphone access denied or not available');
        }
    }

    /**
     * Stops the active recording.
     * @returns {Promise<Blob>} the captured audio as an `audio/webm` Blob;
     *   rejects with 'No active recording' when nothing is being recorded.
     */
    stopRecording() {
        return new Promise((resolve, reject) => {
            if (!this.mediaRecorder) {
                reject(new Error('No active recording'));
                return;
            }
            // stop() on an inactive recorder throws InvalidStateError; surface
            // the same clear error as the missing-recorder case instead.
            if (this.mediaRecorder.state === 'inactive') {
                this.cleanup();
                reject(new Error('No active recording'));
                return;
            }
            this.mediaRecorder.onstop = () => {
                const audioBlob = new Blob(this.audioChunks, { type: 'audio/webm' });
                this.cleanup();
                resolve(audioBlob);
            };
            this.mediaRecorder.stop();
        });
    }

    /** @returns {boolean} true while the recorder is actively recording. */
    isRecording() {
        return this.mediaRecorder?.state === 'recording';
    }

    /** Stops all media tracks and resets recorder, stream, and buffers. */
    cleanup() {
        if (this.stream) {
            this.stream.getTracks().forEach(track => track.stop());
            this.stream = undefined;
        }
        this.mediaRecorder = undefined;
        this.audioChunks = [];
    }

    /**
     * Checks microphone permission by briefly opening a stream and
     * immediately releasing it.
     * @returns {Promise<boolean>} true when access is granted.
     */
    async getMicrophonePermission() {
        try {
            const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
            stream.getTracks().forEach(track => track.stop());
            return true;
        }
        catch {
            return false;
        }
    }
}
package/dist/stt.d.ts ADDED
@@ -0,0 +1,23 @@
1
/** Configuration for Azure Speech-to-Text. */
export interface STTConfig {
    /** Azure Speech subscription key. */
    subscriptionKey: string;
    /** Azure region identifier the subscription belongs to. */
    region: string;
    /** Recognition language; the implementation defaults to 'en-US'. */
    language?: string;
}
/** One transcription update, either interim or final. */
export interface TranscriptionResult {
    text: string;
    /** Heuristic confidence: 0.5 for interim results, 1.0 for final ones. */
    confidence: number;
    /** False while the utterance is still being recognized. */
    isFinal: boolean;
}
/** Receives every interim and final transcription result. */
export type TranscriptionCallback = (result: TranscriptionResult) => void;
/** Receives recognition errors and error-cancellations. */
export type ErrorCallback = (error: Error) => void;
/** Continuous and one-shot speech recognition backed by the Azure Speech SDK. */
export declare class SpeechToText {
    private recognizer?;
    private config;
    private isListening;
    constructor(config: STTConfig);
    /** Starts continuous microphone recognition; warns and no-ops if already listening. */
    startContinuousRecognition(onTranscription: TranscriptionCallback, onError?: ErrorCallback): void;
    /** Stops continuous recognition; always resolves, even when stopping fails. */
    stopContinuousRecognition(): Promise<void>;
    /** Recognizes a single utterance and resolves with its text. */
    recognizeOnce(): Promise<string>;
    /** True while continuous recognition is active. */
    getIsListening(): boolean;
}
23
+ //# sourceMappingURL=stt.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../src/stt.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,SAAS;IACxB,eAAe,EAAE,MAAM,CAAC;IACxB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;CAClB;AAED,MAAM,MAAM,qBAAqB,GAAG,CAAC,MAAM,EAAE,mBAAmB,KAAK,IAAI,CAAC;AAC1E,MAAM,MAAM,aAAa,GAAG,CAAC,KAAK,EAAE,KAAK,KAAK,IAAI,CAAC;AAEnD,qBAAa,YAAY;IACvB,OAAO,CAAC,UAAU,CAAC,CAAuB;IAC1C,OAAO,CAAC,MAAM,CAAY;IAC1B,OAAO,CAAC,WAAW,CAAS;gBAEhB,MAAM,EAAE,SAAS;IAI7B,0BAA0B,CACxB,eAAe,EAAE,qBAAqB,EACtC,OAAO,CAAC,EAAE,aAAa,GACtB,IAAI;IAiEP,yBAAyB,IAAI,OAAO,CAAC,IAAI,CAAC;IAuBpC,aAAa,IAAI,OAAO,CAAC,MAAM,CAAC;IA6BtC,cAAc,IAAI,OAAO;CAG1B"}
package/dist/stt.js ADDED
@@ -0,0 +1,99 @@
1
+ import * as sdk from 'microsoft-cognitiveservices-speech-sdk';
2
/**
 * Speech-to-text on top of the Azure Cognitive Services Speech SDK.
 * Supports continuous dictation from the default microphone and
 * one-shot recognition.
 */
export class SpeechToText {
    recognizer;
    config;
    isListening = false;

    /** @param config subscription key, region, and optional language. */
    constructor(config) {
        this.config = config;
    }

    /**
     * Starts continuous recognition from the default microphone.
     * Interim results arrive via `onTranscription` with confidence 0.5 and
     * isFinal=false; finalized phrases use confidence 1.0 and isFinal=true.
     * Warns and returns if recognition is already running.
     *
     * @param onTranscription invoked for every interim/final result.
     * @param onError optional; invoked on SDK errors and error-cancellations.
     */
    startContinuousRecognition(onTranscription, onError) {
        if (this.isListening) {
            console.warn('Already listening');
            return;
        }
        const speechConfig = sdk.SpeechConfig.fromSubscription(this.config.subscriptionKey, this.config.region);
        speechConfig.speechRecognitionLanguage = this.config.language || 'en-US';
        speechConfig.enableDictation();
        const audioConfig = sdk.AudioConfig.fromDefaultMicrophoneInput();
        this.recognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);
        this.recognizer.recognizing = (_sender, event) => {
            if (event.result.reason === sdk.ResultReason.RecognizingSpeech) {
                onTranscription({
                    text: event.result.text,
                    confidence: 0.5, // interim results carry no real confidence score
                    isFinal: false,
                });
            }
        };
        this.recognizer.recognized = (_sender, event) => {
            if (event.result.reason === sdk.ResultReason.RecognizedSpeech) {
                onTranscription({
                    text: event.result.text,
                    confidence: 1.0,
                    isFinal: true,
                });
            }
            else if (event.result.reason === sdk.ResultReason.NoMatch) {
                console.log('No speech could be recognized');
            }
        };
        this.recognizer.canceled = (_sender, event) => {
            console.error('Recognition canceled:', event.reason);
            if (event.reason === sdk.CancellationReason.Error) {
                onError?.(new Error(event.errorDetails));
            }
            this.stopContinuousRecognition();
        };
        this.recognizer.sessionStopped = () => {
            this.isListening = false;
        };
        this.recognizer.startContinuousRecognitionAsync(() => {
            this.isListening = true;
            console.log('Recognition started');
        }, (error) => {
            console.error('Failed to start recognition:', error);
            // Dispose the failed recognizer so a retry does not leak it.
            this.disposeRecognizer();
            onError?.(new Error(error));
        });
    }

    /**
     * Stops continuous recognition and disposes the recognizer.
     * Always resolves — stop failures are logged, never thrown.
     */
    stopContinuousRecognition() {
        return new Promise((resolve) => {
            if (!this.recognizer || !this.isListening) {
                resolve();
                return;
            }
            this.recognizer.stopContinuousRecognitionAsync(() => {
                this.isListening = false;
                this.disposeRecognizer();
                resolve();
            }, (error) => {
                console.error('Failed to stop recognition:', error);
                this.isListening = false;
                // Dispose even on failure so the instance is not leaked by the
                // next startContinuousRecognition().
                this.disposeRecognizer();
                resolve();
            });
        });
    }

    /** Closes and clears the current recognizer; tolerant of double-close. */
    disposeRecognizer() {
        try {
            this.recognizer?.close();
        }
        catch {
            // already closed / closing — nothing further to release
        }
        this.recognizer = undefined;
    }

    /**
     * Recognizes a single utterance from the default microphone.
     * @returns the recognized text; rejects when nothing was recognized
     *   or the SDK reports an error.
     */
    async recognizeOnce() {
        const speechConfig = sdk.SpeechConfig.fromSubscription(this.config.subscriptionKey, this.config.region);
        speechConfig.speechRecognitionLanguage = this.config.language || 'en-US';
        const audioConfig = sdk.AudioConfig.fromDefaultMicrophoneInput();
        const recognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);
        return new Promise((resolve, reject) => {
            recognizer.recognizeOnceAsync((result) => {
                if (result.reason === sdk.ResultReason.RecognizedSpeech) {
                    resolve(result.text);
                }
                else {
                    reject(new Error('No speech recognized'));
                }
                recognizer.close();
            }, (error) => {
                reject(new Error(error));
                recognizer.close();
            });
        });
    }

    /** @returns true while continuous recognition is active. */
    getIsListening() {
        return this.isListening;
    }
}
package/dist/tts.d.ts ADDED
@@ -0,0 +1,26 @@
1
/** Configuration for Azure Text-to-Speech. */
export interface TTSConfig {
    /** Azure Speech subscription key. */
    subscriptionKey: string;
    /** Azure region identifier the subscription belongs to. */
    region: string;
    /** Default voice; the implementation falls back to 'en-US-AriaNeural'. */
    voice?: string;
    language?: string;
    /** When true, synthesis defaults to the 'friendly' speaking style. */
    conversational?: boolean;
}
/** Per-utterance synthesis options. */
export interface SpeechOptions {
    text: string;
    /** Overrides TTSConfig.voice for this utterance. */
    voice?: string;
    /** Rate adjustment in percent; omitted/0 yields the natural '0%'. */
    rate?: number;
    /** Pitch adjustment in Hz; omitted defaults to '+5Hz'. */
    pitch?: number;
    style?: 'friendly' | 'cheerful' | 'empathetic' | 'professional';
}
/** SSML-based text-to-speech backed by the Azure Speech SDK. */
export declare class TextToSpeech {
    private synthesizer?;
    private config;
    constructor(config: TTSConfig);
    /** Speaks through the default speaker output. */
    speak(options: SpeechOptions): Promise<void>;
    /** Convenience wrapper: speak plain text with default options. */
    speakText(text: string): Promise<void>;
    /** Synthesizes to an in-memory buffer instead of the speaker. */
    synthesizeToAudio(options: SpeechOptions): Promise<ArrayBuffer>;
    /** Closes any active synthesizer. */
    stop(): void;
    private generateSSML;
    private escapeXML;
}
26
+ //# sourceMappingURL=tts.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,SAAS;IACxB,eAAe,EAAE,MAAM,CAAC;IACxB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,UAAU,GAAG,UAAU,GAAG,YAAY,GAAG,cAAc,CAAC;CACjE;AAED,qBAAa,YAAY;IACvB,OAAO,CAAC,WAAW,CAAC,CAAwB;IAC5C,OAAO,CAAC,MAAM,CAAY;gBAEd,MAAM,EAAE,SAAS;IAIvB,KAAK,CAAC,OAAO,EAAE,aAAa,GAAG,OAAO,CAAC,IAAI,CAAC;IAqC5C,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAItC,iBAAiB,CAAC,OAAO,EAAE,aAAa,GAAG,OAAO,CAAC,WAAW,CAAC;IAiCrE,IAAI,IAAI,IAAI;IAOZ,OAAO,CAAC,YAAY;IAuBpB,OAAO,CAAC,SAAS;CAQlB"}
package/dist/tts.js ADDED
@@ -0,0 +1,94 @@
1
+ import * as sdk from 'microsoft-cognitiveservices-speech-sdk';
2
/**
 * Text-to-speech via the Azure Cognitive Services Speech SDK.
 * Builds SSML to control voice, language, rate, pitch, and speaking style.
 */
export class TextToSpeech {
    synthesizer;
    config;

    /** @param config subscription key, region, and synthesis defaults. */
    constructor(config) {
        this.config = config;
    }

    /**
     * Speaks through the default speaker output.
     * Voice resolution: options.voice, then config.voice, then 'en-US-AriaNeural'.
     * Rejects with Error when synthesis fails.
     */
    async speak(options) {
        const speechConfig = sdk.SpeechConfig.fromSubscription(this.config.subscriptionKey, this.config.region);
        speechConfig.speechSynthesisVoiceName =
            options.voice ||
                this.config.voice ||
                'en-US-AriaNeural'; // Aria is more conversational than Jenny
        const audioConfig = sdk.AudioConfig.fromDefaultSpeakerOutput();
        // Keep a local reference so an overlapping speak() call cannot close
        // or clear another invocation's synthesizer out from under it.
        const synthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig);
        this.synthesizer = synthesizer;
        const ssml = this.generateSSML(options);
        return new Promise((resolve, reject) => {
            const finish = () => {
                synthesizer.close();
                if (this.synthesizer === synthesizer) {
                    this.synthesizer = undefined;
                }
            };
            synthesizer.speakSsmlAsync(ssml, (result) => {
                if (result.reason === sdk.ResultReason.SynthesizingAudioCompleted) {
                    resolve();
                }
                else {
                    reject(new Error('Speech synthesis failed'));
                }
                finish();
            }, (error) => {
                reject(new Error(error));
                finish();
            });
        });
    }

    /** Convenience wrapper: speak plain text with default options. */
    async speakText(text) {
        return this.speak({ text });
    }

    /**
     * Synthesizes speech into an in-memory buffer (no speaker playback).
     * @returns the synthesized audio data.
     */
    async synthesizeToAudio(options) {
        const speechConfig = sdk.SpeechConfig.fromSubscription(this.config.subscriptionKey, this.config.region);
        speechConfig.speechSynthesisVoiceName =
            options.voice ||
                this.config.voice ||
                'en-US-AriaNeural'; // Aria is more conversational than Jenny
        // undefined audio config => capture to memory instead of a device
        const synthesizer = new sdk.SpeechSynthesizer(speechConfig, undefined);
        const ssml = this.generateSSML(options);
        return new Promise((resolve, reject) => {
            synthesizer.speakSsmlAsync(ssml, (result) => {
                if (result.reason === sdk.ResultReason.SynthesizingAudioCompleted) {
                    resolve(result.audioData);
                }
                else {
                    reject(new Error('Speech synthesis failed'));
                }
                synthesizer.close();
            }, (error) => {
                reject(new Error(error));
                synthesizer.close();
            });
        });
    }

    /** Closes the synthesizer created by speak(), if one is active. */
    stop() {
        if (this.synthesizer) {
            this.synthesizer.close();
            this.synthesizer = undefined;
        }
    }

    /**
     * Builds the SSML document for one utterance.
     * xml:lang honors config.language (previously hard-coded to 'en-US',
     * silently ignoring the configured language); defaults to 'en-US'.
     */
    generateSSML(options) {
        const voice = options.voice || this.config.voice || 'en-US-AriaNeural'; // Aria is more conversational
        const rate = options.rate ? `${options.rate}%` : '0%'; // Natural speed
        const pitch = options.pitch ? `${options.pitch > 0 ? '+' : ''}${options.pitch}Hz` : '+5Hz'; // Slight warmth
        const language = this.config.language || 'en-US';
        // Add speaking style for supported neural voices (Aria, Jenny, Guy, Sara)
        const style = options.style || (this.config.conversational ? 'friendly' : undefined);
        const styleAttr = style ? `style="${style}"` : '';
        return `
      <speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis"
             xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="${language}">
        <voice name="${voice}">
          ${style ? `<mstts:express-as ${styleAttr}>` : ''}
          <prosody rate="${rate}" pitch="${pitch}">
            ${this.escapeXML(options.text)}
          </prosody>
          ${style ? `</mstts:express-as>` : ''}
        </voice>
      </speak>
    `.trim();
    }

    /** Escapes the five XML special characters for safe SSML embedding. */
    escapeXML(text) {
        return text
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&apos;');
    }
}
package/package.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "name": "voicepilot-speech-engine",
3
+ "version": "0.1.0",
4
+ "description": "Azure Speech Services integration with enhanced TTS",
5
+ "type": "module",
6
+ "main": "./dist/index.js",
7
+ "types": "./dist/index.d.ts",
8
+ "files": ["dist"],
9
+ "exports": {
10
+ ".": {
11
+ "import": "./dist/index.js",
12
+ "types": "./dist/index.d.ts"
13
+ }
14
+ },
15
+ "scripts": {
16
+ "build": "tsc",
17
+ "dev": "tsc --watch",
18
+ "clean": "rm -rf dist"
19
+ },
20
+ "dependencies": {
21
+ "microsoft-cognitiveservices-speech-sdk": "^1.42.0"
22
+ },
23
+ "devDependencies": {
24
+ "@types/node": "^22.10.2",
25
+ "@voicepilot/typescript-config": "workspace:*",
26
+ "typescript": "^5.7.2"
27
+ },
28
+ "publishConfig": {
29
+ "access": "public"
30
+ }
31
+ }