@mastra/voice-azure 0.10.9 → 0.10.10-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/package.json +18 -5
- package/.turbo/turbo-build.log +0 -4
- package/eslint.config.js +0 -6
- package/src/index.test.ts +0 -167
- package/src/index.ts +0 -238
- package/src/voices.ts +0 -215
- package/tsconfig.build.json +0 -9
- package/tsconfig.json +0 -5
- package/tsup.config.ts +0 -17
- package/vitest.config.ts +0 -8
package/CHANGELOG.md
CHANGED

@@ -1,5 +1,23 @@
 # @mastra/voice-azure
 
+## 0.10.10-alpha.1
+
+### Patch Changes
+
+- [#7394](https://github.com/mastra-ai/mastra/pull/7394) [`f0dfcac`](https://github.com/mastra-ai/mastra/commit/f0dfcac4458bdf789b975e2d63e984f5d1e7c4d3) Thanks [@NikAiyer](https://github.com/NikAiyer)! - updated core peerdep
+
+- Updated dependencies [[`7149d8d`](https://github.com/mastra-ai/mastra/commit/7149d8d4bdc1edf0008e0ca9b7925eb0b8b60dbe)]:
+  - @mastra/core@0.15.3-alpha.7
+
+## 0.10.10-alpha.0
+
+### Patch Changes
+
+- [#7343](https://github.com/mastra-ai/mastra/pull/7343) [`de3cbc6`](https://github.com/mastra-ai/mastra/commit/de3cbc61079211431bd30487982ea3653517278e) Thanks [@LekoArts](https://github.com/LekoArts)! - Update the `package.json` file to include additional fields like `repository`, `homepage` or `files`.
+
+- Updated dependencies [[`85ef90b`](https://github.com/mastra-ai/mastra/commit/85ef90bb2cd4ae4df855c7ac175f7d392c55c1bf), [`de3cbc6`](https://github.com/mastra-ai/mastra/commit/de3cbc61079211431bd30487982ea3653517278e)]:
+  - @mastra/core@0.15.3-alpha.5
+
 ## 0.10.9
 
 ### Patch Changes
package/package.json
CHANGED

@@ -1,10 +1,14 @@
 {
   "name": "@mastra/voice-azure",
-  "version": "0.10.9",
+  "version": "0.10.10-alpha.1",
   "description": "Mastra Azure speech integration",
   "type": "module",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
+  "files": [
+    "dist",
+    "CHANGELOG.md"
+  ],
   "exports": {
     ".": {
       "import": {
@@ -29,12 +33,21 @@
     "tsup": "^8.5.0",
     "typescript": "^5.8.3",
     "vitest": "^3.2.4",
-    "@internal/
-    "@mastra/core": "0.15.
-    "@internal/
+    "@internal/lint": "0.0.34",
+    "@mastra/core": "0.15.3-alpha.7",
+    "@internal/types-builder": "0.0.9"
   },
   "peerDependencies": {
-    "@mastra/core": ">=0.
+    "@mastra/core": ">=0.15.3-0 <0.16.0-0"
+  },
+  "homepage": "https://mastra.ai",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/mastra-ai/mastra.git",
+    "directory": "voice/azure"
+  },
+  "bugs": {
+    "url": "https://github.com/mastra-ai/mastra/issues"
+  },
   "scripts": {
     "build": "tsup --silent --config tsup.config.ts",
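The new `files` allowlist (`dist` plus `CHANGELOG.md`) is why the source, test, and config files below drop out of the published tarball, and the widened peer range uses `-0` prerelease floors so that alpha builds of `@mastra/core` still satisfy it. A minimal sketch of how that range resolves, using the `semver` package (the probe versions are illustrative, not taken from this diff):

```ts
import semver from 'semver';

const range = '>=0.15.3-0 <0.16.0-0';

// Prereleases of 0.15.3 match: the comparator `>=0.15.3-0` carries a
// prerelease tag on the same 0.15.3 tuple, so semver considers them.
console.log(semver.satisfies('0.15.3-alpha.7', range)); // true

// Stable releases in the 0.15.x line match as usual.
console.log(semver.satisfies('0.15.4', range)); // true

// Everything from 0.16.0 on, including its prereleases, is excluded.
console.log(semver.satisfies('0.16.0-alpha.0', range)); // false
```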
package/.turbo/turbo-build.log
DELETED
package/eslint.config.js
DELETED
package/src/index.test.ts
DELETED
@@ -1,167 +0,0 @@
-import { createReadStream, writeFileSync, mkdirSync } from 'fs';
-import { join } from 'path';
-import { Readable } from 'stream';
-import { describe, it, expect, beforeAll } from 'vitest';
-
-import { AzureVoice } from './index';
-
-describe('AzureVoice Integration Tests', () => {
-  let voice: AzureVoice;
-  const outputDir = join(process.cwd(), 'test-outputs');
-  const subscriptionKey = process.env.AZURE_API_KEY ?? 'fake-key';
-  const region = process.env.AZURE_REGION ?? 'eastus';
-
-  beforeAll(() => {
-    try {
-      mkdirSync(outputDir, { recursive: true });
-    } catch (err) {
-      // Ignore if directory already exists
-      console.log('Directory already exists:', err);
-    }
-
-    voice = new AzureVoice({
-      speechModel: { apiKey: subscriptionKey, region },
-      listeningModel: { apiKey: subscriptionKey, region },
-    });
-  });
-
-  describe('getSpeakers', () => {
-    it('should list available voices', async () => {
-      const voices = await voice.getSpeakers();
-      expect(voices.length).toBeGreaterThan(0);
-      expect(voices[0]).toHaveProperty('voiceId');
-      expect(voices[0]).toHaveProperty('language');
-      expect(voices[0]).toHaveProperty('region');
-    });
-  });
-
-  it('should initialize with default parameters', async () => {
-    const defaultVoice = new AzureVoice();
-    const voices = await defaultVoice.getSpeakers();
-    expect(voices).toBeInstanceOf(Array);
-    expect(voices.length).toBeGreaterThan(0);
-  });
-
-  describe('speak', () => {
-    it('should speak with default parameters', async () => {
-      const defaultVoice = new AzureVoice({
-        speechModel: { apiKey: subscriptionKey, region },
-      });
-      const audioStream = await defaultVoice.speak('Hello with defaults');
-
-      const chunks: Buffer[] = [];
-      for await (const chunk of audioStream) {
-        chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
-      }
-      const audioBuffer = Buffer.concat(chunks);
-      expect(audioBuffer.length).toBeGreaterThan(0);
-    });
-
-    it('should generate audio stream from text', async () => {
-      const audioStream = await voice.speak('Hello World', {
-        speaker: 'en-US-AriaNeural',
-      });
-
-      const chunks: Buffer[] = [];
-      for await (const chunk of audioStream) {
-        chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
-      }
-      const audioBuffer = Buffer.concat(chunks);
-      expect(audioBuffer.length).toBeGreaterThan(0);
-
-      const outputPath = join(outputDir, 'azure-speech-test.wav');
-      writeFileSync(outputPath, audioBuffer);
-    });
-
-    it('should work with different parameters', async () => {
-      const audioStream = await voice.speak('Test with parameters', {
-        speaker: 'en-US-JennyNeural',
-      });
-
-      const chunks: Buffer[] = [];
-      for await (const chunk of audioStream) {
-        chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
-      }
-      const audioBuffer = Buffer.concat(chunks);
-      expect(audioBuffer.length).toBeGreaterThan(0);
-
-      const outputPath = join(outputDir, 'azure-speech-params.wav');
-      writeFileSync(outputPath, audioBuffer);
-    });
-
-    it('should accept text stream as input', async () => {
-      const inputStream = new Readable();
-      inputStream.push('Hello from stream');
-      inputStream.push(null);
-
-      const audioStream = await voice.speak(inputStream, {
-        speaker: 'en-US-AriaNeural',
-      });
-
-      const chunks: Buffer[] = [];
-      for await (const chunk of audioStream) {
-        chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
-      }
-      const audioBuffer = Buffer.concat(chunks);
-      expect(audioBuffer.length).toBeGreaterThan(0);
-
-      const outputPath = join(outputDir, 'azure-speech-stream.wav');
-      writeFileSync(outputPath, audioBuffer);
-    });
-  });
-
-  describe('listen', () => {
-    it('should listen with default parameters', async () => {
-      const defaultVoice = new AzureVoice({
-        speechModel: { apiKey: subscriptionKey, region },
-        listeningModel: { apiKey: subscriptionKey, region },
-      });
-      const audioStream = await defaultVoice.speak('Listening test with defaults');
-
-      const text = await defaultVoice.listen(audioStream);
-      expect(text).toBeTruthy();
-      expect(typeof text).toBe('string');
-      expect(text.toLowerCase()).toContain('listening test');
-    });
-
-    it('should transcribe audio from file', async () => {
-      const filePath = join(outputDir, 'azure-speech-test.wav');
-      const audioStream = createReadStream(filePath);
-
-      const text = await voice.listen(audioStream);
-      expect(text).toBeTruthy();
-      expect(typeof text).toBe('string');
-      expect(text.length).toBeGreaterThan(0);
-    });
-
-    it('should transcribe audio stream', async () => {
-      const audioStream = await voice.speak('This is a test for transcription', {
-        speaker: 'en-US-AriaNeural',
-      });
-
-      const text = await voice.listen(audioStream);
-      expect(text).toBeTruthy();
-      expect(typeof text).toBe('string');
-      expect(text.toLowerCase()).toContain('test');
-    });
-  });
-
-  describe('error handling', () => {
-    it('should handle empty text', async () => {
-      await expect(voice.speak('')).rejects.toThrow('Input text is empty');
-    });
-
-    it('should handle missing API key', () => {
-      const { AZURE_API_KEY } = process.env;
-      delete process.env.AZURE_API_KEY;
-
-      expect(() => {
-        new AzureVoice({
-          speechModel: { region: 'eastus' },
-        });
-      }).toThrow('No Azure API key provided for speech model');
-
-      process.env.AZURE_API_KEY = AZURE_API_KEY;
-    });
-  });
-});
package/src/index.ts
DELETED
@@ -1,238 +0,0 @@
-import { Readable } from 'stream';
-import { MastraVoice } from '@mastra/core/voice';
-import * as Azure from 'microsoft-cognitiveservices-speech-sdk';
-import { AZURE_VOICES } from './voices';
-import type { VoiceId } from './voices';
-
-interface AzureVoiceConfig {
-  apiKey?: string;
-  region?: string;
-  voiceName?: string;
-  language?: string;
-}
-
-export class AzureVoice extends MastraVoice {
-  private speechConfig?: Azure.SpeechConfig;
-  private listeningConfig?: Azure.SpeechConfig;
-  private speechSynthesizer?: Azure.SpeechSynthesizer;
-  private speechRecognizer?: Azure.SpeechRecognizer;
-
-  /**
-   * Creates a new instance of AzureVoice for text-to-speech and speech-to-text services.
-   *
-   * @param {Object} config - Configuration options
-   * @param {AzureVoiceConfig} [config.speechModel] - Configuration for text-to-speech
-   * @param {AzureVoiceConfig} [config.listeningModel] - Configuration for speech-to-text
-   * @param {VoiceId} [config.speaker] - Default voice ID for speech synthesis
-   */
-  constructor({
-    speechModel,
-    listeningModel,
-    speaker,
-  }: {
-    speechModel?: AzureVoiceConfig;
-    listeningModel?: AzureVoiceConfig;
-    speaker?: VoiceId;
-  } = {}) {
-    super({
-      speechModel: {
-        name: '',
-        apiKey: speechModel?.apiKey ?? process.env.AZURE_API_KEY,
-      },
-      listeningModel: {
-        name: '',
-        apiKey: listeningModel?.apiKey ?? process.env.AZURE_API_KEY,
-      },
-      speaker,
-    });
-
-    const envApiKey = process.env.AZURE_API_KEY;
-    const envRegion = process.env.AZURE_REGION;
-
-    // Configure speech synthesis
-    if (speechModel) {
-      const apiKey = speechModel.apiKey ?? envApiKey;
-      const region = speechModel.region ?? envRegion;
-
-      if (!apiKey) throw new Error('No Azure API key provided for speech model');
-      if (!region) throw new Error('No region provided for speech model');
-
-      this.speechConfig = Azure.SpeechConfig.fromSubscription(apiKey, region);
-      this.speechConfig.speechSynthesisVoiceName = speechModel.voiceName || speaker || 'en-US-AriaNeural';
-      this.speechSynthesizer = new Azure.SpeechSynthesizer(this.speechConfig);
-    }
-
-    // Configure speech recognition
-    if (listeningModel) {
-      const apiKey = listeningModel.apiKey ?? envApiKey;
-      const region = listeningModel.region ?? envRegion;
-
-      if (!apiKey) throw new Error('No Azure API key provided for listening model');
-      if (!region) throw new Error('No region provided for listening model');
-
-      this.listeningConfig = Azure.SpeechConfig.fromSubscription(apiKey, region);
-      if (listeningModel.language) {
-        this.listeningConfig.speechRecognitionLanguage = listeningModel.language;
-      }
-      this.speechRecognizer = new Azure.SpeechRecognizer(this.listeningConfig);
-    }
-  }
-
-  /**
-   * Gets a list of available voices for speech synthesis.
-   *
-   * @returns {Promise<Array<{ voiceId: string; language: string; region: string; }>>} List of available voices
-   */
-  async getSpeakers() {
-    return this.traced(async () => {
-      return AZURE_VOICES.map(voice => ({
-        voiceId: voice,
-        language: voice.split('-')[0],
-        region: voice.split('-')[1],
-      }));
-    }, 'voice.azure.voices')();
-  }
-
-  /**
-   * Converts text to speech using Azure's Text-to-Speech service.
-   *
-   * @param {string | NodeJS.ReadableStream} input - Text to convert to speech
-   * @param {Object} [options] - Optional parameters
-   * @param {string} [options.speaker] - Voice ID to use for synthesis
-   * @returns {Promise<NodeJS.ReadableStream>} Stream containing the synthesized audio
-   * @throws {Error} If speech model is not configured or synthesis fails
-   */
-  async speak(
-    input: string | NodeJS.ReadableStream,
-    options?: {
-      speaker?: string;
-      [key: string]: any;
-    },
-  ): Promise<NodeJS.ReadableStream> {
-    if (!this.speechConfig) {
-      throw new Error('Speech model (Azure) not configured');
-    }
-
-    // Convert stream input to string if needed
-    if (typeof input !== 'string') {
-      const chunks: Buffer[] = [];
-      try {
-        for await (const chunk of input) {
-          chunks.push(chunk as Buffer);
-        }
-        input = Buffer.concat(chunks).toString('utf-8');
-      } catch (error) {
-        throw new Error(`Failed to read input stream: ${error instanceof Error ? error.message : String(error)}`);
-      }
-    }
-
-    if (!input?.trim()) {
-      throw new Error('Input text is empty');
-    }
-
-    // Update voice if specified
-    if (options?.speaker) {
-      this.speechConfig.speechSynthesisVoiceName = options.speaker;
-    }
-
-    const synthesizer = new Azure.SpeechSynthesizer(this.speechConfig);
-
-    try {
-      const timeoutPromise = new Promise<never>((_, reject) => {
-        setTimeout(() => reject(new Error('Speech synthesis timed out')), 5000);
-      });
-
-      const synthesisPromise = this.traced(
-        () =>
-          new Promise<Azure.SpeechSynthesisResult>((resolve, reject) => {
-            synthesizer.speakTextAsync(
-              input,
-              result =>
-                result.errorDetails
-                  ? reject(new Error(`Speech synthesis failed: ${result.errorDetails}`))
-                  : resolve(result),
-              error => reject(new Error(`Speech synthesis error: ${String(error)}`)),
-            );
-          }),
-        'voice.azure.speak',
-      )();
-
-      const result = await Promise.race([synthesisPromise, timeoutPromise]);
-      synthesizer.close();
-
-      if (result.reason !== Azure.ResultReason.SynthesizingAudioCompleted) {
-        throw new Error(`Speech synthesis failed: ${result.errorDetails || result.reason}`);
-      }
-
-      return Readable.from([Buffer.from(result.audioData)]);
-    } catch (error) {
-      synthesizer.close();
-      throw error instanceof Error ? error : new Error(String(error));
-    }
-  }
-
-  /**
-   * Checks if listening capabilities are enabled.
-   *
-   * @returns {Promise<{ enabled: boolean }>}
-   */
-  async getListener() {
-    return { enabled: true };
-  }
-
-  /**
-   * Transcribes audio (STT) from a Node.js stream using Azure.
-   *
-   * @param {NodeJS.ReadableStream} audioStream - The audio to be transcribed, must be in .wav format.
-   * @returns {Promise<string>} - The recognized text.
-   */
-  async listen(audioStream: NodeJS.ReadableStream): Promise<string> {
-    if (!this.listeningConfig || !this.speechRecognizer) {
-      throw new Error('Listening model (Azure) not configured');
-    }
-
-    const chunks: Buffer[] = [];
-    for await (const chunk of audioStream) {
-      chunks.push(chunk as Buffer);
-    }
-    const audioData = Buffer.concat(chunks);
-
-    const pushStream = Azure.AudioInputStream.createPushStream();
-    const audioConfig = Azure.AudioConfig.fromStreamInput(pushStream);
-    const recognizer = new Azure.SpeechRecognizer(this.listeningConfig, audioConfig);
-
-    try {
-      const recognitionPromise = new Promise<string>((resolve, reject) => {
-        recognizer.recognizeOnceAsync(
-          result => {
-            if (result.reason === Azure.ResultReason.RecognizedSpeech) {
-              resolve(result.text);
-            } else {
-              const reason = Azure.ResultReason[result.reason] || result.reason;
-              reject(new Error(`Speech recognition failed: ${reason} - ${result.errorDetails || ''}`));
-            }
-          },
-          error => reject(new Error(`Speech recognition error: ${String(error)}`)),
-        );
-      });
-
-      const chunkSize = 4096;
-      for (let i = 0; i < audioData.length; i += chunkSize) {
-        const chunk = audioData.slice(i, i + chunkSize);
-        pushStream.write(chunk);
-      }
-      pushStream.close();
-
-      const text = await this.traced(() => recognitionPromise, 'voice.azure.listen')();
-
-      return text;
-    } catch (error: unknown) {
-      if (error instanceof Error) {
-        throw error;
-      }
-      throw new Error(String(error));
-    } finally {
-      recognizer.close();
-    }
-  }
-}
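Only the compiled `dist` output ships now, but the class above remains the public API. For orientation, a minimal usage sketch of `AzureVoice`, assuming `AZURE_API_KEY` and `AZURE_REGION` are set in the environment (the constructor falls back to them when `apiKey`/`region` are omitted); the sample text and voice IDs are arbitrary:

```ts
import { AzureVoice } from '@mastra/voice-azure';

// Passing empty model configs opts into TTS and STT; credentials
// come from AZURE_API_KEY / AZURE_REGION.
const voice = new AzureVoice({
  speechModel: {},
  listeningModel: {},
  speaker: 'en-US-AriaNeural',
});

// speak() takes a string (or a readable stream of text) and resolves
// to a Node.js readable stream of WAV audio.
const audio = await voice.speak('Round-trip test', { speaker: 'en-US-JennyNeural' });

// listen() drains a WAV audio stream and resolves to the recognized text.
const text = await voice.listen(audio);
console.log(text);
```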
package/src/voices.ts
DELETED
@@ -1,215 +0,0 @@
-export const AZURE_VOICES = [
-  'af-ZA-AdriNeural',
-  'af-ZA-WillemNeural',
-  'am-ET-MekdesNeural',
-  'am-ET-AmehaNeural',
-  'ar-AE-FatimaNeural',
-  'ar-AE-HamdanNeural',
-  'ar-BH-LailaNeural',
-  'ar-BH-AliNeural',
-  'ar-DZ-AminaNeural',
-  'ar-DZ-IsmaelNeural',
-  'ar-EG-SalmaNeural',
-  'ar-EG-ShakirNeural',
-  'ar-IQ-RanaNeural',
-  'ar-IQ-BasselNeural',
-  'ar-JO-SanaNeural',
-  'ar-JO-TaimNeural',
-  'ar-KW-NouraNeural',
-  'ar-KW-FahedNeural',
-  'ar-LB-LaylaNeural',
-  'ar-LB-RamiNeural',
-  'ar-LY-ImanNeural',
-  'ar-LY-OmarNeural',
-  'ar-MA-MounaNeural',
-  'ar-MA-JamalNeural',
-  'ar-OM-AyshaNeural',
-  'ar-OM-AbdullahNeural',
-  'ar-QA-AmalNeural',
-  'ar-QA-MoazNeural',
-  'ar-SA-ZariyahNeural',
-  'ar-SA-HamedNeural',
-  'ar-SY-AmanyNeural',
-  'ar-SY-LaithNeural',
-  'ar-TN-ReemNeural',
-  'ar-TN-HediNeural',
-  'ar-YE-MaryamNeural',
-  'ar-YE-SalehNeural',
-  'as-IN-YashicaNeural',
-  'as-IN-PriyomNeural',
-  'az-AZ-BanuNeural',
-  'az-AZ-BabekNeural',
-  'bg-BG-KalinaNeural',
-  'bg-BG-BorislavNeural',
-  'bn-BD-NabanitaNeural',
-  'bn-BD-PradeepNeural',
-  'bn-IN-TanishaaNeural',
-  'bn-IN-BashkarNeural',
-  'bs-BA-VesnaNeural',
-  'bs-BA-GoranNeural',
-  'ca-ES-JoanaNeural',
-  'ca-ES-EnricNeural',
-  'ca-ES-AlbaNeural',
-  'cs-CZ-VlastaNeural',
-  'cs-CZ-AntoninNeural',
-  'cy-GB-NiaNeural',
-  'cy-GB-AledNeural',
-  'da-DK-ChristelNeural',
-  'da-DK-JeppeNeural',
-  'de-AT-IngridNeural',
-  'de-AT-JonasNeural',
-  'de-CH-LeniNeural',
-  'de-CH-JanNeural',
-  'de-DE-KatjaNeural',
-  'de-DE-ConradNeural',
-  'de-DE-SeraphinaMultilingualNeural',
-  'de-DE-FlorianMultilingualNeural',
-  'de-DE-AmalaNeural',
-  'de-DE-BerndNeural',
-  'de-DE-ChristophNeural',
-  'de-DE-ElkeNeural',
-  'de-DE-GiselaNeural',
-  'de-DE-KasperNeural',
-  'de-DE-KillianNeural',
-  'de-DE-KlarissaNeural',
-  'de-DE-KlausNeural',
-  'de-DE-LouisaNeural',
-  'de-DE-MajaNeural',
-  'de-DE-RalfNeural',
-  'de-DE-TanjaNeural',
-  'de-DE-Seraphina:DragonHDLatestNeural',
-  'el-GR-AthinaNeural',
-  'el-GR-NestorasNeural',
-  'en-AU-NatashaNeural',
-  'en-AU-WilliamNeural',
-  'en-AU-AnnetteNeural',
-  'en-AU-CarlyNeural',
-  'en-AU-DarrenNeural',
-  'en-AU-DuncanNeural',
-  'en-AU-ElsieNeural',
-  'en-AU-FreyaNeural',
-  'en-AU-JoanneNeural',
-  'en-AU-KenNeural',
-  'en-AU-KimNeural',
-  'en-AU-NeilNeural',
-  'en-AU-TimNeural',
-  'en-AU-TinaNeural',
-  'en-CA-ClaraNeural',
-  'en-CA-LiamNeural',
-  'en-GB-SoniaNeural',
-  'en-GB-RyanNeural',
-  'en-GB-LibbyNeural',
-  'en-GB-AdaMultilingualNeural',
-  'en-GB-OllieMultilingualNeural',
-  'en-GB-AbbiNeural',
-  'en-GB-AlfieNeural',
-  'en-GB-BellaNeural',
-  'en-GB-ElliotNeural',
-  'en-GB-EthanNeural',
-  'en-GB-HollieNeural',
-  'en-GB-MaisieNeural',
-  'en-GB-NoahNeural',
-  'en-GB-OliverNeural',
-  'en-GB-OliviaNeural',
-  'en-GB-ThomasNeural',
-  'en-GB-MiaNeural',
-  'en-HK-YanNeural',
-  'en-HK-SamNeural',
-  'en-IE-EmilyNeural',
-  'en-IE-ConnorNeural',
-  'en-IN-AaravNeural',
-  'en-IN-AashiNeural',
-  'en-IN-AnanyaNeural',
-  'en-IN-KavyaNeural',
-  'en-IN-KunalNeural',
-  'en-IN-NeerjaNeural',
-  'en-IN-PrabhatNeural',
-  'en-IN-RehaanNeural',
-  'en-IN-AartiNeural',
-  'en-IN-ArjunNeural',
-  'en-KE-AsiliaNeural',
-  'en-KE-ChilembaNeural',
-  'en-NG-EzinneNeural',
-  'en-NG-AbeoNeural',
-  'en-NZ-MollyNeural',
-  'en-NZ-MitchellNeural',
-  'en-PH-RosaNeural',
-  'en-PH-JamesNeural',
-  'en-SG-LunaNeural',
-  'en-SG-WayneNeural',
-  'en-TZ-ImaniNeural',
-  'en-TZ-ElimuNeural',
-  'en-US-AvaMultilingualNeural',
-  'en-US-AndrewMultilingualNeural',
-  'en-US-EmmaMultilingualNeural',
-  'en-US-BrianMultilingualNeural',
-  'en-US-AvaNeural',
-  'en-US-AndrewNeural',
-  'en-US-EmmaNeural',
-  'en-US-BrianNeural',
-  'en-US-JennyNeural',
-  'en-US-GuyNeural',
-  'en-US-AriaNeural',
-  'en-US-DavisNeural',
-  'en-US-JaneNeural',
-  'en-US-JasonNeural',
-  'en-US-KaiNeural',
-  'en-US-LunaNeural',
-  'en-US-SaraNeural',
-  'en-US-TonyNeural',
-  'en-US-NancyNeural',
-  'en-US-CoraMultilingualNeural',
-  'en-US-ChristopherMultilingualNeural',
-  'en-US-BrandonMultilingualNeural',
-  'en-US-AmberNeural',
-  'en-US-AnaNeural',
-  'en-US-AshleyNeural',
-  'en-US-BrandonNeural',
-  'en-US-ChristopherNeural',
-  'en-US-CoraNeural',
-  'en-US-ElizabethNeural',
-  'en-US-EricNeural',
-  'en-US-JacobNeural',
-  'en-US-JennyMultilingualNeural',
-  'en-US-MichelleNeural',
-  'en-US-MonicaNeural',
-  'en-US-RogerNeural',
-  'en-US-RyanMultilingualNeural',
-  'en-US-SteffanNeural',
-  'en-US-AdamMultilingualNeural',
-  'en-US-AIGenerate1Neural',
-  'en-US-AIGenerate2Neural',
-  'en-US-AlloyTurboMultilingualNeural',
-  'en-US-AmandaMultilingualNeural',
-  'en-US-BlueNeural',
-  'en-US-DavisMultilingualNeural',
-  'en-US-DerekMultilingualNeural',
-  'en-US-DustinMultilingualNeural',
-  'en-US-EchoTurboMultilingualNeural',
-  'en-US-EvelynMultilingualNeural',
-  'en-US-FableTurboMultilingualNeural',
-  'en-US-LewisMultilingualNeural',
-  'en-US-LolaMultilingualNeural',
-  'en-US-NancyMultilingualNeural',
-  'en-US-NovaTurboMultilingualNeural',
-  'en-US-OnyxTurboMultilingualNeural',
-  'en-US-PhoebeMultilingualNeural',
-  'en-US-SamuelMultilingualNeural',
-  'en-US-SerenaMultilingualNeural',
-  'en-US-ShimmerTurboMultilingualNeural',
-  'en-US-SteffanMultilingualNeural',
-  'en-US-Andrew:DragonHDLatestNeural',
-  'en-US-Andrew2:DragonHDLatestNeural',
-  'en-US-Aria:DragonHDLatestNeural',
-  'en-US-Ava:DragonHDLatestNeural',
-  'en-US-Brian:DragonHDLatestNeural',
-  'en-US-Davis:DragonHDLatestNeural',
-  'en-US-Emma:DragonHDLatestNeural',
-  'en-US-Emma2:DragonHDLatestNeural',
-  'en-US-Jenny:DragonHDLatestNeural',
-  'en-US-Steffan:DragonHDLatestNeural',
-  'en-ZA-LeahNeural',
-  'en-ZA-LukeNeural',
-] as const;
-
-export type VoiceId = (typeof AZURE_VOICES)[number];
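A short sketch of how these literals are consumed inside the source tree: `getSpeakers()` in `index.ts` splits each ID on `-`, so the first two segments become `language` and `region`, while the `VoiceId` union type-checks `speaker` options at compile time.

```ts
import { AZURE_VOICES, type VoiceId } from './voices';

// VoiceId is the union of the literal strings above, so typos fail to compile.
const speaker: VoiceId = 'en-US-AriaNeural';
// const bad: VoiceId = 'en-US-MissingNeural'; // type error

// The same split getSpeakers() performs in index.ts:
const [language, region] = speaker.split('-');
console.log({ voiceId: speaker, language, region });
// -> { voiceId: 'en-US-AriaNeural', language: 'en', region: 'US' }

console.log(AZURE_VOICES.length); // number of bundled voice IDs
```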
package/tsconfig.build.json
DELETED
package/tsconfig.json
DELETED
package/tsup.config.ts
DELETED
@@ -1,17 +0,0 @@
-import { generateTypes } from '@internal/types-builder';
-import { defineConfig } from 'tsup';
-
-export default defineConfig({
-  entry: ['src/index.ts'],
-  format: ['esm', 'cjs'],
-  clean: true,
-  dts: false,
-  splitting: true,
-  treeshake: {
-    preset: 'smallest',
-  },
-  sourcemap: true,
-  onSuccess: async () => {
-    await generateTypes(process.cwd());
-  },
-});