@mastra/voice-openai 0.1.2-alpha.1 → 0.1.2-alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,18 +1,23 @@
1
1
 
2
- > @mastra/voice-openai@0.1.2-alpha.1 build /home/runner/work/mastra/mastra/voice/openai
3
- > tsup src/index.ts --format esm --experimental-dts --clean --treeshake
2
+ > @mastra/voice-openai@0.1.2-alpha.3 build /home/runner/work/mastra/mastra/voice/openai
3
+ > tsup src/index.ts --format esm,cjs --experimental-dts --clean --treeshake
4
4
 
5
5
  CLI Building entry: src/index.ts
6
6
  CLI Using tsconfig: tsconfig.json
7
7
  CLI tsup v8.3.6
8
8
  TSC Build start
9
- TSC ⚡️ Build success in 7687ms
9
+ TSC ⚡️ Build success in 6267ms
10
10
  DTS Build start
11
11
  CLI Target: es2022
12
12
  Analysis will use the bundled TypeScript version 5.7.3
13
13
  Writing package typings: /home/runner/work/mastra/mastra/voice/openai/dist/_tsup-dts-rollup.d.ts
14
- DTS ⚡️ Build success in 6240ms
14
+ Analysis will use the bundled TypeScript version 5.7.3
15
+ Writing package typings: /home/runner/work/mastra/mastra/voice/openai/dist/_tsup-dts-rollup.d.cts
16
+ DTS ⚡️ Build success in 10839ms
15
17
  CLI Cleaning output folder
16
18
  ESM Build start
19
+ CJS Build start
17
20
  ESM dist/index.js 5.80 KB
18
- ESM ⚡️ Build success in 308ms
21
+ ESM ⚡️ Build success in 540ms
22
+ CJS dist/index.cjs 6.00 KB
23
+ CJS ⚡️ Build success in 530ms
package/CHANGELOG.md CHANGED
@@ -1,5 +1,26 @@
1
1
  # @mastra/voice-openai
2
2
 
3
+ ## 0.1.2-alpha.3
4
+
5
+ ### Patch Changes
6
+
7
+ - bb4f447: Add support for commonjs
8
+ - Updated dependencies [0fd78ac]
9
+ - Updated dependencies [0d25b75]
10
+ - Updated dependencies [fd14a3f]
11
+ - Updated dependencies [3f369a2]
12
+ - Updated dependencies [4d4e1e1]
13
+ - Updated dependencies [bb4f447]
14
+ - @mastra/core@0.4.3-alpha.3
15
+
16
+ ## 0.1.2-alpha.2
17
+
18
+ ### Patch Changes
19
+
20
+ - Updated dependencies [2512a93]
21
+ - Updated dependencies [e62de74]
22
+ - @mastra/core@0.4.3-alpha.2
23
+
3
24
  ## 0.1.2-alpha.1
4
25
 
5
26
  ### Patch Changes
@@ -0,0 +1,83 @@
1
+ import { MastraVoice } from '@mastra/core/voice';
2
+ import OpenAI from 'openai';
3
+
4
+ export declare interface OpenAIConfig {
5
+ name?: OpenAIModel;
6
+ apiKey?: string;
7
+ }
8
+
9
+ declare type OpenAIModel = 'tts-1' | 'tts-1-hd' | 'whisper-1';
10
+
11
+ export declare class OpenAIVoice extends MastraVoice {
12
+ speechClient?: OpenAI;
13
+ listeningClient?: OpenAI;
14
+ /**
15
+ * Constructs an instance of OpenAIVoice with optional configurations for speech and listening models.
16
+ *
17
+ * @param {Object} [config] - Configuration options for the OpenAIVoice instance.
18
+ * @param {OpenAIConfig} [config.listeningModel] - Configuration for the listening model, including model name and API key.
19
+ * @param {OpenAIConfig} [config.speechModel] - Configuration for the speech model, including model name and API key.
20
+ * @param {string} [config.speaker] - The default speaker's voice to use for speech synthesis.
21
+ * @throws {Error} - Throws an error if no API key can be resolved for the speech model or for the listening model (each uses its own `apiKey`, falling back to the `OPENAI_API_KEY` environment variable).
22
+ */
23
+ constructor({ listeningModel, speechModel, speaker, }?: {
24
+ listeningModel?: OpenAIConfig;
25
+ speechModel?: OpenAIConfig;
26
+ speaker?: string;
27
+ });
28
+ /**
29
+ * Retrieves a list of available speakers for the speech model.
30
+ *
31
+ * @returns {Promise<Array<{ voiceId: OpenAIVoiceId }>>} - A promise that resolves to an array of objects,
32
+ * each containing a `voiceId` representing an available speaker.
33
+ * @throws {Error} - Throws an error if the speech model is not configured.
34
+ */
35
+ getSpeakers(): Promise<Array<{
36
+ voiceId: OpenAIVoiceId;
37
+ }>>;
38
+ /**
39
+ * Converts text input (a string, or a readable stream that is read fully and decoded as UTF-8 text) into speech using the configured speech model.
40
+ *
41
+ * @param {string | NodeJS.ReadableStream} input - The text to synthesize, given as a string or as a readable stream of UTF-8 text.
42
+ * @param {Object} [options] - Optional parameters for the speech synthesis.
43
+ * @param {string} [options.speaker] - The speaker's voice to use for the speech synthesis.
44
+ * @param {number} [options.speed] - The speed at which the speech should be synthesized. Defaults to 1 when omitted or falsy.
45
+ * @returns {Promise<NodeJS.ReadableStream>} - A promise that resolves to a readable stream of the synthesized audio.
46
+ * @throws {Error} - Throws an error if the speech model is not configured or if the input text is empty.
47
+ */
48
+ speak(input: string | NodeJS.ReadableStream, options?: {
49
+ speaker?: string;
50
+ speed?: number;
51
+ [key: string]: any;
52
+ }): Promise<NodeJS.ReadableStream>;
53
+ /**
54
+ * Transcribes audio from a given stream using the configured listening model.
55
+ *
56
+ * @param {NodeJS.ReadableStream} audioStream - The audio stream to be transcribed.
57
+ * @param {Object} [options] - Optional parameters for the transcription.
58
+ * @param {string} [options.filetype] - The file type of the audio stream.
59
+ * Supported types include 'mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm'. Defaults to 'mp3' when omitted.
60
+ * @returns {Promise<string>} - A promise that resolves to the transcribed text.
61
+ * @throws {Error} - Throws an error if the listening model is not configured.
62
+ */
63
+ listen(audioStream: NodeJS.ReadableStream, options?: {
64
+ filetype?: 'mp3' | 'mp4' | 'mpeg' | 'mpga' | 'm4a' | 'wav' | 'webm';
65
+ [key: string]: any;
66
+ }): Promise<string>;
67
+ }
68
+
69
+ export declare interface OpenAIVoiceConfig {
70
+ speech?: {
71
+ model: 'tts-1' | 'tts-1-hd';
72
+ apiKey?: string;
73
+ speaker?: OpenAIVoiceId;
74
+ };
75
+ listening?: {
76
+ model: 'whisper-1';
77
+ apiKey?: string;
78
+ };
79
+ }
80
+
81
+ declare type OpenAIVoiceId = 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer' | 'ash' | 'coral' | 'sage';
82
+
83
+ export { }
package/dist/index.cjs ADDED
@@ -0,0 +1,157 @@
1
+ 'use strict';
2
+
3
+ var stream = require('stream');
4
+ var voice = require('@mastra/core/voice');
5
+ var OpenAI = require('openai');
6
+
7
+ function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
8
+
9
+ var OpenAI__default = /*#__PURE__*/_interopDefault(OpenAI);
10
+
11
+ // src/index.ts
12
+ var OpenAIVoice = class extends voice.MastraVoice {
13
+ speechClient;
14
+ listeningClient;
15
+ /**
16
+ * Constructs an instance of OpenAIVoice with optional configurations for speech and listening models.
17
+ *
18
+ * @param {Object} [config] - Configuration options for the OpenAIVoice instance.
19
+ * @param {OpenAIConfig} [config.listeningModel] - Configuration for the listening model, including model name and API key.
20
+ * @param {OpenAIConfig} [config.speechModel] - Configuration for the speech model, including model name and API key.
21
+ * @param {string} [config.speaker] - The default speaker's voice to use for speech synthesis.
22
+ * @throws {Error} - Throws an error if no API key can be resolved for the speech model or for the listening model (each uses its own `apiKey`, falling back to the `OPENAI_API_KEY` environment variable).
23
+ */
24
+ constructor({
25
+ listeningModel,
26
+ speechModel,
27
+ speaker
28
+ } = {}) {
29
+ const defaultApiKey = process.env.OPENAI_API_KEY;
30
+ const defaultSpeechModel = {
31
+ name: "tts-1",
32
+ apiKey: defaultApiKey
33
+ };
34
+ const defaultListeningModel = {
35
+ name: "whisper-1",
36
+ apiKey: defaultApiKey
37
+ };
38
+ super({
39
+ speechModel: {
40
+ name: speechModel?.name ?? defaultSpeechModel.name,
41
+ apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey
42
+ },
43
+ listeningModel: {
44
+ name: listeningModel?.name ?? defaultListeningModel.name,
45
+ apiKey: listeningModel?.apiKey ?? defaultListeningModel.apiKey
46
+ },
47
+ speaker: speaker ?? "alloy"
48
+ });
49
+ const speechApiKey = speechModel?.apiKey || defaultApiKey;
50
+ if (!speechApiKey) {
51
+ throw new Error("No API key provided for speech model");
52
+ }
53
+ this.speechClient = new OpenAI__default.default({ apiKey: speechApiKey });
54
+ const listeningApiKey = listeningModel?.apiKey || defaultApiKey;
55
+ if (!listeningApiKey) {
56
+ throw new Error("No API key provided for listening model");
57
+ }
58
+ this.listeningClient = new OpenAI__default.default({ apiKey: listeningApiKey });
59
+ if (!this.speechClient && !this.listeningClient) {
60
+ throw new Error("At least one of OPENAI_API_KEY, speechModel.apiKey, or listeningModel.apiKey must be set");
61
+ }
62
+ }
63
+ /**
64
+ * Retrieves a list of available speakers for the speech model.
65
+ *
66
+ * @returns {Promise<Array<{ voiceId: OpenAIVoiceId }>>} - A promise that resolves to an array of objects,
67
+ * each containing a `voiceId` representing an available speaker.
68
+ * @throws {Error} - Throws an error if the speech model is not configured.
69
+ */
70
+ async getSpeakers() {
71
+ if (!this.speechModel) {
72
+ throw new Error("Speech model not configured");
73
+ }
74
+ return [
75
+ { voiceId: "alloy" },
76
+ { voiceId: "echo" },
77
+ { voiceId: "fable" },
78
+ { voiceId: "onyx" },
79
+ { voiceId: "nova" },
80
+ { voiceId: "shimmer" },
81
+ { voiceId: "ash" },
82
+ { voiceId: "coral" },
83
+ { voiceId: "sage" }
84
+ ];
85
+ }
86
+ /**
87
+ * Converts text input (a string, or a readable stream that is read fully and decoded as UTF-8 text) into speech using the configured speech model.
88
+ *
89
+ * @param {string | NodeJS.ReadableStream} input - The text to synthesize, given as a string or as a readable stream of UTF-8 text.
90
+ * @param {Object} [options] - Optional parameters for the speech synthesis.
91
+ * @param {string} [options.speaker] - The speaker's voice to use for the speech synthesis.
92
+ * @param {number} [options.speed] - The speed at which the speech should be synthesized. Defaults to 1 when omitted or falsy.
93
+ * @returns {Promise<NodeJS.ReadableStream>} - A promise that resolves to a readable stream of the synthesized audio.
94
+ * @throws {Error} - Throws an error if the speech model is not configured or if the input text is empty.
95
+ */
96
+ async speak(input, options) {
97
+ if (!this.speechClient) {
98
+ throw new Error("Speech model not configured");
99
+ }
100
+ if (typeof input !== "string") {
101
+ const chunks = [];
102
+ for await (const chunk of input) {
103
+ chunks.push(Buffer.from(chunk));
104
+ }
105
+ input = Buffer.concat(chunks).toString("utf-8");
106
+ }
107
+ if (input.trim().length === 0) {
108
+ throw new Error("Input text is empty");
109
+ }
110
+ const audio = await this.traced(async () => {
111
+ const response = await this.speechClient.audio.speech.create({
112
+ model: this.speechModel?.name ?? "tts-1",
113
+ voice: options?.speaker ?? this.speaker,
114
+ input,
115
+ speed: options?.speed || 1
116
+ });
117
+ const passThrough = new stream.PassThrough();
118
+ const buffer = Buffer.from(await response.arrayBuffer());
119
+ passThrough.end(buffer);
120
+ return passThrough;
121
+ }, "voice.openai.speak")();
122
+ return audio;
123
+ }
124
+ /**
125
+ * Transcribes audio from a given stream using the configured listening model.
126
+ *
127
+ * @param {NodeJS.ReadableStream} audioStream - The audio stream to be transcribed.
128
+ * @param {Object} [options] - Optional parameters for the transcription.
129
+ * @param {string} [options.filetype] - The file type of the audio stream.
130
+ * Supported types include 'mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm'. Defaults to 'mp3' when omitted.
131
+ * @returns {Promise<string>} - A promise that resolves to the transcribed text.
132
+ * @throws {Error} - Throws an error if the listening model is not configured.
133
+ */
134
+ async listen(audioStream, options) {
135
+ if (!this.listeningClient) {
136
+ throw new Error("Listening model not configured");
137
+ }
138
+ const chunks = [];
139
+ for await (const chunk of audioStream) {
140
+ chunks.push(Buffer.from(chunk));
141
+ }
142
+ const audioBuffer = Buffer.concat(chunks);
143
+ const text = await this.traced(async () => {
144
+ const { filetype, ...otherOptions } = options || {};
145
+ const file = new File([audioBuffer], `audio.${filetype || "mp3"}`);
146
+ const response = await this.listeningClient.audio.transcriptions.create({
147
+ model: this.listeningModel?.name || "whisper-1",
148
+ file,
149
+ ...otherOptions
150
+ });
151
+ return response.text;
152
+ }, "voice.openai.listen")();
153
+ return text;
154
+ }
155
+ };
156
+
157
+ exports.OpenAIVoice = OpenAIVoice;
@@ -0,0 +1,3 @@
1
+ export { OpenAIConfig } from './_tsup-dts-rollup.cjs';
2
+ export { OpenAIVoiceConfig } from './_tsup-dts-rollup.cjs';
3
+ export { OpenAIVoice } from './_tsup-dts-rollup.cjs';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mastra/voice-openai",
3
- "version": "0.1.2-alpha.1",
3
+ "version": "0.1.2-alpha.3",
4
4
  "description": "Mastra OpenAI speech integration",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -10,6 +10,10 @@
10
10
  "import": {
11
11
  "types": "./dist/index.d.ts",
12
12
  "default": "./dist/index.js"
13
+ },
14
+ "require": {
15
+ "types": "./dist/index.d.cts",
16
+ "default": "./dist/index.cjs"
13
17
  }
14
18
  },
15
19
  "./package.json": "./package.json"
@@ -17,7 +21,7 @@
17
21
  "dependencies": {
18
22
  "openai": "^4.28.0",
19
23
  "zod": "^3.24.1",
20
- "@mastra/core": "^0.4.3-alpha.1"
24
+ "@mastra/core": "^0.4.3-alpha.3"
21
25
  },
22
26
  "devDependencies": {
23
27
  "@microsoft/api-extractor": "^7.49.2",
@@ -29,7 +33,7 @@
29
33
  "@internal/lint": "0.0.0"
30
34
  },
31
35
  "scripts": {
32
- "build": "tsup src/index.ts --format esm --experimental-dts --clean --treeshake",
36
+ "build": "tsup src/index.ts --format esm,cjs --experimental-dts --clean --treeshake",
33
37
  "build:watch": "pnpm build --watch",
34
38
  "test": "vitest run",
35
39
  "lint": "eslint ."