@mastra/voice-murf 0.12.1 → 0.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/dist/_types/@internal_voice/dist/_types/@internal_ai-sdk-v5/dist/index.d.ts +8888 -0
- package/dist/_types/@internal_voice/dist/_types/@internal_core/dist/base/index.d.ts +31 -0
- package/dist/_types/@internal_voice/dist/_types/@internal_core/dist/logger/index.d.ts +217 -0
- package/dist/_types/@internal_voice/dist/_types/@internal_core/dist/request-context/index.d.ts +147 -0
- package/dist/_types/@internal_voice/dist/_types/@internal_core/dist/types/index.d.ts +3 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/ZodError.d.ts +164 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/errors.d.ts +5 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/external.d.ts +6 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/enumUtil.d.ts +8 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/errorUtil.d.ts +9 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/parseUtil.d.ts +78 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/partialUtil.d.ts +8 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/typeAliases.d.ts +2 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/util.d.ts +85 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/index.d.cts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/index.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/locales/en.d.ts +3 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/standard-schema.d.ts +102 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/types.d.ts +1034 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/checks.d.ts +1 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/coerce.d.ts +17 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/compat.d.ts +50 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/errors.d.ts +30 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/external.d.ts +16 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/from-json-schema.d.ts +12 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/index.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/iso.d.ts +22 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/parse.d.ts +31 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/schemas.d.ts +767 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/api.d.ts +325 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/checks.d.ts +278 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/core.d.ts +70 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/doc.d.ts +14 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/errors.d.ts +221 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/index.d.ts +16 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/json-schema-generator.d.ts +65 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/json-schema-processors.d.ts +49 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/json-schema.d.ts +88 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/parse.d.ts +49 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/regexes.d.ts +85 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/registries.d.ts +35 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/schemas.d.ts +1184 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/standard-schema.d.ts +126 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/to-json-schema.d.ts +114 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/util.d.ts +200 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/versions.d.ts +5 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/index.d.cts +3 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ar.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/az.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/be.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/bg.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ca.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/cs.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/da.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/de.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/el.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/en.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/eo.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/es.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/fa.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/fi.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/fr-CA.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/fr.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/he.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/hr.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/hu.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/hy.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/id.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/index.d.ts +52 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/is.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/it.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ja.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ka.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/kh.d.ts +5 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/km.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ko.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/lt.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/mk.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ms.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/nl.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/no.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ota.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/pl.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ps.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/pt.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ro.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ru.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/sl.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/sv.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ta.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/th.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/tr.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ua.d.ts +5 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/uk.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ur.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/uz.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/vi.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/yo.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/zh-CN.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/zh-TW.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/index.d.ts +16 -0
- package/dist/_types/@internal_voice/dist/voice/aisdk/index.d.ts +3 -0
- package/dist/_types/@internal_voice/dist/voice/aisdk/speech.d.ts +23 -0
- package/dist/_types/@internal_voice/dist/voice/aisdk/transcription.d.ts +22 -0
- package/dist/_types/@internal_voice/dist/voice/composite-voice.d.ts +72 -0
- package/dist/_types/@internal_voice/dist/voice/default-voice.d.ts +13 -0
- package/dist/_types/@internal_voice/dist/voice/index.d.ts +5 -0
- package/dist/_types/@internal_voice/dist/voice/voice.d.ts +172 -0
- package/dist/docs/SKILL.md +1 -1
- package/dist/docs/assets/SOURCE_MAP.json +1 -1
- package/dist/docs/references/docs-agents-adding-voice.md +37 -6
- package/dist/docs/references/docs-voice-overview.md +86 -24
- package/dist/index.cjs +260 -2
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +259 -1
- package/dist/index.js.map +1 -1
- package/package.json +4 -5
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
export * from './voice/index.js';
|
|
2
|
+
export type { RequestContext } from './_types/@internal_core/dist/request-context/index.d.ts';
|
|
3
|
+
export type { ToolsInput } from './_types/@internal_core/dist/types/index.d.ts';
|
|
4
|
+
type InferToolInput<TInputSchema> = TInputSchema extends {
|
|
5
|
+
_input: infer TInput;
|
|
6
|
+
} ? TInput : unknown;
|
|
7
|
+
type VoiceToolExecute<TInputSchema, TContext, TOutput> = (input: InferToolInput<TInputSchema>, context: TContext) => TOutput | Promise<TOutput>;
|
|
8
|
+
export type VoiceTool<TId extends string = string, TInputSchema = unknown, TOutputSchema = unknown, TContext = unknown, TOutput = unknown> = {
|
|
9
|
+
id: TId;
|
|
10
|
+
description: string;
|
|
11
|
+
inputSchema?: TInputSchema;
|
|
12
|
+
outputSchema?: TOutputSchema;
|
|
13
|
+
execute?: VoiceToolExecute<TInputSchema, TContext, TOutput>;
|
|
14
|
+
};
|
|
15
|
+
export declare function createTool<TId extends string, TInputSchema = unknown, TOutputSchema = unknown, TContext = unknown, TOutput = unknown>(opts: VoiceTool<TId, TInputSchema, TOutputSchema, TContext, TOutput>): VoiceTool<TId, TInputSchema, TOutputSchema, TContext, TOutput>;
|
|
16
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import type { SpeechModel } from '../../_types/@internal_ai-sdk-v5/dist/index.d.ts';
|
|
2
|
+
import { MastraVoice } from '../voice.js';
|
|
3
|
+
export declare class AISDKSpeech extends MastraVoice {
|
|
4
|
+
private model;
|
|
5
|
+
private defaultVoice?;
|
|
6
|
+
constructor(model: SpeechModel, options?: {
|
|
7
|
+
voice?: string;
|
|
8
|
+
});
|
|
9
|
+
speak(input: string | NodeJS.ReadableStream, options?: {
|
|
10
|
+
speaker?: string;
|
|
11
|
+
language?: string;
|
|
12
|
+
providerOptions?: Record<string, any>;
|
|
13
|
+
abortSignal?: AbortSignal;
|
|
14
|
+
headers?: Record<string, string>;
|
|
15
|
+
}): Promise<NodeJS.ReadableStream>;
|
|
16
|
+
listen(): Promise<string>;
|
|
17
|
+
getSpeakers(): Promise<never[]>;
|
|
18
|
+
getListener(): Promise<{
|
|
19
|
+
enabled: boolean;
|
|
20
|
+
}>;
|
|
21
|
+
private streamToText;
|
|
22
|
+
}
|
|
23
|
+
//# sourceMappingURL=speech.d.ts.map
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import type { TranscriptionModel } from '../../_types/@internal_ai-sdk-v5/dist/index.d.ts';
|
|
2
|
+
import { MastraVoice } from '../voice.js';
|
|
3
|
+
export declare class AISDKTranscription extends MastraVoice {
|
|
4
|
+
private model;
|
|
5
|
+
constructor(model: TranscriptionModel);
|
|
6
|
+
speak(): Promise<NodeJS.ReadableStream>;
|
|
7
|
+
getSpeakers(): Promise<never[]>;
|
|
8
|
+
getListener(): Promise<{
|
|
9
|
+
enabled: boolean;
|
|
10
|
+
}>;
|
|
11
|
+
/**
|
|
12
|
+
* Transcribe audio to text
|
|
13
|
+
* For enhanced metadata (segments, language, duration), use AI SDK's transcribe() directly
|
|
14
|
+
*/
|
|
15
|
+
listen(audioStream: NodeJS.ReadableStream, options?: {
|
|
16
|
+
providerOptions?: Record<string, any>;
|
|
17
|
+
abortSignal?: AbortSignal;
|
|
18
|
+
headers?: Record<string, string>;
|
|
19
|
+
}): Promise<string>;
|
|
20
|
+
private convertToBuffer;
|
|
21
|
+
}
|
|
22
|
+
//# sourceMappingURL=transcription.d.ts.map
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import type { TranscriptionModel, SpeechModel } from '../_types/@internal_ai-sdk-v5/dist/index.d.ts';
|
|
2
|
+
import type { ToolsInput } from '../_types/@internal_core/dist/types/index.d.ts';
|
|
3
|
+
import { MastraVoice } from './voice.js';
|
|
4
|
+
import type { VoiceEventType, VoiceEventMap } from '.';
|
|
5
|
+
export declare class CompositeVoice extends MastraVoice<unknown, unknown, unknown, ToolsInput, VoiceEventMap> {
|
|
6
|
+
protected speakProvider?: MastraVoice;
|
|
7
|
+
protected listenProvider?: MastraVoice;
|
|
8
|
+
protected realtimeProvider?: MastraVoice;
|
|
9
|
+
constructor({ input, output, realtime, }: {
|
|
10
|
+
input?: MastraVoice | TranscriptionModel;
|
|
11
|
+
output?: MastraVoice | SpeechModel;
|
|
12
|
+
realtime?: MastraVoice;
|
|
13
|
+
});
|
|
14
|
+
/**
|
|
15
|
+
* Convert text to speech using the configured provider
|
|
16
|
+
* @param input Text or text stream to convert to speech
|
|
17
|
+
* @param options Speech options including speaker and provider-specific options
|
|
18
|
+
* @returns Audio stream or void if in realtime mode
|
|
19
|
+
*/
|
|
20
|
+
speak(input: string | NodeJS.ReadableStream, options?: {
|
|
21
|
+
speaker?: string;
|
|
22
|
+
} & any): Promise<NodeJS.ReadableStream | void>;
|
|
23
|
+
listen(audioStream: NodeJS.ReadableStream, options?: any): Promise<string | void | NodeJS.ReadableStream>;
|
|
24
|
+
getSpeakers(): Promise<{
|
|
25
|
+
voiceId: string;
|
|
26
|
+
}[]>;
|
|
27
|
+
getListener(): Promise<{
|
|
28
|
+
enabled: boolean;
|
|
29
|
+
}>;
|
|
30
|
+
updateConfig(options: Record<string, unknown>): void;
|
|
31
|
+
/**
|
|
32
|
+
* Initializes a WebSocket or WebRTC connection for real-time communication
|
|
33
|
+
* @returns Promise that resolves when the connection is established
|
|
34
|
+
*/
|
|
35
|
+
connect(options?: Record<string, unknown>): Promise<void>;
|
|
36
|
+
/**
|
|
37
|
+
* Relay audio data to the voice provider for real-time processing
|
|
38
|
+
* @param audioData Audio data to send
|
|
39
|
+
*/
|
|
40
|
+
send(audioData: NodeJS.ReadableStream | Int16Array): Promise<void>;
|
|
41
|
+
/**
|
|
42
|
+
* Trigger voice providers to respond
|
|
43
|
+
*/
|
|
44
|
+
answer(options?: Record<string, unknown>): Promise<void>;
|
|
45
|
+
/**
|
|
46
|
+
* Equip the voice provider with instructions
|
|
47
|
+
* @param instructions Instructions to add
|
|
48
|
+
*/
|
|
49
|
+
addInstructions(instructions: string): void;
|
|
50
|
+
/**
|
|
51
|
+
* Equip the voice provider with tools
|
|
52
|
+
* @param tools Array of tools to add
|
|
53
|
+
*/
|
|
54
|
+
addTools(tools: ToolsInput): void;
|
|
55
|
+
/**
|
|
56
|
+
* Disconnect from the WebSocket or WebRTC connection
|
|
57
|
+
*/
|
|
58
|
+
close(): void;
|
|
59
|
+
/**
|
|
60
|
+
* Register an event listener
|
|
61
|
+
* @param event Event name (e.g., 'speaking', 'writing', 'error')
|
|
62
|
+
* @param callback Callback function that receives event data
|
|
63
|
+
*/
|
|
64
|
+
on<E extends VoiceEventType>(event: E, callback: (data: E extends keyof VoiceEventMap ? VoiceEventMap[E] : unknown) => void): void;
|
|
65
|
+
/**
|
|
66
|
+
* Remove an event listener
|
|
67
|
+
* @param event Event name (e.g., 'speaking', 'writing', 'error')
|
|
68
|
+
* @param callback Callback function to remove
|
|
69
|
+
*/
|
|
70
|
+
off<E extends VoiceEventType>(event: E, callback: (data: E extends keyof VoiceEventMap ? VoiceEventMap[E] : unknown) => void): void;
|
|
71
|
+
}
|
|
72
|
+
//# sourceMappingURL=composite-voice.d.ts.map
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { MastraVoice } from '.';
|
|
2
|
+
export declare class DefaultVoice extends MastraVoice {
|
|
3
|
+
constructor();
|
|
4
|
+
speak(_input: string | NodeJS.ReadableStream): Promise<NodeJS.ReadableStream>;
|
|
5
|
+
listen(_input: string | NodeJS.ReadableStream): Promise<string>;
|
|
6
|
+
getSpeakers(): Promise<{
|
|
7
|
+
voiceId: string;
|
|
8
|
+
}[]>;
|
|
9
|
+
getListener(): Promise<{
|
|
10
|
+
enabled: boolean;
|
|
11
|
+
}>;
|
|
12
|
+
}
|
|
13
|
+
//# sourceMappingURL=default-voice.d.ts.map
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
import { MastraBase } from '../_types/@internal_core/dist/base/index.d.ts';
|
|
2
|
+
import type { ToolsInput } from '../_types/@internal_core/dist/types/index.d.ts';
|
|
3
|
+
export type VoiceEventType = 'speaking' | 'writing' | 'error' | string;
|
|
4
|
+
export interface VoiceEventMap {
|
|
5
|
+
speaker: NodeJS.ReadableStream;
|
|
6
|
+
speaking: {
|
|
7
|
+
audio?: string;
|
|
8
|
+
};
|
|
9
|
+
writing: {
|
|
10
|
+
text: string;
|
|
11
|
+
role: 'assistant' | 'user';
|
|
12
|
+
};
|
|
13
|
+
error: {
|
|
14
|
+
message: string;
|
|
15
|
+
code?: string;
|
|
16
|
+
details?: unknown;
|
|
17
|
+
};
|
|
18
|
+
[key: string]: unknown;
|
|
19
|
+
}
|
|
20
|
+
interface BuiltInModelConfig {
|
|
21
|
+
name: string;
|
|
22
|
+
apiKey?: string;
|
|
23
|
+
}
|
|
24
|
+
export interface VoiceConfig<T = unknown> {
|
|
25
|
+
listeningModel?: BuiltInModelConfig;
|
|
26
|
+
speechModel?: BuiltInModelConfig;
|
|
27
|
+
speaker?: string;
|
|
28
|
+
name?: string;
|
|
29
|
+
realtimeConfig?: {
|
|
30
|
+
model?: string;
|
|
31
|
+
apiKey?: string;
|
|
32
|
+
options?: T;
|
|
33
|
+
};
|
|
34
|
+
}
|
|
35
|
+
export interface VoiceSpanConfig {
|
|
36
|
+
component: 'VOICE';
|
|
37
|
+
name?: string;
|
|
38
|
+
speaker?: string;
|
|
39
|
+
listeningModel?: {
|
|
40
|
+
name: string;
|
|
41
|
+
};
|
|
42
|
+
speechModel?: {
|
|
43
|
+
name: string;
|
|
44
|
+
};
|
|
45
|
+
realtimeModel?: string;
|
|
46
|
+
}
|
|
47
|
+
export interface IMastraVoice<TSpeakOptions = unknown, TListenOptions = unknown, TTools extends ToolsInput = ToolsInput, TEventArgs extends VoiceEventMap = VoiceEventMap, TSpeakerMetadata = unknown> {
|
|
48
|
+
serializeForSpan(): VoiceSpanConfig;
|
|
49
|
+
speak(input: string | NodeJS.ReadableStream, options?: {
|
|
50
|
+
speaker?: string;
|
|
51
|
+
} & TSpeakOptions): Promise<NodeJS.ReadableStream | void>;
|
|
52
|
+
listen(audioStream: NodeJS.ReadableStream | unknown, options?: TListenOptions): Promise<string | NodeJS.ReadableStream | void>;
|
|
53
|
+
updateConfig(options: Record<string, unknown>): void;
|
|
54
|
+
connect(options?: Record<string, unknown>): Promise<void>;
|
|
55
|
+
send(audioData: NodeJS.ReadableStream | Int16Array): Promise<void>;
|
|
56
|
+
answer(options?: Record<string, unknown>): Promise<void>;
|
|
57
|
+
addInstructions(instructions?: string): void;
|
|
58
|
+
addTools(tools: TTools): void;
|
|
59
|
+
close(): void;
|
|
60
|
+
on<E extends VoiceEventType>(event: E, callback: (data: E extends keyof TEventArgs ? TEventArgs[E] : unknown) => void): void;
|
|
61
|
+
off<E extends VoiceEventType>(event: E, callback: (data: E extends keyof TEventArgs ? TEventArgs[E] : unknown) => void): void;
|
|
62
|
+
getSpeakers(): Promise<Array<{
|
|
63
|
+
voiceId: string;
|
|
64
|
+
} & TSpeakerMetadata>>;
|
|
65
|
+
getListener(): Promise<{
|
|
66
|
+
enabled: boolean;
|
|
67
|
+
}>;
|
|
68
|
+
}
|
|
69
|
+
export declare abstract class MastraVoice<TOptions = unknown, TSpeakOptions = unknown, TListenOptions = unknown, TTools extends ToolsInput = ToolsInput, TEventArgs extends VoiceEventMap = VoiceEventMap, TSpeakerMetadata = unknown> extends MastraBase implements IMastraVoice<TSpeakOptions, TListenOptions, TTools, TEventArgs, TSpeakerMetadata> {
|
|
70
|
+
protected listeningModel?: BuiltInModelConfig;
|
|
71
|
+
protected speechModel?: BuiltInModelConfig;
|
|
72
|
+
protected speaker?: string;
|
|
73
|
+
protected realtimeConfig?: {
|
|
74
|
+
model?: string;
|
|
75
|
+
apiKey?: string;
|
|
76
|
+
options?: TOptions;
|
|
77
|
+
};
|
|
78
|
+
constructor({ listeningModel, speechModel, speaker, realtimeConfig, name }?: VoiceConfig<TOptions>);
|
|
79
|
+
/**
|
|
80
|
+
* Custom serialization for tracing/observability spans.
|
|
81
|
+
* Excludes `apiKey` from listeningModel / speechModel / realtimeConfig
|
|
82
|
+
* and any provider-specific state held by subclasses. Subclasses that
|
|
83
|
+
* need to expose additional non-sensitive fields can override.
|
|
84
|
+
*/
|
|
85
|
+
serializeForSpan(): VoiceSpanConfig;
|
|
86
|
+
/**
|
|
87
|
+
* Convert text to speech
|
|
88
|
+
* @param input Text or text stream to convert to speech
|
|
89
|
+
* @param options Speech options including speaker and provider-specific options
|
|
90
|
+
* @returns Audio stream
|
|
91
|
+
*/
|
|
92
|
+
/**
|
|
93
|
+
* Convert text to speech
|
|
94
|
+
* @param input Text or text stream to convert to speech
|
|
95
|
+
* @param options Speech options including speaker and provider-specific options
|
|
96
|
+
* @returns Audio stream or void if in chat mode
|
|
97
|
+
*/
|
|
98
|
+
abstract speak(input: string | NodeJS.ReadableStream, options?: {
|
|
99
|
+
speaker?: string;
|
|
100
|
+
} & TSpeakOptions): Promise<NodeJS.ReadableStream | void>;
|
|
101
|
+
/**
|
|
102
|
+
* Convert speech to text
|
|
103
|
+
* @param audioStream Audio stream to transcribe
|
|
104
|
+
* @param options Provider-specific transcription options
|
|
105
|
+
* @returns Text or text stream
|
|
106
|
+
*/
|
|
107
|
+
/**
|
|
108
|
+
* Convert speech to text
|
|
109
|
+
* @param audioStream Audio stream to transcribe
|
|
110
|
+
* @param options Provider-specific transcription options
|
|
111
|
+
* @returns Text, text stream, or void if in chat mode
|
|
112
|
+
*/
|
|
113
|
+
abstract listen(audioStream: NodeJS.ReadableStream | unknown, // Allow other audio input types for OpenAI realtime API
|
|
114
|
+
options?: TListenOptions): Promise<string | NodeJS.ReadableStream | void>;
|
|
115
|
+
updateConfig(_options: Record<string, unknown>): void;
|
|
116
|
+
/**
|
|
117
|
+
* Initializes a WebSocket or WebRTC connection for real-time communication
|
|
118
|
+
* @returns Promise that resolves when the connection is established
|
|
119
|
+
*/
|
|
120
|
+
connect(_options?: Record<string, unknown>): Promise<void>;
|
|
121
|
+
/**
|
|
122
|
+
* Relay audio data to the voice provider for real-time processing
|
|
123
|
+
* @param audioData Audio data to relay
|
|
124
|
+
*/
|
|
125
|
+
send(_audioData: NodeJS.ReadableStream | Int16Array): Promise<void>;
|
|
126
|
+
/**
|
|
127
|
+
* Trigger voice providers to respond
|
|
128
|
+
*/
|
|
129
|
+
answer(_options?: Record<string, unknown>): Promise<void>;
|
|
130
|
+
/**
|
|
131
|
+
* Equip the voice provider with instructions
|
|
132
|
+
* @param instructions Instructions to add
|
|
133
|
+
*/
|
|
134
|
+
addInstructions(_instructions?: string): void;
|
|
135
|
+
/**
|
|
136
|
+
* Equip the voice provider with tools
|
|
137
|
+
* @param tools Array of tools to add
|
|
138
|
+
*/
|
|
139
|
+
addTools(_tools: TTools): void;
|
|
140
|
+
/**
|
|
141
|
+
* Disconnect from the WebSocket or WebRTC connection
|
|
142
|
+
*/
|
|
143
|
+
close(): void;
|
|
144
|
+
/**
|
|
145
|
+
* Register an event listener
|
|
146
|
+
* @param event Event name (e.g., 'speaking', 'writing', 'error')
|
|
147
|
+
* @param callback Callback function that receives event data
|
|
148
|
+
*/
|
|
149
|
+
on<E extends VoiceEventType>(_event: E, _callback: (data: E extends keyof TEventArgs ? TEventArgs[E] : unknown) => void): void;
|
|
150
|
+
/**
|
|
151
|
+
* Remove an event listener
|
|
152
|
+
* @param event Event name (e.g., 'speaking', 'writing', 'error')
|
|
153
|
+
* @param callback Callback function to remove
|
|
154
|
+
*/
|
|
155
|
+
off<E extends VoiceEventType>(_event: E, _callback: (data: E extends keyof TEventArgs ? TEventArgs[E] : unknown) => void): void;
|
|
156
|
+
/**
|
|
157
|
+
* Get available speakers/voices
|
|
158
|
+
* @returns Array of available voice IDs and their metadata
|
|
159
|
+
*/
|
|
160
|
+
getSpeakers(): Promise<Array<{
|
|
161
|
+
voiceId: string;
|
|
162
|
+
} & TSpeakerMetadata>>;
|
|
163
|
+
/**
|
|
164
|
+
* Get the listener status for this voice provider
|
|
165
|
+
* @returns Object whose `enabled` flag reports whether listening is enabled
|
|
166
|
+
*/
|
|
167
|
+
getListener(): Promise<{
|
|
168
|
+
enabled: boolean;
|
|
169
|
+
}>;
|
|
170
|
+
}
|
|
171
|
+
export {};
|
|
172
|
+
//# sourceMappingURL=voice.d.ts.map
|
package/dist/docs/SKILL.md
CHANGED
|
@@ -20,7 +20,7 @@ export const agent = new Agent({
|
|
|
20
20
|
id: 'voice-agent',
|
|
21
21
|
name: 'Voice Agent',
|
|
22
22
|
instructions: `You are a helpful assistant with both STT and TTS capabilities.`,
|
|
23
|
-
model: 'openai/gpt-5.
|
|
23
|
+
model: 'openai/gpt-5.5',
|
|
24
24
|
voice,
|
|
25
25
|
})
|
|
26
26
|
|
|
@@ -109,7 +109,7 @@ export const agent = new Agent({
|
|
|
109
109
|
id: 'speech-to-speech-agent',
|
|
110
110
|
name: 'Speech-to-Speech Agent',
|
|
111
111
|
instructions: `You are a helpful assistant with speech-to-speech capabilities.`,
|
|
112
|
-
model: 'openai/gpt-5.
|
|
112
|
+
model: 'openai/gpt-5.5',
|
|
113
113
|
tools: {
|
|
114
114
|
// Tools configured on Agent are passed to voice provider
|
|
115
115
|
search,
|
|
@@ -132,6 +132,37 @@ agent.voice.send(microphoneStream)
|
|
|
132
132
|
agent.voice.close()
|
|
133
133
|
```
|
|
134
134
|
|
|
135
|
+
### Per-session voice for concurrent sessions
|
|
136
|
+
|
|
137
|
+
A static `voice` instance is shared across every request. For one-shot text-to-speech this is fine, but realtime and speech-to-speech providers store one WebSocket, one set of tools, and one request context per instance. If you deploy a single agent that handles several live sessions at once, a shared instance lets one session overwrite another session's tools, instructions, and request context.
|
|
138
|
+
|
|
139
|
+
To give each session its own voice, provide `voice` as a resolver. Mastra runs the resolver on every `getVoice()` call and returns a fresh, session-owned instance:
|
|
140
|
+
|
|
141
|
+
```typescript
|
|
142
|
+
import { Agent } from '@mastra/core/agent'
|
|
143
|
+
import { OpenAIRealtimeVoice } from '@mastra/voice-openai-realtime'
|
|
144
|
+
|
|
145
|
+
export const agent = new Agent({
|
|
146
|
+
id: 'support-line',
|
|
147
|
+
name: 'Support Line',
|
|
148
|
+
instructions: ({ requestContext }) => `Help user ${requestContext.get('user')}.`,
|
|
149
|
+
model: 'openai/gpt-5.5',
|
|
150
|
+
voice: ({ requestContext }) => new OpenAIRealtimeVoice({ apiKey: requestContext.get('apiKey') }),
|
|
151
|
+
})
|
|
152
|
+
|
|
153
|
+
// Each concurrent session resolves its own voice instance
|
|
154
|
+
const voice = await agent.getVoice({ requestContext })
|
|
155
|
+
await voice.connect()
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
When you use a resolver:
|
|
159
|
+
|
|
160
|
+
- Each call to `getVoice()` returns a new instance, so concurrent sessions never share state.
|
|
161
|
+
- Mastra does not add tools or instructions to a resolver instance. Configure those inside the resolver or on the provider.
|
|
162
|
+
- You own the lifecycle of the returned instance, so call `disconnect()` or `close()` when the session ends.
|
|
163
|
+
|
|
164
|
+
The `agent.voice` getter has no request context, so it throws when `voice` is a resolver. Use `agent.getVoice({ requestContext })` instead.
|
|
165
|
+
|
|
135
166
|
### Event System
|
|
136
167
|
|
|
137
168
|
The realtime voice provider emits several events you can listen for:
|
|
@@ -209,7 +240,7 @@ export const convertToText = async (input: string | NodeJS.ReadableStream): Prom
|
|
|
209
240
|
export const hybridVoiceAgent = new Agent({
|
|
210
241
|
id: 'hybrid-voice-agent',
|
|
211
242
|
name: 'Hybrid Voice Agent',
|
|
212
|
-
model: 'openai/gpt-5.
|
|
243
|
+
model: 'openai/gpt-5.5',
|
|
213
244
|
instructions: 'You can speak and listen using different providers.',
|
|
214
245
|
voice: new CompositeVoice({
|
|
215
246
|
input: new OpenAIVoice(),
|
|
@@ -221,7 +252,7 @@ export const unifiedVoiceAgent = new Agent({
|
|
|
221
252
|
id: 'unified-voice-agent',
|
|
222
253
|
name: 'Unified Voice Agent',
|
|
223
254
|
instructions: 'You are an agent with both STT and TTS capabilities.',
|
|
224
|
-
model: 'openai/gpt-5.
|
|
255
|
+
model: 'openai/gpt-5.5',
|
|
225
256
|
voice: new OpenAIVoice(),
|
|
226
257
|
})
|
|
227
258
|
|
|
@@ -263,7 +294,7 @@ export const agent = new Agent({
|
|
|
263
294
|
id: 'voice-agent',
|
|
264
295
|
name: 'Voice Agent',
|
|
265
296
|
instructions: `You are a helpful assistant with both STT and TTS capabilities.`,
|
|
266
|
-
model: 'openai/gpt-5.
|
|
297
|
+
model: 'openai/gpt-5.5',
|
|
267
298
|
|
|
268
299
|
// Create a composite voice using OpenAI for listening and PlayAI for speaking
|
|
269
300
|
voice: new CompositeVoice({
|
|
@@ -288,7 +319,7 @@ export const agent = new Agent({
|
|
|
288
319
|
id: 'aisdk-voice-agent',
|
|
289
320
|
name: 'AI SDK Voice Agent',
|
|
290
321
|
instructions: `You are a helpful assistant with voice capabilities.`,
|
|
291
|
-
model: 'openai/gpt-5.
|
|
322
|
+
model: 'openai/gpt-5.5',
|
|
292
323
|
|
|
293
324
|
// Pass AI SDK models directly to CompositeVoice
|
|
294
325
|
voice: new CompositeVoice({
|