@ariaflowagents/gemini-native-audio 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +104 -0
- package/dist/CallWorker.d.ts +56 -0
- package/dist/CallWorker.d.ts.map +1 -0
- package/dist/CallWorker.js +172 -0
- package/dist/CallWorker.js.map +1 -0
- package/dist/CapabilityCallWorker.d.ts +46 -0
- package/dist/CapabilityCallWorker.d.ts.map +1 -0
- package/dist/CapabilityCallWorker.js +319 -0
- package/dist/CapabilityCallWorker.js.map +1 -0
- package/dist/GeminiLiveSession.d.ts +86 -0
- package/dist/GeminiLiveSession.d.ts.map +1 -0
- package/dist/GeminiLiveSession.js +297 -0
- package/dist/GeminiLiveSession.js.map +1 -0
- package/dist/RealtimeCallWorker.d.ts +47 -0
- package/dist/RealtimeCallWorker.d.ts.map +1 -0
- package/dist/RealtimeCallWorker.js +55 -0
- package/dist/RealtimeCallWorker.js.map +1 -0
- package/dist/VoiceEngine.d.ts +67 -0
- package/dist/VoiceEngine.d.ts.map +1 -0
- package/dist/VoiceEngine.js +156 -0
- package/dist/VoiceEngine.js.map +1 -0
- package/dist/factories.d.ts +32 -0
- package/dist/factories.d.ts.map +1 -0
- package/dist/factories.js +43 -0
- package/dist/factories.js.map +1 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +12 -0
- package/dist/index.js.map +1 -0
- package/dist/openai/OpenAIRealtimeClient.d.ts +51 -0
- package/dist/openai/OpenAIRealtimeClient.d.ts.map +1 -0
- package/dist/openai/OpenAIRealtimeClient.js +327 -0
- package/dist/openai/OpenAIRealtimeClient.js.map +1 -0
- package/dist/openai/index.d.ts +3 -0
- package/dist/openai/index.d.ts.map +1 -0
- package/dist/openai/index.js +2 -0
- package/dist/openai/index.js.map +1 -0
- package/dist/schema-bridge.d.ts +14 -0
- package/dist/schema-bridge.d.ts.map +1 -0
- package/dist/schema-bridge.js +20 -0
- package/dist/schema-bridge.js.map +1 -0
- package/dist/types.d.ts +150 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/package.json +44 -0
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
import type { AgentDefinition, Foundation } from '@ariaflowagents/core/foundation';
|
|
2
|
+
import type { AutoRetrieveProvider, FlowConfig, HarnessHooks, ToolSet } from '@ariaflowagents/core/types';
|
|
3
|
+
import type { LivePromptAssembler } from '@ariaflowagents/core/capabilities';
|
|
4
|
+
import type { MemoryService } from '@ariaflowagents/core/memory';
|
|
5
|
+
import type { RealtimeAudioClient } from '@ariaflowagents/core/realtime';
|
|
6
|
+
import type { LanguageModel } from 'ai';
|
|
7
|
+
/**
|
|
8
|
+
* Voice tools use the standard AI SDK ToolSet type.
|
|
9
|
+
* Define tools using `tool()` from `ai` with `inputSchema` and `execute`.
|
|
10
|
+
*
|
|
11
|
+
* All voice tools MUST have an `execute` function. Schema-only tools
|
|
12
|
+
* (tools without execute) are not supported in voice agents.
|
|
13
|
+
*/
|
|
14
|
+
export type VoiceToolSet = ToolSet;
|
|
15
|
+
/** @deprecated Use `ToolSet[string]` or define tools with `tool()` from `ai`. */
|
|
16
|
+
export type VoiceToolDef = ToolSet[string];
|
|
17
|
+
/**
|
|
18
|
+
* Voice agent configuration — supports both flat tool agents (v1) and
|
|
19
|
+
* flow-aware agents (v2) when `flow` is provided.
|
|
20
|
+
*/
|
|
21
|
+
export interface VoiceAgentConfig extends AgentDefinition {
|
|
22
|
+
tools?: ToolSet;
|
|
23
|
+
/** Gemini Live voice preset. */
|
|
24
|
+
voice?: string;
|
|
25
|
+
/** Optional flow config. When set, VoiceEngine uses CapabilityCallWorker. */
|
|
26
|
+
flow?: FlowConfig;
|
|
27
|
+
/** The initial node ID when `flow` is set. Defaults to the first node in the flow. */
|
|
28
|
+
initialNode?: string;
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
* Shared interface implemented by both CallWorker and CapabilityCallWorker.
|
|
32
|
+
* VoiceEngine holds workers through this interface.
|
|
33
|
+
*/
|
|
34
|
+
export interface WorkerLike {
|
|
35
|
+
readonly callId: string;
|
|
36
|
+
start(): Promise<void>;
|
|
37
|
+
stop(): Promise<void>;
|
|
38
|
+
}
|
|
39
|
+
/** Gemini model/API configuration. */
|
|
40
|
+
export interface GeminiConfig {
|
|
41
|
+
apiKey: string;
|
|
42
|
+
model?: string;
|
|
43
|
+
}
|
|
44
|
+
/** Top-level VoiceEngine configuration. */
|
|
45
|
+
export interface VoiceEngineConfig {
|
|
46
|
+
foundation: Foundation;
|
|
47
|
+
agents: VoiceAgentConfig[];
|
|
48
|
+
defaultAgentId: string;
|
|
49
|
+
/**
|
|
50
|
+
* Gemini API configuration. Required when using the default Gemini provider.
|
|
51
|
+
* Optional when a custom `createModelClient` factory is provided.
|
|
52
|
+
*/
|
|
53
|
+
gemini?: GeminiConfig;
|
|
54
|
+
/**
|
|
55
|
+
* Custom factory for creating a RealtimeAudioClient per call.
|
|
56
|
+
* When provided, this factory is used instead of creating a GeminiLiveSession.
|
|
57
|
+
* This is the provider abstraction point — use it to plug in OpenAI Realtime,
|
|
58
|
+
* a custom WebSocket client, or any other provider.
|
|
59
|
+
*
|
|
60
|
+
* Example (OpenAI):
|
|
61
|
+
* ```typescript
|
|
62
|
+
* createModelClient: (agent) => new OpenAIRealtimeClient({ apiKey: '...' })
|
|
63
|
+
* ```
|
|
64
|
+
*/
|
|
65
|
+
createModelClient?: (agent: VoiceAgentConfig) => RealtimeAudioClient;
|
|
66
|
+
/**
|
|
67
|
+
* Optional LivePromptAssembler (port) for building audio-optimized prompts.
|
|
68
|
+
* Used by the core RealtimeRuntime via OrchestrationAuthority.
|
|
69
|
+
*/
|
|
70
|
+
promptAssembler?: LivePromptAssembler;
|
|
71
|
+
/**
|
|
72
|
+
* Optional MemoryService for cross-session long-term memory.
|
|
73
|
+
* When provided, the authority handles memory preloading and ingestion.
|
|
74
|
+
*/
|
|
75
|
+
memoryService?: MemoryService;
|
|
76
|
+
/**
|
|
77
|
+
* Hook callbacks for lifecycle events (onStart, onEnd, onToolResult, etc.).
|
|
78
|
+
* Passed to the OrchestrationAuthority for full hook parity with text Runtime.
|
|
79
|
+
*/
|
|
80
|
+
hooks?: HarnessHooks;
|
|
81
|
+
/**
|
|
82
|
+
* Controls automatic memory ingestion behavior.
|
|
83
|
+
* - 'onEnd': Ingest after session close (default when memoryService is set)
|
|
84
|
+
* - 'manual': Developer must call memoryService.addSessionToMemory() explicitly
|
|
85
|
+
* - 'hook': Fires onMemoryIngest hook
|
|
86
|
+
*/
|
|
87
|
+
memoryIngestion?: 'onEnd' | 'manual' | 'hook';
|
|
88
|
+
/**
|
|
89
|
+
* Optional dedicated text model used to verify extraction-node data in
|
|
90
|
+
* realtime sessions. When omitted, the core authority stays in conservative
|
|
91
|
+
* fallback mode rather than claiming full extraction parity.
|
|
92
|
+
*/
|
|
93
|
+
extractionModel?: LanguageModel;
|
|
94
|
+
/**
|
|
95
|
+
* Optional auto-retrieve for realtime sessions. Passed to DefaultOrchestrationAuthority
|
|
96
|
+
* so each turn can refresh `knowledge:realtime` in session working memory.
|
|
97
|
+
*/
|
|
98
|
+
autoRetrieveProvider?: AutoRetrieveProvider;
|
|
99
|
+
}
|
|
100
|
+
/** Transport session abstraction for audio I/O. */
|
|
101
|
+
export interface TransportSession {
|
|
102
|
+
/** Send audio frames to the client. */
|
|
103
|
+
sendAudio(data: Uint8Array): void;
|
|
104
|
+
/** Register a handler that receives audio frames from the client. */
|
|
105
|
+
onAudio(handler: (data: Uint8Array) => void): void;
|
|
106
|
+
/** Register a handler that is called when the session ends. */
|
|
107
|
+
onClose(handler: () => void): void;
|
|
108
|
+
/** Close the transport. */
|
|
109
|
+
close(): void;
|
|
110
|
+
}
|
|
111
|
+
/** Parameters for accepting a new call. */
|
|
112
|
+
export interface AcceptCallParams {
|
|
113
|
+
callId: string;
|
|
114
|
+
sessionId?: string;
|
|
115
|
+
userId?: string;
|
|
116
|
+
transport: TransportSession;
|
|
117
|
+
agentId?: string;
|
|
118
|
+
}
|
|
119
|
+
/**
|
|
120
|
+
* Realtime events emitted by the Gemini Live session.
|
|
121
|
+
*/
|
|
122
|
+
export type RealtimeEvent = {
|
|
123
|
+
type: 'audio';
|
|
124
|
+
data: Uint8Array;
|
|
125
|
+
} | {
|
|
126
|
+
type: 'transcript';
|
|
127
|
+
text: string;
|
|
128
|
+
role: 'user' | 'assistant';
|
|
129
|
+
} | {
|
|
130
|
+
type: 'tool-call';
|
|
131
|
+
id: string;
|
|
132
|
+
name: string;
|
|
133
|
+
args: unknown;
|
|
134
|
+
} | {
|
|
135
|
+
type: 'tool-result';
|
|
136
|
+
id: string;
|
|
137
|
+
name: string;
|
|
138
|
+
result: unknown;
|
|
139
|
+
} | {
|
|
140
|
+
type: 'interrupted';
|
|
141
|
+
} | {
|
|
142
|
+
type: 'turn-complete';
|
|
143
|
+
} | {
|
|
144
|
+
type: 'session-resumed';
|
|
145
|
+
newHandle: string;
|
|
146
|
+
} | {
|
|
147
|
+
type: 'error';
|
|
148
|
+
error: string;
|
|
149
|
+
};
|
|
150
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,iCAAiC,CAAC;AACnF,OAAO,KAAK,EAAE,oBAAoB,EAAE,UAAU,EAAE,YAAY,EAAE,OAAO,EAAE,MAAM,4BAA4B,CAAC;AAC1G,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,mCAAmC,CAAC;AAC7E,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,6BAA6B,CAAC;AACjE,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,+BAA+B,CAAC;AACzE,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AAExC;;;;;;GAMG;AACH,MAAM,MAAM,YAAY,GAAG,OAAO,CAAC;AAEnC,iFAAiF;AACjF,MAAM,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;AAE3C;;;GAGG;AACH,MAAM,WAAW,gBAAiB,SAAQ,eAAe;IACvD,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,gCAAgC;IAChC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,6EAA6E;IAC7E,IAAI,CAAC,EAAE,UAAU,CAAC;IAClB,sFAAsF;IACtF,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;;GAGG;AACH,MAAM,WAAW,UAAU;IACzB,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACvB,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACvB;AAED,sCAAsC;AACtC,MAAM,WAAW,YAAY;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,2CAA2C;AAC3C,MAAM,WAAW,iBAAiB;IAChC,UAAU,EAAE,UAAU,CAAC;IACvB,MAAM,EAAE,gBAAgB,EAAE,CAAC;IAC3B,cAAc,EAAE,MAAM,CAAC;IACvB;;;OAGG;IACH,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB;;;;;;;;;;OAUG;IACH,iBAAiB,CAAC,EAAE,CAAC,KAAK,EAAE,gBAAgB,KAAK,mBAAmB,CAAC;IACrE;;;OAGG;IACH,eAAe,CAAC,EAAE,mBAAmB,CAAC;IACtC;;;OAGG;IACH,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B;;;OAGG;IACH,KAAK,CAAC,EAAE,YAAY,CAAC;IACrB;;;;;OAKG;IACH,eAAe,CAAC,EAAE,OAAO,GAAG,QAAQ,GAAG,MAAM,CAAC;IAC9C;;;;OAIG;IACH,eAAe,CAAC,EAAE,aAAa,CAAC;IAChC;;;OAGG;IACH,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;CAC7C;AAED,mDAAmD;AACnD,MAAM,WAAW,gBAAgB;IAC/B,uCAAuC;IACvC,SAAS,CAAC,IAAI,EAAE,UAAU,GAAG,IAAI,CAAC;IAClC,4CAA4C;IAC5C,OAAO,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,UAAU,KAAK,IAAI,GAAG,IAAI,CAAC;IACnD,oCAAoC;IACpC,OAAO,CAAC,OAAO,EAAE,MAAM,IAAI,GAAG,IAAI,CAAC;IACnC,2BAA2B;IAC3B,KAAK,IAAI,IAAI,CAAC;CACf;AAED,2CAA2C;AAC3C,MAAM,WAAW,gBAAgB;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,gBAAgB,CAAC;IAC5B,OAAO,CAAC
,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,MAAM,aAAa,GACrB;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,IAAI,EAAE,UAAU,CAAA;CAAE,GACnC;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,GAAG,WAAW,CAAA;CAAE,GAChE;IAAE,IAAI,EAAE,WAAW,CAAC;IAAC,EAAE,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,OAAO,CAAA;CAAE,GAC9D;IAAE,IAAI,EAAE,aAAa,CAAC;IAAC,EAAE,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,OAAO,CAAA;CAAE,GAClE;IAAE,IAAI,EAAE,aAAa,CAAA;CAAE,GACvB;IAAE,IAAI,EAAE,eAAe,CAAA;CAAE,GACzB;IAAE,IAAI,EAAE,iBAAiB,CAAC;IAAC,SAAS,EAAE,MAAM,CAAA;CAAE,GAC9C;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC"}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":""}
|
package/package.json
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@ariaflowagents/gemini-native-audio",
|
|
3
|
+
"version": "0.9.0",
|
|
4
|
+
"description": "Gemini Live Native Audio integration for AriaFlow — speech-to-speech voice agents with built-in STT/TTS",
|
|
5
|
+
"publishConfig": {
|
|
6
|
+
"access": "public"
|
|
7
|
+
},
|
|
8
|
+
"files": [
|
|
9
|
+
"dist",
|
|
10
|
+
"README.md"
|
|
11
|
+
],
|
|
12
|
+
"type": "module",
|
|
13
|
+
"main": "dist/index.js",
|
|
14
|
+
"types": "dist/index.d.ts",
|
|
15
|
+
"exports": {
|
|
16
|
+
".": {
|
|
17
|
+
"types": "./dist/index.d.ts",
|
|
18
|
+
"default": "./dist/index.js"
|
|
19
|
+
}
|
|
20
|
+
},
|
|
21
|
+
"scripts": {
|
|
22
|
+
"build": "tsc -p tsconfig.json",
|
|
23
|
+
"clean": "rm -rf dist",
|
|
24
|
+
"test": "npm run build && node --test test/*.test.js"
|
|
25
|
+
},
|
|
26
|
+
"dependencies": {
|
|
27
|
+
"@google/genai": "^1.0.0",
|
|
28
|
+
"ws": "^8.19.0",
|
|
29
|
+
"zod-to-json-schema": "^3.24.0"
|
|
30
|
+
},
|
|
31
|
+
"peerDependencies": {
|
|
32
|
+
"@ariaflowagents/core": "workspace:*",
|
|
33
|
+
"ai": "^6.0.0",
|
|
34
|
+
"zod": "^3.0.0"
|
|
35
|
+
},
|
|
36
|
+
"devDependencies": {
|
|
37
|
+
"@ariaflowagents/core": "workspace:*",
|
|
38
|
+
"@types/node": "^20.11.0",
|
|
39
|
+
"@types/ws": "^8.5.13",
|
|
40
|
+
"ai": "^6.0.0",
|
|
41
|
+
"typescript": "^5.3.0",
|
|
42
|
+
"zod": "^3.23.0"
|
|
43
|
+
}
|
|
44
|
+
}
|