@ariaflowagents/gemini-native-audio 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/README.md +104 -0
  2. package/dist/CallWorker.d.ts +56 -0
  3. package/dist/CallWorker.d.ts.map +1 -0
  4. package/dist/CallWorker.js +172 -0
  5. package/dist/CallWorker.js.map +1 -0
  6. package/dist/CapabilityCallWorker.d.ts +46 -0
  7. package/dist/CapabilityCallWorker.d.ts.map +1 -0
  8. package/dist/CapabilityCallWorker.js +319 -0
  9. package/dist/CapabilityCallWorker.js.map +1 -0
  10. package/dist/GeminiLiveSession.d.ts +86 -0
  11. package/dist/GeminiLiveSession.d.ts.map +1 -0
  12. package/dist/GeminiLiveSession.js +297 -0
  13. package/dist/GeminiLiveSession.js.map +1 -0
  14. package/dist/RealtimeCallWorker.d.ts +47 -0
  15. package/dist/RealtimeCallWorker.d.ts.map +1 -0
  16. package/dist/RealtimeCallWorker.js +55 -0
  17. package/dist/RealtimeCallWorker.js.map +1 -0
  18. package/dist/VoiceEngine.d.ts +67 -0
  19. package/dist/VoiceEngine.d.ts.map +1 -0
  20. package/dist/VoiceEngine.js +156 -0
  21. package/dist/VoiceEngine.js.map +1 -0
  22. package/dist/factories.d.ts +32 -0
  23. package/dist/factories.d.ts.map +1 -0
  24. package/dist/factories.js +43 -0
  25. package/dist/factories.js.map +1 -0
  26. package/dist/index.d.ts +13 -0
  27. package/dist/index.d.ts.map +1 -0
  28. package/dist/index.js +12 -0
  29. package/dist/index.js.map +1 -0
  30. package/dist/openai/OpenAIRealtimeClient.d.ts +51 -0
  31. package/dist/openai/OpenAIRealtimeClient.d.ts.map +1 -0
  32. package/dist/openai/OpenAIRealtimeClient.js +327 -0
  33. package/dist/openai/OpenAIRealtimeClient.js.map +1 -0
  34. package/dist/openai/index.d.ts +3 -0
  35. package/dist/openai/index.d.ts.map +1 -0
  36. package/dist/openai/index.js +2 -0
  37. package/dist/openai/index.js.map +1 -0
  38. package/dist/schema-bridge.d.ts +14 -0
  39. package/dist/schema-bridge.d.ts.map +1 -0
  40. package/dist/schema-bridge.js +20 -0
  41. package/dist/schema-bridge.js.map +1 -0
  42. package/dist/types.d.ts +150 -0
  43. package/dist/types.d.ts.map +1 -0
  44. package/dist/types.js +2 -0
  45. package/dist/types.js.map +1 -0
  46. package/package.json +44 -0
@@ -0,0 +1,150 @@
1
+ import type { AgentDefinition, Foundation } from '@ariaflowagents/core/foundation';
2
+ import type { AutoRetrieveProvider, FlowConfig, HarnessHooks, ToolSet } from '@ariaflowagents/core/types';
3
+ import type { LivePromptAssembler } from '@ariaflowagents/core/capabilities';
4
+ import type { MemoryService } from '@ariaflowagents/core/memory';
5
+ import type { RealtimeAudioClient } from '@ariaflowagents/core/realtime';
6
+ import type { LanguageModel } from 'ai';
7
+ /**
8
+ * Voice tools use the standard AI SDK ToolSet type (`VoiceToolSet` is a plain alias of the imported `ToolSet`).
9
+ * Define tools using `tool()` from `ai` with `inputSchema` and `execute`.
10
+ *
11
+ * All voice tools MUST have an `execute` function. Schema-only tools
12
+ * (tools without execute) are not supported in voice agents. The alias itself adds no type-level check for this requirement.
13
+ */
14
+ export type VoiceToolSet = ToolSet;
15
+ /** @deprecated Use `ToolSet[string]` or define tools with `tool()` from `ai`. This alias resolves to `ToolSet[string]`, i.e. the type of one entry in a `ToolSet`. */
16
+ export type VoiceToolDef = ToolSet[string];
17
+ /**
18
+ * Voice agent configuration — supports both flat tool agents (v1) and
19
+ * flow-aware agents (v2) when `flow` is provided. Extends `AgentDefinition` with four optional fields: `tools` (a `ToolSet`), `voice`, `flow`, and `initialNode`.
20
+ */
21
+ export interface VoiceAgentConfig extends AgentDefinition {
22
+ tools?: ToolSet;
23
+ /** Gemini Live voice preset, given as a string; this type does not restrict the allowed values. */
24
+ voice?: string;
25
+ /** Optional flow config. When set, VoiceEngine uses CapabilityCallWorker. */
26
+ flow?: FlowConfig;
27
+ /** The initial node ID when `flow` is set. Defaults to the first node in the flow. */
28
+ initialNode?: string;
29
+ }
30
+ /**
31
+ * Shared interface implemented by both CallWorker and CapabilityCallWorker.
32
+ * VoiceEngine holds workers through this interface, which exposes a read-only `callId` and promise-returning `start()` / `stop()` methods.
33
+ */
34
+ export interface WorkerLike {
35
+ readonly callId: string;
36
+ start(): Promise<void>;
37
+ stop(): Promise<void>;
38
+ }
39
+ /** Gemini model/API configuration: a required `apiKey` string and an optional `model` string. */
40
+ export interface GeminiConfig {
41
+ apiKey: string;
42
+ model?: string;
43
+ }
44
+ /** Top-level VoiceEngine configuration. `foundation`, `agents`, and `defaultAgentId` are required; all other fields are optional. NOTE(review): `defaultAgentId` presumably names one of the entries in `agents` — confirm against VoiceEngine. */
45
+ export interface VoiceEngineConfig {
46
+ foundation: Foundation;
47
+ agents: VoiceAgentConfig[];
48
+ defaultAgentId: string;
49
+ /**
50
+ * Gemini API configuration. Required when using the default Gemini provider.
51
+ * Optional when a custom `createModelClient` factory is provided. The field is typed as optional, so the "required" case is not enforced by the type.
52
+ */
53
+ gemini?: GeminiConfig;
54
+ /**
55
+ * Custom factory for creating a RealtimeAudioClient per call.
56
+ * When provided, this factory is used instead of creating a GeminiLiveSession. It receives the agent's `VoiceAgentConfig` and returns the client directly (the declared return type is not a Promise).
57
+ * This is the provider abstraction point — use it to plug in OpenAI Realtime,
58
+ * a custom WebSocket client, or any other provider.
59
+ *
60
+ * Example (OpenAI):
61
+ * ```typescript
62
+ * createModelClient: (agent) => new OpenAIRealtimeClient({ apiKey: '...' })
63
+ * ```
64
+ */
65
+ createModelClient?: (agent: VoiceAgentConfig) => RealtimeAudioClient;
66
+ /**
67
+ * Optional LivePromptAssembler (port) for building audio-optimized prompts.
68
+ * Used by the core RealtimeRuntime via OrchestrationAuthority.
69
+ */
70
+ promptAssembler?: LivePromptAssembler;
71
+ /**
72
+ * Optional MemoryService for cross-session long-term memory.
73
+ * When provided, the authority handles memory preloading and ingestion.
74
+ */
75
+ memoryService?: MemoryService;
76
+ /**
77
+ * Hook callbacks for lifecycle events (onStart, onEnd, onToolResult, etc.).
78
+ * Passed to the OrchestrationAuthority for full hook parity with text Runtime.
79
+ */
80
+ hooks?: HarnessHooks;
81
+ /**
82
+ * Controls automatic memory ingestion behavior. One of three string literals:
83
+ * - 'onEnd': Ingest after session close (default when memoryService is set)
84
+ * - 'manual': Developer must call memoryService.addSessionToMemory() explicitly
85
+ * - 'hook': Fires onMemoryIngest hook
86
+ */
87
+ memoryIngestion?: 'onEnd' | 'manual' | 'hook';
88
+ /**
89
+ * Optional dedicated text model (a `LanguageModel` from `ai`) used to verify extraction-node data in
90
+ * realtime sessions. When omitted, the core authority stays in conservative
91
+ * fallback mode rather than claiming full extraction parity.
92
+ */
93
+ extractionModel?: LanguageModel;
94
+ /**
95
+ * Optional auto-retrieve for realtime sessions. Passed to DefaultOrchestrationAuthority
96
+ * so each turn can refresh `knowledge:realtime` in session working memory.
97
+ */
98
+ autoRetrieveProvider?: AutoRetrieveProvider;
99
+ }
100
+ /** Transport session abstraction for audio I/O: one outbound send method, two handler-registration methods, and a close method. */
101
+ export interface TransportSession {
102
+ /** Send audio frames (as `Uint8Array` data) to the client. */
103
+ sendAudio(data: Uint8Array): void;
104
+ /** Register a handler that receives audio frames from the client as `Uint8Array` data. */
105
+ onAudio(handler: (data: Uint8Array) => void): void;
106
+ /** Register a handler that is called when the session ends. */
107
+ onClose(handler: () => void): void;
108
+ /** Close the transport. */
109
+ close(): void;
110
+ }
111
+ /** Parameters for accepting a new call. `callId` and `transport` are required; `sessionId`, `userId`, and `agentId` are optional. */
112
+ export interface AcceptCallParams {
113
+ callId: string;
114
+ sessionId?: string;
115
+ userId?: string;
116
+ transport: TransportSession;
117
+ agentId?: string;
118
+ }
119
+ /**
120
+ * Realtime events emitted by the Gemini Live session, modelled as a union discriminated by the `type` field: 'audio', 'transcript', 'tool-call', 'tool-result', 'interrupted', 'turn-complete', 'session-resumed', and 'error'. Tool `args` and `result` payloads are typed `unknown`, so consumers must narrow them before use.
121
+ */
122
+ export type RealtimeEvent = {
123
+ type: 'audio';
124
+ data: Uint8Array;
125
+ } | {
126
+ type: 'transcript';
127
+ text: string;
128
+ role: 'user' | 'assistant';
129
+ } | {
130
+ type: 'tool-call';
131
+ id: string;
132
+ name: string;
133
+ args: unknown;
134
+ } | {
135
+ type: 'tool-result';
136
+ id: string;
137
+ name: string;
138
+ result: unknown;
139
+ } | {
140
+ type: 'interrupted';
141
+ } | {
142
+ type: 'turn-complete';
143
+ } | {
144
+ type: 'session-resumed';
145
+ newHandle: string;
146
+ } | {
147
+ type: 'error';
148
+ error: string;
149
+ };
150
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,iCAAiC,CAAC;AACnF,OAAO,KAAK,EAAE,oBAAoB,EAAE,UAAU,EAAE,YAAY,EAAE,OAAO,EAAE,MAAM,4BAA4B,CAAC;AAC1G,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,mCAAmC,CAAC;AAC7E,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,6BAA6B,CAAC;AACjE,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,+BAA+B,CAAC;AACzE,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AAExC;;;;;;GAMG;AACH,MAAM,MAAM,YAAY,GAAG,OAAO,CAAC;AAEnC,iFAAiF;AACjF,MAAM,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;AAE3C;;;GAGG;AACH,MAAM,WAAW,gBAAiB,SAAQ,eAAe;IACvD,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,gCAAgC;IAChC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,6EAA6E;IAC7E,IAAI,CAAC,EAAE,UAAU,CAAC;IAClB,sFAAsF;IACtF,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;;GAGG;AACH,MAAM,WAAW,UAAU;IACzB,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACvB,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACvB;AAED,sCAAsC;AACtC,MAAM,WAAW,YAAY;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,2CAA2C;AAC3C,MAAM,WAAW,iBAAiB;IAChC,UAAU,EAAE,UAAU,CAAC;IACvB,MAAM,EAAE,gBAAgB,EAAE,CAAC;IAC3B,cAAc,EAAE,MAAM,CAAC;IACvB;;;OAGG;IACH,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB;;;;;;;;;;OAUG;IACH,iBAAiB,CAAC,EAAE,CAAC,KAAK,EAAE,gBAAgB,KAAK,mBAAmB,CAAC;IACrE;;;OAGG;IACH,eAAe,CAAC,EAAE,mBAAmB,CAAC;IACtC;;;OAGG;IACH,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B;;;OAGG;IACH,KAAK,CAAC,EAAE,YAAY,CAAC;IACrB;;;;;OAKG;IACH,eAAe,CAAC,EAAE,OAAO,GAAG,QAAQ,GAAG,MAAM,CAAC;IAC9C;;;;OAIG;IACH,eAAe,CAAC,EAAE,aAAa,CAAC;IAChC;;;OAGG;IACH,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;CAC7C;AAED,mDAAmD;AACnD,MAAM,WAAW,gBAAgB;IAC/B,uCAAuC;IACvC,SAAS,CAAC,IAAI,EAAE,UAAU,GAAG,IAAI,CAAC;IAClC,4CAA4C;IAC5C,OAAO,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,UAAU,KAAK,IAAI,GAAG,IAAI,CAAC;IACnD,oCAAoC;IACpC,OAAO,CAAC,OAAO,EAAE,MAAM,IAAI,GAAG,IAAI,CAAC;IACnC,2BAA2B;IAC3B,KAAK,IAAI,IAAI,CAAC;CACf;AAED,2CAA2C;AAC3C,MAAM,WAAW,gBAAgB;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,gBAAgB,CAAC;IAC5B,OAAO,CA
AC,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,MAAM,aAAa,GACrB;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,IAAI,EAAE,UAAU,CAAA;CAAE,GACnC;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,GAAG,WAAW,CAAA;CAAE,GAChE;IAAE,IAAI,EAAE,WAAW,CAAC;IAAC,EAAE,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,OAAO,CAAA;CAAE,GAC9D;IAAE,IAAI,EAAE,aAAa,CAAC;IAAC,EAAE,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,OAAO,CAAA;CAAE,GAClE;IAAE,IAAI,EAAE,aAAa,CAAA;CAAE,GACvB;IAAE,IAAI,EAAE,eAAe,CAAA;CAAE,GACzB;IAAE,IAAI,EAAE,iBAAiB,CAAC;IAAC,SAAS,EAAE,MAAM,CAAA;CAAE,GAC9C;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC"}
package/dist/types.js ADDED
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":""}
package/package.json ADDED
@@ -0,0 +1,44 @@
1
+ {
2
+ "name": "@ariaflowagents/gemini-native-audio",
3
+ "version": "0.9.0",
4
+ "description": "Gemini Live Native Audio integration for AriaFlow — speech-to-speech voice agents with built-in STT/TTS",
5
+ "publishConfig": {
6
+ "access": "public"
7
+ },
8
+ "files": [
9
+ "dist",
10
+ "README.md"
11
+ ],
12
+ "type": "module",
13
+ "main": "dist/index.js",
14
+ "types": "dist/index.d.ts",
15
+ "exports": {
16
+ ".": {
17
+ "types": "./dist/index.d.ts",
18
+ "default": "./dist/index.js"
19
+ }
20
+ },
21
+ "scripts": {
22
+ "build": "tsc -p tsconfig.json",
23
+ "clean": "rm -rf dist",
24
+ "test": "npm run build && node --test test/*.test.js"
25
+ },
26
+ "dependencies": {
27
+ "@google/genai": "^1.0.0",
28
+ "ws": "^8.19.0",
29
+ "zod-to-json-schema": "^3.24.0"
30
+ },
31
+ "peerDependencies": {
32
+ "@ariaflowagents/core": "workspace:*",
33
+ "ai": "^6.0.0",
34
+ "zod": "^3.0.0"
35
+ },
36
+ "devDependencies": {
37
+ "@ariaflowagents/core": "workspace:*",
38
+ "@types/node": "^20.11.0",
39
+ "@types/ws": "^8.5.13",
40
+ "ai": "^6.0.0",
41
+ "typescript": "^5.3.0",
42
+ "zod": "^3.23.0"
43
+ }
44
+ }