@just-every/ensemble 0.2.87 → 0.2.88

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215) hide show
  1. package/dist/cjs/config/tool_execution.cjs +47 -0
  2. package/dist/cjs/config/tool_execution.d.ts +12 -0
  3. package/dist/cjs/config/tool_execution.d.ts.map +1 -0
  4. package/dist/cjs/config/tool_execution.js.map +1 -0
  5. package/dist/cjs/core/ensemble_embed.cjs +35 -0
  6. package/dist/cjs/core/ensemble_embed.d.ts +3 -0
  7. package/dist/cjs/core/ensemble_embed.d.ts.map +1 -0
  8. package/dist/cjs/core/ensemble_embed.js.map +1 -0
  9. package/dist/cjs/core/ensemble_image.cjs +13 -0
  10. package/dist/cjs/core/ensemble_image.d.ts +3 -0
  11. package/dist/cjs/core/ensemble_image.d.ts.map +1 -0
  12. package/dist/cjs/core/ensemble_image.js.map +1 -0
  13. package/dist/cjs/core/ensemble_listen.cjs +162 -0
  14. package/dist/cjs/core/ensemble_listen.d.ts +5 -0
  15. package/dist/cjs/core/ensemble_listen.d.ts.map +1 -0
  16. package/dist/cjs/core/ensemble_listen.js.map +1 -0
  17. package/dist/cjs/core/ensemble_live.cjs +387 -0
  18. package/dist/cjs/core/ensemble_live.d.ts +14 -0
  19. package/dist/cjs/core/ensemble_live.d.ts.map +1 -0
  20. package/dist/cjs/core/ensemble_live.js.map +1 -0
  21. package/dist/cjs/core/ensemble_request.cjs +409 -0
  22. package/dist/cjs/core/ensemble_request.d.ts +4 -0
  23. package/dist/cjs/core/ensemble_request.d.ts.map +1 -0
  24. package/dist/cjs/core/ensemble_request.js.map +1 -0
  25. package/dist/cjs/core/ensemble_voice.cjs +284 -0
  26. package/dist/cjs/core/ensemble_voice.d.ts +4 -0
  27. package/dist/cjs/core/ensemble_voice.d.ts.map +1 -0
  28. package/dist/cjs/core/ensemble_voice.js.map +1 -0
  29. package/dist/cjs/data/model_data.cjs +1460 -0
  30. package/dist/cjs/data/model_data.d.ts +71 -0
  31. package/dist/cjs/data/model_data.d.ts.map +1 -0
  32. package/dist/cjs/data/model_data.js.map +1 -0
  33. package/dist/cjs/index.cjs +139 -0
  34. package/dist/cjs/index.d.ts +37 -0
  35. package/dist/cjs/index.d.ts.map +1 -0
  36. package/dist/cjs/index.js.map +1 -0
  37. package/dist/cjs/model_providers/base_provider.cjs +46 -0
  38. package/dist/cjs/model_providers/base_provider.d.ts +13 -0
  39. package/dist/cjs/model_providers/base_provider.d.ts.map +1 -0
  40. package/dist/cjs/model_providers/base_provider.js.map +1 -0
  41. package/dist/cjs/model_providers/claude.cjs +782 -0
  42. package/dist/cjs/model_providers/claude.d.ts +12 -0
  43. package/dist/cjs/model_providers/claude.d.ts.map +1 -0
  44. package/dist/cjs/model_providers/claude.js.map +1 -0
  45. package/dist/cjs/model_providers/deepseek.cjs +129 -0
  46. package/dist/cjs/model_providers/deepseek.d.ts +8 -0
  47. package/dist/cjs/model_providers/deepseek.d.ts.map +1 -0
  48. package/dist/cjs/model_providers/deepseek.js.map +1 -0
  49. package/dist/cjs/model_providers/elevenlabs.cjs +140 -0
  50. package/dist/cjs/model_providers/elevenlabs.d.ts +37 -0
  51. package/dist/cjs/model_providers/elevenlabs.d.ts.map +1 -0
  52. package/dist/cjs/model_providers/elevenlabs.js.map +1 -0
  53. package/dist/cjs/model_providers/gemini.cjs +1654 -0
  54. package/dist/cjs/model_providers/gemini.d.ts +22 -0
  55. package/dist/cjs/model_providers/gemini.d.ts.map +1 -0
  56. package/dist/cjs/model_providers/gemini.js.map +1 -0
  57. package/dist/cjs/model_providers/grok.cjs +25 -0
  58. package/dist/cjs/model_providers/grok.d.ts +8 -0
  59. package/dist/cjs/model_providers/grok.d.ts.map +1 -0
  60. package/dist/cjs/model_providers/grok.js.map +1 -0
  61. package/dist/cjs/model_providers/model_provider.cjs +296 -0
  62. package/dist/cjs/model_providers/model_provider.d.ts +10 -0
  63. package/dist/cjs/model_providers/model_provider.d.ts.map +1 -0
  64. package/dist/cjs/model_providers/model_provider.js.map +1 -0
  65. package/dist/cjs/model_providers/openai.cjs +1117 -0
  66. package/dist/cjs/model_providers/openai.d.ts +19 -0
  67. package/dist/cjs/model_providers/openai.d.ts.map +1 -0
  68. package/dist/cjs/model_providers/openai.js.map +1 -0
  69. package/dist/cjs/model_providers/openai_chat.cjs +787 -0
  70. package/dist/cjs/model_providers/openai_chat.d.ts +20 -0
  71. package/dist/cjs/model_providers/openai_chat.d.ts.map +1 -0
  72. package/dist/cjs/model_providers/openai_chat.js.map +1 -0
  73. package/dist/cjs/model_providers/openrouter.cjs +22 -0
  74. package/dist/cjs/model_providers/openrouter.d.ts +6 -0
  75. package/dist/cjs/model_providers/openrouter.d.ts.map +1 -0
  76. package/dist/cjs/model_providers/openrouter.js.map +1 -0
  77. package/dist/cjs/model_providers/test_provider.cjs +236 -0
  78. package/dist/cjs/model_providers/test_provider.d.ts +29 -0
  79. package/dist/cjs/model_providers/test_provider.d.ts.map +1 -0
  80. package/dist/cjs/model_providers/test_provider.js.map +1 -0
  81. package/dist/cjs/tsconfig.cjs.tsbuildinfo +1 -0
  82. package/dist/cjs/types/api_types.cjs +3 -0
  83. package/dist/cjs/types/api_types.d.ts +249 -0
  84. package/dist/cjs/types/api_types.d.ts.map +1 -0
  85. package/dist/cjs/types/api_types.js.map +1 -0
  86. package/dist/cjs/types/errors.cjs +76 -0
  87. package/dist/cjs/types/errors.d.ts +34 -0
  88. package/dist/cjs/types/errors.d.ts.map +1 -0
  89. package/dist/cjs/types/errors.js.map +1 -0
  90. package/dist/cjs/types/types.cjs +3 -0
  91. package/dist/cjs/types/types.d.ts +638 -0
  92. package/dist/cjs/types/types.d.ts.map +1 -0
  93. package/dist/cjs/types/types.js.map +1 -0
  94. package/dist/cjs/utils/agent.cjs +384 -0
  95. package/dist/cjs/utils/agent.d.ts +48 -0
  96. package/dist/cjs/utils/agent.d.ts.map +1 -0
  97. package/dist/cjs/utils/agent.js.map +1 -0
  98. package/dist/cjs/utils/audio_stream_player.cjs +342 -0
  99. package/dist/cjs/utils/audio_stream_player.d.ts +37 -0
  100. package/dist/cjs/utils/audio_stream_player.d.ts.map +1 -0
  101. package/dist/cjs/utils/audio_stream_player.js.map +1 -0
  102. package/dist/cjs/utils/citation_tracker.cjs +25 -0
  103. package/dist/cjs/utils/citation_tracker.d.ts +12 -0
  104. package/dist/cjs/utils/citation_tracker.d.ts.map +1 -0
  105. package/dist/cjs/utils/citation_tracker.js.map +1 -0
  106. package/dist/cjs/utils/config_manager.cjs +105 -0
  107. package/dist/cjs/utils/config_manager.d.ts +31 -0
  108. package/dist/cjs/utils/config_manager.d.ts.map +1 -0
  109. package/dist/cjs/utils/config_manager.js.map +1 -0
  110. package/dist/cjs/utils/cost_tracker.cjs +226 -0
  111. package/dist/cjs/utils/cost_tracker.d.ts +33 -0
  112. package/dist/cjs/utils/cost_tracker.d.ts.map +1 -0
  113. package/dist/cjs/utils/cost_tracker.js.map +1 -0
  114. package/dist/cjs/utils/create_tool_function.cjs +182 -0
  115. package/dist/cjs/utils/create_tool_function.d.ts +3 -0
  116. package/dist/cjs/utils/create_tool_function.d.ts.map +1 -0
  117. package/dist/cjs/utils/create_tool_function.js.map +1 -0
  118. package/dist/cjs/utils/delta_buffer.cjs +65 -0
  119. package/dist/cjs/utils/delta_buffer.d.ts +14 -0
  120. package/dist/cjs/utils/delta_buffer.d.ts.map +1 -0
  121. package/dist/cjs/utils/delta_buffer.js.map +1 -0
  122. package/dist/cjs/utils/ensemble_result.cjs +167 -0
  123. package/dist/cjs/utils/ensemble_result.d.ts +33 -0
  124. package/dist/cjs/utils/ensemble_result.d.ts.map +1 -0
  125. package/dist/cjs/utils/ensemble_result.js.map +1 -0
  126. package/dist/cjs/utils/event_controller.cjs +59 -0
  127. package/dist/cjs/utils/event_controller.d.ts +13 -0
  128. package/dist/cjs/utils/event_controller.d.ts.map +1 -0
  129. package/dist/cjs/utils/event_controller.js.map +1 -0
  130. package/dist/cjs/utils/external_models.cjs +42 -0
  131. package/dist/cjs/utils/external_models.d.ts +9 -0
  132. package/dist/cjs/utils/external_models.d.ts.map +1 -0
  133. package/dist/cjs/utils/external_models.js.map +1 -0
  134. package/dist/cjs/utils/image_to_text.cjs +58 -0
  135. package/dist/cjs/utils/image_to_text.d.ts +3 -0
  136. package/dist/cjs/utils/image_to_text.d.ts.map +1 -0
  137. package/dist/cjs/utils/image_to_text.js.map +1 -0
  138. package/dist/cjs/utils/image_utils.cjs +168 -0
  139. package/dist/cjs/utils/image_utils.d.ts +18 -0
  140. package/dist/cjs/utils/image_utils.d.ts.map +1 -0
  141. package/dist/cjs/utils/image_utils.js.map +1 -0
  142. package/dist/cjs/utils/image_validation.cjs +31 -0
  143. package/dist/cjs/utils/image_validation.d.ts +3 -0
  144. package/dist/cjs/utils/image_validation.d.ts.map +1 -0
  145. package/dist/cjs/utils/image_validation.js.map +1 -0
  146. package/dist/cjs/utils/llm_logger.cjs +31 -0
  147. package/dist/cjs/utils/llm_logger.d.ts +8 -0
  148. package/dist/cjs/utils/llm_logger.d.ts.map +1 -0
  149. package/dist/cjs/utils/llm_logger.js.map +1 -0
  150. package/dist/cjs/utils/message_history.cjs +560 -0
  151. package/dist/cjs/utils/message_history.d.ts +65 -0
  152. package/dist/cjs/utils/message_history.d.ts.map +1 -0
  153. package/dist/cjs/utils/message_history.js.map +1 -0
  154. package/dist/cjs/utils/model_class_config.cjs +105 -0
  155. package/dist/cjs/utils/model_class_config.d.ts +12 -0
  156. package/dist/cjs/utils/model_class_config.d.ts.map +1 -0
  157. package/dist/cjs/utils/model_class_config.js.map +1 -0
  158. package/dist/cjs/utils/pause_controller.cjs +90 -0
  159. package/dist/cjs/utils/pause_controller.d.ts +14 -0
  160. package/dist/cjs/utils/pause_controller.d.ts.map +1 -0
  161. package/dist/cjs/utils/pause_controller.js.map +1 -0
  162. package/dist/cjs/utils/quota_tracker.cjs +311 -0
  163. package/dist/cjs/utils/quota_tracker.d.ts +22 -0
  164. package/dist/cjs/utils/quota_tracker.d.ts.map +1 -0
  165. package/dist/cjs/utils/quota_tracker.js.map +1 -0
  166. package/dist/cjs/utils/retry_handler.cjs +131 -0
  167. package/dist/cjs/utils/retry_handler.d.ts +15 -0
  168. package/dist/cjs/utils/retry_handler.d.ts.map +1 -0
  169. package/dist/cjs/utils/retry_handler.js.map +1 -0
  170. package/dist/cjs/utils/running_tool_tracker.cjs +133 -0
  171. package/dist/cjs/utils/running_tool_tracker.d.ts +42 -0
  172. package/dist/cjs/utils/running_tool_tracker.d.ts.map +1 -0
  173. package/dist/cjs/utils/running_tool_tracker.js.map +1 -0
  174. package/dist/cjs/utils/sequential_queue.cjs +73 -0
  175. package/dist/cjs/utils/sequential_queue.d.ts +13 -0
  176. package/dist/cjs/utils/sequential_queue.d.ts.map +1 -0
  177. package/dist/cjs/utils/sequential_queue.js.map +1 -0
  178. package/dist/cjs/utils/stream_handler.cjs +73 -0
  179. package/dist/cjs/utils/stream_handler.d.ts +16 -0
  180. package/dist/cjs/utils/stream_handler.d.ts.map +1 -0
  181. package/dist/cjs/utils/stream_handler.js.map +1 -0
  182. package/dist/cjs/utils/summary_utils.cjs +211 -0
  183. package/dist/cjs/utils/summary_utils.d.ts +7 -0
  184. package/dist/cjs/utils/summary_utils.d.ts.map +1 -0
  185. package/dist/cjs/utils/summary_utils.js.map +1 -0
  186. package/dist/cjs/utils/test_utils.cjs +212 -0
  187. package/dist/cjs/utils/test_utils.d.ts +58 -0
  188. package/dist/cjs/utils/test_utils.d.ts.map +1 -0
  189. package/dist/cjs/utils/test_utils.js.map +1 -0
  190. package/dist/cjs/utils/tool_execution_manager.cjs +139 -0
  191. package/dist/cjs/utils/tool_execution_manager.d.ts +7 -0
  192. package/dist/cjs/utils/tool_execution_manager.d.ts.map +1 -0
  193. package/dist/cjs/utils/tool_execution_manager.js.map +1 -0
  194. package/dist/cjs/utils/tool_parameter_utils.cjs +168 -0
  195. package/dist/cjs/utils/tool_parameter_utils.d.ts +5 -0
  196. package/dist/cjs/utils/tool_parameter_utils.d.ts.map +1 -0
  197. package/dist/cjs/utils/tool_parameter_utils.js.map +1 -0
  198. package/dist/cjs/utils/tool_result_processor.cjs +363 -0
  199. package/dist/cjs/utils/tool_result_processor.d.ts +11 -0
  200. package/dist/cjs/utils/tool_result_processor.d.ts.map +1 -0
  201. package/dist/cjs/utils/tool_result_processor.js.map +1 -0
  202. package/dist/cjs/utils/verification.cjs +59 -0
  203. package/dist/cjs/utils/verification.d.ts +7 -0
  204. package/dist/cjs/utils/verification.d.ts.map +1 -0
  205. package/dist/cjs/utils/verification.js.map +1 -0
  206. package/dist/tsconfig.tsbuildinfo +1 -1
  207. package/dist/utils/audio_stream_player.d.ts +2 -0
  208. package/dist/utils/audio_stream_player.d.ts.map +1 -1
  209. package/dist/utils/audio_stream_player.js +83 -1
  210. package/dist/utils/audio_stream_player.js.map +1 -1
  211. package/dist/utils/cost_tracker.d.ts +7 -0
  212. package/dist/utils/cost_tracker.d.ts.map +1 -1
  213. package/dist/utils/cost_tracker.js +29 -0
  214. package/dist/utils/cost_tracker.js.map +1 -1
  215. package/package.json +18 -3
@@ -0,0 +1,1654 @@
1
+ "use strict";
2
// Module-interop helpers. These look like the standard helpers the TypeScript
// compiler emits for CommonJS output (NOTE(review): confirm against the build
// config). Each one reuses an implementation already present on `this` if any.
//
// __createBinding(o, m, k, k2): exposes m[k] on o under the name k2 (defaults
// to k). When Object.create exists it defines a property, substituting an
// enumerable getter that reads m[k] if the source descriptor is missing, is an
// accessor on a non-__esModule object, or is a writable/configurable data
// property; otherwise it falls back to a plain assignment.
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
// __setModuleDefault(o, v): stores v as the enumerable "default" property of o.
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
// __importStar(mod): returns mod unchanged when it is flagged __esModule.
// Otherwise it builds a fresh object that re-exposes every own property of mod
// except "default" (via __createBinding) and sets "default" to mod itself.
+ var __importStar = (this && this.__importStar) || (function () {
19
// ownKeys replaces itself on first call with Object.getOwnPropertyNames, or
// with a for-in/hasOwnProperty fallback when that is unavailable.
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.geminiProvider = exports.GeminiProvider = void 0;
37
+ exports.getImageMimeType = getImageMimeType;
38
+ exports.cleanBase64Data = cleanBase64Data;
39
+ const genai_1 = require("@google/genai");
40
+ const uuid_1 = require("uuid");
41
+ const base_provider_js_1 = require("./base_provider.cjs");
42
+ const index_js_1 = require("../index.cjs");
43
+ const llm_logger_js_1 = require("../utils/llm_logger.cjs");
44
+ const pause_controller_js_1 = require("../utils/pause_controller.cjs");
45
+ const image_utils_js_1 = require("../utils/image_utils.cjs");
46
/**
 * Translate a single tool-parameter definition into the schema object used in
 * Gemini function declarations.
 *
 * - 'string' / 'number' / 'boolean' / 'object' / 'array' map to the matching
 *   `Type` member; 'null' and any other value fall back to `Type.STRING`
 *   (with a console warning).
 * - Arrays always get an `items` schema. Items that are objects (or declare
 *   `properties`) are described as JSON-encoded strings and the description is
 *   extended with a hint listing the expected property names.
 * - Objects get a `properties` map whose entries are converted recursively.
 * - For every other type a non-function `enum` is copied and `format` is set
 *   to 'enum'; function-valued enums are skipped with a warning.
 *
 * @param {object} param Parameter definition (`type`, `description`, and
 *   optionally `items`, `properties`, `enum`).
 * @returns {object} Schema with `type`, `description` and the extra fields
 *   described above.
 */
function convertParameterToGeminiFormat(param) {
    const { Type } = genai_1;
    const topLevelTypes = new Map([
        ['string', Type.STRING],
        ['number', Type.NUMBER],
        ['boolean', Type.BOOLEAN],
        ['object', Type.OBJECT],
        ['array', Type.ARRAY],
    ]);

    let geminiType = Type.STRING;
    if (topLevelTypes.has(param.type)) {
        geminiType = topLevelTypes.get(param.type);
    }
    else if (param.type === 'null') {
        console.warn("Mapping 'null' type to STRING");
    }
    else {
        console.warn(`Unsupported parameter type '${param.type}'. Defaulting to STRING.`);
    }

    const schema = { type: geminiType, description: param.description };

    if (geminiType === Type.ARRAY) {
        // Only an object-valued `items` carries usable type information.
        const itemSpec = param.items && typeof param.items === 'object' ? param.items : undefined;
        const itemType = itemSpec?.type;
        const itemProperties = itemSpec !== undefined && 'properties' in itemSpec ? itemSpec.properties : undefined;

        if (itemType === 'object' || itemProperties) {
            // Structured items are passed as JSON strings; say so in the description.
            schema.items = { type: Type.STRING };
            const propertyHint = itemProperties
                ? `. Expected properties: ${Object.keys(itemProperties).join(', ')}`
                : '';
            schema.description = `${schema.description || 'Array parameter'} (Each item should be a JSON-encoded object)${propertyHint}`;
        }
        else if (itemType) {
            const scalarItemTypes = new Map([
                ['string', Type.STRING],
                ['number', Type.NUMBER],
                ['boolean', Type.BOOLEAN],
            ]);
            schema.items = { type: scalarItemTypes.get(itemType) ?? Type.STRING };
            const itemEnum = itemSpec.enum;
            if (typeof itemEnum === 'function') {
                console.warn('Gemini provider does not support async enum functions in array items');
            }
            else if (itemEnum) {
                schema.items.enum = itemEnum;
            }
        }
        else {
            schema.items = { type: Type.STRING };
        }
        return schema;
    }

    if (geminiType === Type.OBJECT) {
        schema.properties = {};
        if (param.properties && typeof param.properties === 'object') {
            for (const [propName, propSchema] of Object.entries(param.properties)) {
                schema.properties[propName] = convertParameterToGeminiFormat(propSchema);
            }
        }
        return schema;
    }

    if (typeof param.enum === 'function') {
        console.warn('Gemini provider does not support async enum functions. Enum will be omitted.');
    }
    else if (param.enum) {
        schema.format = 'enum';
        schema.enum = param.enum;
    }
    return schema;
}
144
/**
 * Return a copy of a tool `parameters` schema in which every function-valued
 * `enum` has been replaced by the array it resolves to.
 *
 * Each object-valued entry of `params.properties` is shallow-copied and then
 * processed: its own `enum`, the `enum` of an object-valued `items` schema,
 * and any nested `properties` (recursively). An enum function that throws, or
 * that resolves to anything other than a non-empty array, causes the `enum`
 * key to be removed from the copy. The input object is never mutated.
 *
 * @param {*} params Schema object; non-object or falsy values are returned as-is.
 * @returns {Promise<*>} The resolved copy (or the original non-object value).
 */
async function resolveAsyncEnums(params) {
    if (!params || typeof params !== 'object') {
        return params;
    }
    const resolved = { ...params };
    if (resolved.properties) {
        const resolvedProps = {};
        for (const [key, value] of Object.entries(resolved.properties)) {
            resolvedProps[key] = value && typeof value === 'object'
                ? await resolveSchemaNodeEnums(value, key)
                : value;
        }
        resolved.properties = resolvedProps;
    }
    return resolved;
}

/**
 * Resolve the function-valued `enum` of one schema node (and of its `items`
 * schema, if any), then hand the copy to resolveAsyncEnums for nested
 * `properties`.
 *
 * @param {object} node Schema node to copy and resolve.
 * @param {string} path Human-readable location used in the warning message.
 * @returns {Promise<object>} Resolved shallow copy of `node`.
 */
async function resolveSchemaNodeEnums(node, path) {
    const copy = { ...node };
    if (typeof copy.enum === 'function') {
        try {
            const enumValue = await copy.enum();
            if (Array.isArray(enumValue) && enumValue.length > 0) {
                copy.enum = enumValue;
            }
            else {
                delete copy.enum;
            }
        }
        catch (error) {
            // Best effort: the parameter stays usable without its enum, but the
            // failure should be visible instead of silently swallowed.
            console.warn(`[Gemini] Failed to resolve async enum for '${path}'. Enum will be omitted.`, error);
            delete copy.enum;
        }
    }
    // Array parameters keep their enum on `items`; resolve it as well so the
    // later schema conversion does not have to drop it.
    const { items } = copy;
    if (items && typeof items === 'object' && !Array.isArray(items)) {
        copy.items = await resolveSchemaNodeEnums(items, `${path}[]`);
    }
    return resolveAsyncEnums(copy);
}
178
/**
 * Build the Gemini function declarations for a list of tools.
 *
 * The tool named 'google_web_search' is not declared as a function: it only
 * logs that Google Search grounding is being enabled and is left out of the
 * result. For every other tool, function-valued enums are resolved first and
 * each parameter is converted with convertParameterToGeminiFormat. Tools are
 * processed concurrently; the output keeps the input order.
 *
 * @param {Array<object>} tools Tools exposing `definition.function`.
 * @returns {Promise<Array<object>>} Declarations of the form
 *   `{ name, description, parameters: { type, properties, required } }`.
 */
async function convertToGeminiFunctionDeclarations(tools) {
    const toDeclaration = async (tool) => {
        if (tool.definition.function.name === 'google_web_search') {
            console.log('[Gemini] Enabling Google Search grounding');
            return null;
        }
        const resolvedParams = await resolveAsyncEnums(tool.definition?.function?.parameters);
        const sourceProperties = resolvedParams?.properties;
        const properties = {};
        if (!sourceProperties) {
            console.warn(`Tool ${tool.definition?.function?.name || 'Unnamed Tool'} has missing or invalid parameters definition.`);
        }
        else {
            for (const [paramName, paramSpec] of Object.entries(sourceProperties)) {
                properties[paramName] = convertParameterToGeminiFormat(paramSpec);
            }
        }
        const required = Array.isArray(resolvedParams?.required) ? resolvedParams.required : [];
        return {
            name: tool.definition.function.name,
            description: tool.definition.function.description,
            parameters: {
                type: genai_1.Type.OBJECT,
                properties,
                required,
            },
        };
    };
    const converted = await Promise.all(tools.map(tool => toDeclaration(tool)));
    // Only the search-grounding tool yields null; drop those placeholders.
    return converted.filter(declaration => declaration !== null);
}
207
/**
 * Work out the MIME type of an image string by looking for a `data:<mime>`
 * marker. JPEG, PNG, GIF and WebP are recognised (checked in that order);
 * anything else is reported as 'image/jpeg'.
 *
 * @param {string} imageData Image data, typically a data URL.
 * @returns {string} One of 'image/jpeg', 'image/png', 'image/gif', 'image/webp'.
 */
function getImageMimeType(imageData) {
    const recognisedTypes = ['image/jpeg', 'image/png', 'image/gif', 'image/webp'];
    const detected = recognisedTypes.find(mime => imageData.includes(`data:${mime}`));
    return detected ?? 'image/jpeg';
}
218
/**
 * Strip a leading `data:image/<subtype>;base64,` prefix from an image string,
 * leaving only the base64 payload. Strings without such a prefix are returned
 * unchanged.
 *
 * The subtype match accepts letters, digits, '.', '+' and '-' and ignores
 * case, so media types such as `image/svg+xml` or `image/x-icon` are stripped
 * as well (a letters-only pattern would leave their prefix in the payload).
 *
 * @param {string} imageData Data URL or bare base64 string.
 * @returns {string} The string with the data-URL prefix removed.
 */
function cleanBase64Data(imageData) {
    return imageData.replace(/^data:image\/[a-z0-9.+-]+;base64,/i, '');
}
221
/**
 * Render grounding chunks as a numbered, newline-separated source list.
 *
 * Chunks without a `web.uri` are ignored; numbering starts at 1 and counts
 * only the chunks that are kept. A missing title is shown as 'Untitled'.
 *
 * @param {Array<object>} chunks Grounding chunks, each optionally carrying
 *   `web.uri` and `web.title`.
 * @returns {string} One "N. title – uri" line per kept chunk ('' if none).
 */
function formatGroundingChunks(chunks) {
    const lines = [];
    chunks.forEach((chunk) => {
        if (!chunk?.web?.uri) {
            return;
        }
        const position = lines.length + 1;
        const title = chunk.web.title || 'Untitled';
        lines.push(`${position}. ${title} – ${chunk.web.uri}`);
    });
    return lines.join('\n');
}
227
/**
 * Append one user message per image to a Gemini contents array.
 *
 * Every image is passed through `resizeAndTruncateForGemini`, then added as a
 * two-part message: a text label naming the image id and its source, followed
 * by the inline image data. Images are processed one after another in the
 * entry order of `images`.
 *
 * @param {Array<object>} input Contents array; it is mutated and returned.
 * @param {Object<string, string>} images Image data keyed by image id.
 * @param {string} source Description of the image origin, used in the label.
 * @returns {Promise<Array<object>>} The same `input` array.
 */
async function addImagesToInput(input, images, source) {
    const { resizeAndTruncateForGemini } = image_utils_js_1;
    for (const [imageId, rawImage] of Object.entries(images)) {
        const preparedImage = await resizeAndTruncateForGemini(rawImage);
        const mimeType = getImageMimeType(preparedImage);
        const data = cleanBase64Data(preparedImage);
        const labelPart = { text: `This is [image #${imageId}] from the ${source}` };
        const imagePart = { inlineData: { mimeType, data } };
        input.push({ role: 'user', parts: [labelPart, imagePart] });
    }
    return input;
}
249
/**
 * Convert a message history into Gemini `contents`.
 *
 * - `function_call` messages become a 'model' turn with a `functionCall` part.
 *   Arguments are parsed from JSON; a non-object result is wrapped as
 *   `{ value }`, and unparseable input is replaced by an error object that
 *   keeps the raw string.
 * - `function_call_output` messages become a 'user' turn with a
 *   `functionResponse` part whose content is the output as text.
 * - Everything else becomes a text part ('model' for role 'assistant',
 *   otherwise 'user'); `thought` is true for messages of type 'thinking'.
 *
 * The last two kinds are appended through `appendMessageWithImage`, which is
 * given accessors for the message text and the addImagesToInput callback.
 *
 * Missing values never reach string operations: an undefined output or
 * content (for which JSON.stringify returns undefined) is treated as ''.
 *
 * @param {string} model Model id, forwarded to `appendMessageWithImage`.
 * @param {Array<object>} messages Message history to convert.
 * @returns {Promise<Array<object>>} Gemini contents in message order.
 */
async function convertToGeminiContents(model, messages) {
    let contents = [];
    for (const msg of messages) {
        if (msg.type === 'function_call') {
            contents.push({
                role: 'model',
                parts: [
                    {
                        functionCall: {
                            name: msg.name,
                            args: parseGeminiFunctionCallArgs(msg),
                        },
                    },
                ],
            });
        }
        else if (msg.type === 'function_call_output') {
            const textOutput = typeof msg.output === 'string' ? msg.output : stringifyOrEmpty(msg.output);
            const message = {
                role: 'user',
                parts: [
                    {
                        functionResponse: {
                            name: msg.name,
                            response: { content: textOutput },
                        },
                    },
                ],
            };
            contents = await (0, image_utils_js_1.appendMessageWithImage)(model, contents, message, {
                read: () => textOutput,
                write: value => {
                    message.parts[0].functionResponse.response.content = value;
                    return message;
                },
            }, addImagesToInput);
        }
        else {
            const textContent = extractGeminiMessageText(msg.content);
            const message = {
                role: msg.role === 'assistant' ? 'model' : 'user',
                parts: [
                    {
                        thought: msg.type === 'thinking',
                        text: textContent.trim(),
                    },
                ],
            };
            contents = await (0, image_utils_js_1.appendMessageWithImage)(model, contents, message, {
                read: () => textContent,
                write: value => {
                    message.parts[0].text = value;
                    return message;
                },
            }, addImagesToInput);
        }
    }
    return contents;
}

/** JSON-encode a value, mapping the `undefined` result of JSON.stringify to ''. */
function stringifyOrEmpty(value) {
    return JSON.stringify(value) ?? '';
}

/** Parse a function call's JSON arguments into an object (never throws on bad JSON). */
function parseGeminiFunctionCallArgs(msg) {
    try {
        const parsedArgs = JSON.parse(msg.arguments || '{}');
        return typeof parsedArgs === 'object' && parsedArgs !== null ? parsedArgs : { value: parsedArgs };
    }
    catch (e) {
        console.error(`Failed to parse function call arguments for ${msg.name}:`, msg.arguments, e);
        return {
            error: 'Invalid JSON arguments provided',
            raw_args: msg.arguments,
        };
    }
}

/** Get message text: the string itself, a string `text` field, or the JSON form ('' if none). */
function extractGeminiMessageText(content) {
    if (typeof content === 'string') {
        return content;
    }
    if (content && typeof content === 'object' && 'text' in content && typeof content.text === 'string') {
        return content.text;
    }
    return stringifyOrEmpty(content);
}
336
/**
 * Thinking budgets selected by a model-name suffix. Callers in this file look
 * for the first suffix (in the order listed) that the model id ends with,
 * strip it from the id, and use the mapped number as `thinkingBudget`.
 *
 * NOTE(review): '-low' maps to 0, and the streaming request path only applies
 * the budget when it is truthy, so '-low' is effectively ignored there —
 * confirm whether that is intended.
 */
const THINKING_BUDGET_CONFIGS = {
    '-low': 0,
    '-medium': 2048,
    '-high': 12288,
    '-max': 24576,
};
342
+ class GeminiProvider extends base_provider_js_1.BaseModelProvider {
343
+ _client;
344
+ apiKey;
345
+ constructor(apiKey) {
346
+ super('google');
347
+ this.apiKey = apiKey;
348
+ }
349
+ get client() {
350
+ if (!this._client) {
351
+ const apiKey = this.apiKey || process.env.GOOGLE_API_KEY;
352
+ if (!apiKey) {
353
+ throw new Error('Failed to initialize Gemini client. GOOGLE_API_KEY is missing or not provided.');
354
+ }
355
+ this._client = new genai_1.GoogleGenAI({
356
+ apiKey: apiKey,
357
+ vertexai: false,
358
+ httpOptions: { apiVersion: 'v1alpha' },
359
+ });
360
+ }
361
+ return this._client;
362
+ }
363
+ async createEmbedding(input, model, opts) {
364
+ try {
365
+ let actualModelId = model.startsWith('gemini/') ? model.substring(7) : model;
366
+ let thinkingConfig = null;
367
+ for (const [suffix, budget] of Object.entries(THINKING_BUDGET_CONFIGS)) {
368
+ if (actualModelId.endsWith(suffix)) {
369
+ thinkingConfig = { thinkingBudget: budget };
370
+ actualModelId = actualModelId.slice(0, -suffix.length);
371
+ break;
372
+ }
373
+ }
374
+ console.log(`[Gemini] Generating embedding with model ${actualModelId}`);
375
+ const payload = {
376
+ model: actualModelId,
377
+ contents: input,
378
+ config: {
379
+ taskType: opts?.taskType ?? 'SEMANTIC_SIMILARITY',
380
+ },
381
+ };
382
+ if (thinkingConfig) {
383
+ payload.config.thinkingConfig = thinkingConfig;
384
+ }
385
+ const response = await this.client.models.embedContent(payload);
386
+ console.log('[Gemini] Embedding response structure:', JSON.stringify(response, (key, value) => key === 'values' && Array.isArray(value) && value.length > 10
387
+ ? `[${value.length} items]`
388
+ : value, 2));
389
+ if (!response.embeddings || !Array.isArray(response.embeddings)) {
390
+ console.error('[Gemini] Unexpected embedding response structure:', response);
391
+ throw new Error('Invalid embedding response structure from Gemini API');
392
+ }
393
+ const estimatedTokens = typeof input === 'string'
394
+ ? Math.ceil(input.length / 4)
395
+ : input.reduce((sum, text) => sum + Math.ceil(text.length / 4), 0);
396
+ let extractedValues = [];
397
+ let dimensions = 0;
398
+ if (response.embeddings.length > 0) {
399
+ if (response.embeddings[0].values) {
400
+ extractedValues = response.embeddings.map(e => e.values);
401
+ dimensions = extractedValues[0].length;
402
+ }
403
+ else {
404
+ console.warn('[Gemini] Could not find expected "values" property in embeddings response');
405
+ extractedValues = response.embeddings;
406
+ dimensions = Array.isArray(extractedValues[0]) ? extractedValues[0].length : 0;
407
+ }
408
+ }
409
+ index_js_1.costTracker.addUsage({
410
+ model: actualModelId,
411
+ input_tokens: estimatedTokens,
412
+ output_tokens: 0,
413
+ metadata: {
414
+ dimensions,
415
+ },
416
+ });
417
+ if (Array.isArray(input) && input.length > 1) {
418
+ return extractedValues;
419
+ }
420
+ else {
421
+ let result;
422
+ if (Array.isArray(extractedValues) && extractedValues.length >= 1) {
423
+ const firstValue = extractedValues[0];
424
+ if (Array.isArray(firstValue)) {
425
+ result = firstValue;
426
+ }
427
+ else {
428
+ console.error('[Gemini] Unexpected format in embedding result:', firstValue);
429
+ result = [];
430
+ }
431
+ }
432
+ else {
433
+ result = [];
434
+ }
435
+ let adjustedResult = result;
436
+ if (result.length !== 3072) {
437
+ console.warn(`Gemini embedding returned ${result.length} dimensions, adjusting to 3072...`);
438
+ if (result.length > 3072) {
439
+ adjustedResult = result.slice(0, 3072);
440
+ }
441
+ else {
442
+ adjustedResult = [...result, ...Array(3072 - result.length).fill(0)];
443
+ }
444
+ }
445
+ return adjustedResult;
446
+ }
447
+ }
448
+ catch (error) {
449
+ console.error('[Gemini] Error generating embedding:', error);
450
+ throw error;
451
+ }
452
+ }
453
+ async *retryStreamOnIncompleteJson(requestFn, maxRetries = 2) {
454
+ let attempts = 0;
455
+ while (attempts <= maxRetries) {
456
+ try {
457
+ const stream = await requestFn();
458
+ for await (const chunk of stream) {
459
+ yield chunk;
460
+ }
461
+ return;
462
+ }
463
+ catch (error) {
464
+ attempts++;
465
+ const errorMsg = error instanceof Error ? error.message : String(error);
466
+ if (errorMsg.includes('Incomplete JSON segment') && attempts <= maxRetries) {
467
+ console.warn(`[Gemini] Incomplete JSON segment error, retrying (${attempts}/${maxRetries})...`);
468
+ await new Promise(resolve => setTimeout(resolve, 1000 * attempts));
469
+ continue;
470
+ }
471
+ throw error;
472
+ }
473
+ }
474
+ }
475
+ async *createResponseStream(messages, model, agent) {
476
+ const { getToolsFromAgent } = await Promise.resolve().then(() => __importStar(require("../utils/agent.cjs")));
477
+ const tools = agent ? await getToolsFromAgent(agent) : [];
478
+ const settings = agent?.modelSettings;
479
+ let messageId = (0, uuid_1.v4)();
480
+ let contentBuffer = '';
481
+ let thoughtBuffer = '';
482
+ let eventOrder = 0;
483
+ const shownGrounding = new Set();
484
+ let requestId = undefined;
485
+ const chunks = [];
486
+ try {
487
+ const contents = await convertToGeminiContents(model, messages);
488
+ if (contents.length === 0) {
489
+ console.warn('Gemini API Warning: No valid content found in messages after conversion. Adding default message.');
490
+ contents.push({
491
+ role: 'user',
492
+ parts: [
493
+ {
494
+ text: "Let's think this through step by step.",
495
+ },
496
+ ],
497
+ });
498
+ }
499
+ const lastContent = contents[contents.length - 1];
500
+ if (lastContent.role !== 'user') {
501
+ console.warn("Last message in history is not from 'user'. Gemini might not respond as expected.");
502
+ }
503
+ let thinkingBudget = null;
504
+ for (const [suffix, budget] of Object.entries(THINKING_BUDGET_CONFIGS)) {
505
+ if (model.endsWith(suffix)) {
506
+ thinkingBudget = budget;
507
+ model = model.slice(0, -suffix.length);
508
+ break;
509
+ }
510
+ }
511
+ const config = {
512
+ thinkingConfig: {
513
+ includeThoughts: true,
514
+ },
515
+ };
516
+ if (thinkingBudget) {
517
+ config.thinkingConfig.thinkingBudget = thinkingBudget;
518
+ }
519
+ if (settings?.stop_sequence) {
520
+ config.stopSequences = [settings.stop_sequence];
521
+ }
522
+ if (settings?.temperature) {
523
+ config.temperature = settings.temperature;
524
+ }
525
+ if (settings?.max_tokens) {
526
+ config.maxOutputTokens = settings.max_tokens;
527
+ }
528
+ if (settings?.top_p) {
529
+ config.topP = settings.top_p;
530
+ }
531
+ if (settings?.top_k) {
532
+ config.topK = settings.top_k;
533
+ }
534
+ if (settings?.json_schema) {
535
+ config.responseMimeType = 'application/json';
536
+ config.responseSchema = settings.json_schema.schema;
537
+ if (config.responseSchema) {
538
+ const removeAdditionalProperties = (obj) => {
539
+ if (!obj || typeof obj !== 'object') {
540
+ return;
541
+ }
542
+ if ('additionalProperties' in obj) {
543
+ delete obj.additionalProperties;
544
+ }
545
+ if (obj.properties && typeof obj.properties === 'object') {
546
+ Object.values(obj.properties).forEach(prop => {
547
+ removeAdditionalProperties(prop);
548
+ });
549
+ }
550
+ if (obj.items) {
551
+ removeAdditionalProperties(obj.items);
552
+ }
553
+ ['oneOf', 'anyOf', 'allOf'].forEach(key => {
554
+ if (obj[key] && Array.isArray(obj[key])) {
555
+ obj[key].forEach((subSchema) => {
556
+ removeAdditionalProperties(subSchema);
557
+ });
558
+ }
559
+ });
560
+ };
561
+ removeAdditionalProperties(config.responseSchema);
562
+ }
563
+ }
564
+ let hasGoogleWebSearch = false;
565
+ if (tools && tools.length > 0) {
566
+ hasGoogleWebSearch = tools.some(tool => tool.definition.function.name === 'google_web_search');
567
+ const functionDeclarations = await convertToGeminiFunctionDeclarations(tools);
568
+ let allowedFunctionNames = [];
569
+ if (functionDeclarations.length > 0) {
570
+ config.tools = [{ functionDeclarations }];
571
+ if (settings?.tool_choice) {
572
+ let toolChoice;
573
+ if (typeof settings.tool_choice === 'object' &&
574
+ settings.tool_choice?.type === 'function' &&
575
+ settings.tool_choice?.function?.name) {
576
+ toolChoice = genai_1.FunctionCallingConfigMode.ANY;
577
+ allowedFunctionNames = [settings.tool_choice.function.name];
578
+ }
579
+ else if (settings.tool_choice === 'required') {
580
+ toolChoice = genai_1.FunctionCallingConfigMode.ANY;
581
+ }
582
+ else if (settings.tool_choice === 'auto') {
583
+ toolChoice = genai_1.FunctionCallingConfigMode.AUTO;
584
+ }
585
+ else if (settings.tool_choice === 'none') {
586
+ toolChoice = genai_1.FunctionCallingConfigMode.NONE;
587
+ }
588
+ if (toolChoice) {
589
+ config.toolConfig = {
590
+ functionCallingConfig: {
591
+ mode: toolChoice,
592
+ },
593
+ };
594
+ if (allowedFunctionNames.length > 0) {
595
+ config.toolConfig.functionCallingConfig.allowedFunctionNames = allowedFunctionNames;
596
+ }
597
+ }
598
+ }
599
+ }
600
+ else if (!hasGoogleWebSearch) {
601
+ console.warn('Tools were provided but resulted in empty declarations after conversion.');
602
+ }
603
+ }
604
+ if (hasGoogleWebSearch) {
605
+ console.log('[Gemini] Enabling Google Search grounding');
606
+ config.tools = [{ googleSearch: {} }];
607
+ config.toolConfig = {
608
+ functionCallingConfig: {
609
+ mode: genai_1.FunctionCallingConfigMode.ANY,
610
+ allowedFunctionNames: ['googleSearch'],
611
+ },
612
+ };
613
+ }
614
+ const requestParams = {
615
+ model,
616
+ contents,
617
+ config,
618
+ };
619
+ requestId = (0, llm_logger_js_1.log_llm_request)(agent.agent_id, 'google', model, requestParams);
620
+ const { waitWhilePaused } = await Promise.resolve().then(() => __importStar(require("../utils/pause_controller.cjs")));
621
+ await waitWhilePaused(100, agent.abortSignal);
622
+ const getStreamFn = () => this.client.models.generateContentStream(requestParams);
623
+ const response = this.retryStreamOnIncompleteJson(getStreamFn);
624
+ let usageMetadata;
625
+ for await (const chunk of response) {
626
+ chunks.push(chunk);
627
+ if (chunk.responseId) {
628
+ messageId = chunk.responseId;
629
+ }
630
+ if ((0, pause_controller_js_1.isPaused)()) {
631
+ console.log(`[Gemini] System paused during stream for model ${model}. Waiting...`);
632
+ await waitWhilePaused(100, agent.abortSignal);
633
+ console.log(`[Gemini] System resumed, continuing stream for model ${model}`);
634
+ }
635
+ if (chunk.functionCalls && chunk.functionCalls.length > 0) {
636
+ for (const fc of chunk.functionCalls) {
637
+ if (fc && fc.name) {
638
+ yield {
639
+ type: 'tool_start',
640
+ tool_call: {
641
+ id: fc.id || `call_${(0, uuid_1.v4)()}`,
642
+ type: 'function',
643
+ function: {
644
+ name: fc.name,
645
+ arguments: JSON.stringify(fc.args || {}),
646
+ },
647
+ },
648
+ };
649
+ }
650
+ }
651
+ }
652
+ for (const candidate of chunk.candidates) {
653
+ if (candidate.content.parts) {
654
+ for (const part of candidate.content.parts) {
655
+ let text = '';
656
+ if (part.text) {
657
+ text += part.text;
658
+ }
659
+ if (part.executableCode) {
660
+ if (text) {
661
+ text += '\n\n';
662
+ }
663
+ text += part.executableCode;
664
+ }
665
+ if (part.videoMetadata) {
666
+ if (text) {
667
+ text += '\n\n';
668
+ }
669
+ text += JSON.stringify(part.videoMetadata);
670
+ }
671
+ if (text.length > 0) {
672
+ const ev = {
673
+ type: 'message_delta',
674
+ content: '',
675
+ message_id: messageId,
676
+ order: eventOrder++,
677
+ };
678
+ if (part.thought) {
679
+ thoughtBuffer += text;
680
+ ev.thinking_content = text;
681
+ }
682
+ else {
683
+ contentBuffer += text;
684
+ ev.content = text;
685
+ }
686
+ yield ev;
687
+ }
688
+ if (part.inlineData?.data) {
689
+ yield {
690
+ type: 'file_complete',
691
+ data_format: 'base64',
692
+ data: part.inlineData.data,
693
+ mime_type: part.inlineData.mimeType || 'image/png',
694
+ message_id: (0, uuid_1.v4)(),
695
+ order: eventOrder++,
696
+ };
697
+ }
698
+ }
699
+ }
700
+ const gChunks = candidate.groundingMetadata?.groundingChunks;
701
+ if (Array.isArray(gChunks)) {
702
+ const newChunks = gChunks.filter(c => c?.web?.uri && !shownGrounding.has(c.web.uri));
703
+ if (newChunks.length) {
704
+ newChunks.forEach(c => shownGrounding.add(c.web.uri));
705
+ const formatted = formatGroundingChunks(newChunks);
706
+ yield {
707
+ type: 'message_delta',
708
+ content: '\n\nSearch Results:\n' + formatted + '\n',
709
+ message_id: messageId,
710
+ order: eventOrder++,
711
+ };
712
+ contentBuffer += '\n\nSearch Results:\n' + formatted + '\n';
713
+ }
714
+ }
715
+ }
716
+ if (chunk.usageMetadata) {
717
+ usageMetadata = chunk.usageMetadata;
718
+ }
719
+ }
720
+ if (usageMetadata) {
721
+ index_js_1.costTracker.addUsage({
722
+ model,
723
+ input_tokens: usageMetadata.promptTokenCount || 0,
724
+ output_tokens: usageMetadata.candidatesTokenCount || 0,
725
+ cached_tokens: usageMetadata.cachedContentTokenCount || 0,
726
+ metadata: {
727
+ total_tokens: usageMetadata.totalTokenCount || 0,
728
+ reasoning_tokens: usageMetadata.thoughtsTokenCount || 0,
729
+ tool_tokens: usageMetadata.toolUsePromptTokenCount || 0,
730
+ },
731
+ });
732
+ }
733
+ else {
734
+ console.error('[Gemini] No usage metadata found in the response. This may affect token tracking.');
735
+ index_js_1.costTracker.addUsage({
736
+ model,
737
+ input_tokens: 0,
738
+ output_tokens: 0,
739
+ cached_tokens: 0,
740
+ metadata: {
741
+ total_tokens: 0,
742
+ source: 'estimated',
743
+ },
744
+ });
745
+ }
746
+ if (contentBuffer || thoughtBuffer) {
747
+ yield {
748
+ type: 'message_complete',
749
+ content: contentBuffer,
750
+ thinking_content: thoughtBuffer,
751
+ message_id: messageId,
752
+ };
753
+ }
754
+ }
755
+ catch (error) {
756
+ (0, llm_logger_js_1.log_llm_error)(requestId, error);
757
+ const errorMessage = error instanceof Error ? error.stack || error.message : String(error);
758
+ if (errorMessage.includes('Incomplete JSON segment')) {
759
+ console.error('[Gemini] Stream terminated with incomplete JSON. This may indicate network issues or timeouts.');
760
+ }
761
+ console.error('\n=== Gemini error ===');
762
+ console.dir(error, { depth: null });
763
+ console.error('\n=== JSON dump of error ===');
764
+ console.error(JSON.stringify(error, Object.getOwnPropertyNames(error), 2));
765
+ console.error('\n=== Manual property walk ===');
766
+ for (const key of Reflect.ownKeys(error)) {
767
+ console.error(`${String(key)}:`, error[key]);
768
+ }
769
+ yield {
770
+ type: 'error',
771
+ error: `Gemini error ${model}: ${errorMessage}`,
772
+ };
773
+ if (contentBuffer || thoughtBuffer) {
774
+ yield {
775
+ type: 'message_complete',
776
+ content: contentBuffer,
777
+ thinking_content: thoughtBuffer,
778
+ message_id: messageId,
779
+ };
780
+ }
781
+ }
782
+ finally {
783
+ (0, llm_logger_js_1.log_llm_response)(requestId, chunks);
784
+ }
785
+ }
786
+ async createImage(prompt, model, opts) {
787
+ try {
788
+ model = model || 'imagen-3.0-generate-002';
789
+ const numberOfImages = opts?.n || 1;
790
+ let aspectRatio = '1:1';
791
+ if (opts?.size === 'landscape') {
792
+ aspectRatio = '16:9';
793
+ }
794
+ else if (opts?.size === 'portrait') {
795
+ aspectRatio = '9:16';
796
+ }
797
+ console.log(`[Gemini] Generating ${numberOfImages} image(s) with model ${model}, prompt: "${prompt.substring(0, 100)}${prompt.length > 100 ? '...' : ''}"`);
798
+ const response = await this.client.models.generateImages({
799
+ model,
800
+ prompt,
801
+ config: {
802
+ numberOfImages,
803
+ aspectRatio,
804
+ includeSafetyAttributes: false,
805
+ },
806
+ });
807
+ const images = [];
808
+ if (response.generatedImages && response.generatedImages.length > 0) {
809
+ for (const generatedImage of response.generatedImages) {
810
+ if (generatedImage.image?.imageBytes) {
811
+ const base64Image = `data:image/png;base64,${generatedImage.image.imageBytes}`;
812
+ images.push(base64Image);
813
+ }
814
+ }
815
+ const perImageCost = this.getImageCost(model);
816
+ index_js_1.costTracker.addUsage({
817
+ model,
818
+ image_count: images.length,
819
+ metadata: {
820
+ aspect_ratio: aspectRatio,
821
+ cost_per_image: perImageCost,
822
+ },
823
+ });
824
+ }
825
+ if (images.length === 0) {
826
+ throw new Error('No images returned from Gemini/Imagen');
827
+ }
828
+ return images;
829
+ }
830
+ catch (error) {
831
+ console.error('[Gemini] Error generating image:', error);
832
+ throw error;
833
+ }
834
+ }
835
+ getImageCost(model) {
836
+ if (model.includes('imagen-3')) {
837
+ return 0.04;
838
+ }
839
+ else if (model.includes('imagen-2')) {
840
+ return 0.02;
841
+ }
842
+ return 0.04;
843
+ }
844
+ async createVoice(text, model = 'gemini-2.5-flash-preview-tts', opts) {
845
+ try {
846
+ console.log(`[Gemini] Generating speech with model ${model}, text: "${text.substring(0, 100)}${text.length > 100 ? '...' : ''}"`);
847
+ const voiceName = this.mapVoiceToGemini(opts?.voice);
848
+ const speechConfig = {
849
+ voiceConfig: {
850
+ prebuiltVoiceConfig: {
851
+ voiceName: voiceName,
852
+ },
853
+ },
854
+ };
855
+ const config = {
856
+ responseModalities: [genai_1.Modality.AUDIO],
857
+ speechConfig: speechConfig,
858
+ };
859
+ let say_prefix = '';
860
+ let say_postfix = '';
861
+ if (opts?.speed && opts.speed !== 1.0) {
862
+ const speedDescription = opts.speed < 1.0
863
+ ? `slowly at ${Math.round(opts.speed * 100)}% speed`
864
+ : `quickly at ${Math.round(opts.speed * 100)}% speed`;
865
+ say_postfix = speedDescription;
866
+ }
867
+ if (opts?.affect) {
868
+ say_prefix = `Sound ${opts.affect}`;
869
+ }
870
+ if (say_postfix || say_prefix) {
871
+ if (say_postfix && say_prefix) {
872
+ text = `${say_prefix} and say ${say_postfix}:\n${text}`;
873
+ }
874
+ else if (say_postfix) {
875
+ text = `Say ${say_postfix}:\n${text}`;
876
+ }
877
+ else if (say_prefix) {
878
+ text = `${say_prefix} and say:\n${text}`;
879
+ }
880
+ }
881
+ console.log(`[Gemini] Starting generateContentStream call...`);
882
+ const streamPromise = this.client.models.generateContentStream({
883
+ model,
884
+ contents: [{ role: 'user', parts: [{ text }] }],
885
+ config,
886
+ });
887
+ const textLength = text.length;
888
+ index_js_1.costTracker.addUsage({
889
+ model,
890
+ input_tokens: Math.ceil(textLength / 4),
891
+ output_tokens: 0,
892
+ metadata: {
893
+ voice: voiceName,
894
+ text_length: textLength,
895
+ type: 'voice_generation',
896
+ },
897
+ });
898
+ if (opts?.stream) {
899
+ const stream = await streamPromise;
900
+ const chunks = [];
901
+ for await (const chunk of stream) {
902
+ if (chunk.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data) {
903
+ const part = chunk.candidates[0].content.parts[0];
904
+ const binaryString = atob(part.inlineData.data);
905
+ const bytes = new Uint8Array(binaryString.length);
906
+ for (let i = 0; i < binaryString.length; i++) {
907
+ bytes[i] = binaryString.charCodeAt(i);
908
+ }
909
+ chunks.push(bytes);
910
+ if (part.inlineData.mimeType) {
911
+ console.log(`[Gemini] Audio format: ${part.inlineData.mimeType}`);
912
+ }
913
+ }
914
+ }
915
+ const totalLength = chunks.reduce((sum, chunk) => sum + chunk.length, 0);
916
+ const combined = new Uint8Array(totalLength);
917
+ let offset = 0;
918
+ for (const chunk of chunks) {
919
+ combined.set(chunk, offset);
920
+ offset += chunk.length;
921
+ }
922
+ return new ReadableStream({
923
+ start(controller) {
924
+ controller.enqueue(combined);
925
+ controller.close();
926
+ },
927
+ });
928
+ }
929
+ let allData = new Uint8Array(0);
930
+ const stream = await streamPromise;
931
+ for await (const chunk of stream) {
932
+ if (!chunk.candidates || !chunk.candidates[0]?.content?.parts) {
933
+ continue;
934
+ }
935
+ const part = chunk.candidates[0].content.parts[0];
936
+ if (part?.inlineData?.data) {
937
+ const binaryString = atob(part.inlineData.data);
938
+ const bytes = new Uint8Array(binaryString.length);
939
+ for (let i = 0; i < binaryString.length; i++) {
940
+ bytes[i] = binaryString.charCodeAt(i);
941
+ }
942
+ const newData = new Uint8Array(allData.length + bytes.length);
943
+ newData.set(allData);
944
+ newData.set(bytes, allData.length);
945
+ allData = newData;
946
+ }
947
+ }
948
+ if (allData.length === 0) {
949
+ throw new Error('No audio data generated from Gemini TTS');
950
+ }
951
+ return allData.buffer;
952
+ }
953
+ catch (error) {
954
+ console.error('[Gemini] Error generating voice:', error);
955
+ throw error;
956
+ }
957
+ }
958
+ mapVoiceToGemini(voice) {
959
+ const geminiVoices = [
960
+ 'Kore',
961
+ 'Puck',
962
+ 'Charon',
963
+ 'Fenrir',
964
+ 'Aoede',
965
+ 'Glados',
966
+ ];
967
+ if (!voice) {
968
+ return 'Kore';
969
+ }
970
+ if (geminiVoices.includes(voice)) {
971
+ return voice;
972
+ }
973
+ const voiceMap = {
974
+ alloy: 'Kore',
975
+ echo: 'Puck',
976
+ fable: 'Charon',
977
+ onyx: 'Fenrir',
978
+ nova: 'Aoede',
979
+ shimmer: 'Glados',
980
+ male: 'Puck',
981
+ female: 'Kore',
982
+ neutral: 'Charon',
983
+ young: 'Aoede',
984
+ mature: 'Fenrir',
985
+ robotic: 'Glados',
986
+ kore: 'Kore',
987
+ puck: 'Puck',
988
+ charon: 'Charon',
989
+ fenrir: 'Fenrir',
990
+ aoede: 'Aoede',
991
+ glados: 'Glados',
992
+ };
993
+ const mappedVoice = voiceMap[voice.toLowerCase()];
994
+ if (mappedVoice) {
995
+ return mappedVoice;
996
+ }
997
+ console.warn(`[Gemini] Unknown voice '${voice}', using default voice 'Kore'`);
998
+ return 'Kore';
999
+ }
1000
+ async *createTranscription(audio, agent, model, opts) {
1001
+ let session = null;
1002
+ let isConnected = false;
1003
+ try {
1004
+ const ai = new genai_1.GoogleGenAI({
1005
+ apiKey: this.apiKey,
1006
+ httpOptions: { apiVersion: 'v1alpha' },
1007
+ });
1008
+ const realtimeInputConfig = opts?.realtimeInputConfig || {
1009
+ automaticActivityDetection: {
1010
+ disabled: false,
1011
+ startOfSpeechSensitivity: 'START_SENSITIVITY_HIGH',
1012
+ endOfSpeechSensitivity: 'END_SENSITIVITY_LOW',
1013
+ },
1014
+ };
1015
+ const speechConfig = opts?.speechConfig || {
1016
+ languageCode: 'en-US',
1017
+ };
1018
+ const systemInstruction = agent.instructions || `You should reply only "OK" to every single message from the user. Nothing else.`;
1019
+ console.log('[Gemini] Connecting to Live API for transcription...');
1020
+ const connectionPromise = new Promise((resolve, reject) => {
1021
+ const timeout = setTimeout(() => {
1022
+ reject(new Error('Connection timeout'));
1023
+ }, 10000);
1024
+ const config = {
1025
+ responseModalities: [genai_1.Modality.TEXT],
1026
+ mediaResolution: genai_1.MediaResolution.MEDIA_RESOLUTION_MEDIUM,
1027
+ speechConfig,
1028
+ realtimeInputConfig,
1029
+ systemInstruction: {
1030
+ parts: [{ text: systemInstruction }],
1031
+ },
1032
+ inputAudioTranscription: {},
1033
+ };
1034
+ ai.live
1035
+ .connect({
1036
+ model: model,
1037
+ config,
1038
+ callbacks: {
1039
+ onopen: () => {
1040
+ clearTimeout(timeout);
1041
+ console.log('[Gemini] Live session connected');
1042
+ isConnected = true;
1043
+ resolve();
1044
+ },
1045
+ onmessage: async (msg) => {
1046
+ if (msg.serverContent?.inputTranscription?.text) {
1047
+ const previewEvent = {
1048
+ type: 'transcription_turn_delta',
1049
+ timestamp: new Date().toISOString(),
1050
+ delta: msg.serverContent.inputTranscription.text,
1051
+ };
1052
+ transcriptEvents.push(previewEvent);
1053
+ }
1054
+ if (msg.serverContent?.turnComplete) {
1055
+ const turnEvent = {
1056
+ type: 'transcription_turn_complete',
1057
+ timestamp: new Date().toISOString(),
1058
+ };
1059
+ transcriptEvents.push(turnEvent);
1060
+ }
1061
+ if (msg.usageMetadata) {
1062
+ if (msg.usageMetadata.promptTokensDetails &&
1063
+ Array.isArray(msg.usageMetadata.promptTokensDetails)) {
1064
+ for (const detail of msg.usageMetadata.promptTokensDetails) {
1065
+ if (detail.modality && detail.tokenCount > 0) {
1066
+ index_js_1.costTracker.addUsage({
1067
+ model: model,
1068
+ input_tokens: detail.tokenCount,
1069
+ output_tokens: 0,
1070
+ input_modality: detail.modality.toLowerCase(),
1071
+ metadata: {
1072
+ totalTokens: msg.usageMetadata.totalTokenCount || 0,
1073
+ source: 'gemini-live-transcription',
1074
+ modalityType: 'input',
1075
+ originalModality: detail.modality,
1076
+ },
1077
+ });
1078
+ }
1079
+ }
1080
+ }
1081
+ if (msg.usageMetadata.responseTokensDetails &&
1082
+ Array.isArray(msg.usageMetadata.responseTokensDetails)) {
1083
+ for (const detail of msg.usageMetadata.responseTokensDetails) {
1084
+ if (detail.modality && detail.tokenCount > 0) {
1085
+ index_js_1.costTracker.addUsage({
1086
+ model: model,
1087
+ input_tokens: 0,
1088
+ output_tokens: detail.tokenCount,
1089
+ output_modality: detail.modality.toLowerCase(),
1090
+ metadata: {
1091
+ totalTokens: msg.usageMetadata.totalTokenCount || 0,
1092
+ source: 'gemini-live-transcription',
1093
+ modalityType: 'output',
1094
+ originalModality: detail.modality,
1095
+ },
1096
+ });
1097
+ }
1098
+ }
1099
+ }
1100
+ if ((!msg.usageMetadata.promptTokensDetails ||
1101
+ msg.usageMetadata.promptTokensDetails.length === 0) &&
1102
+ (!msg.usageMetadata.responseTokensDetails ||
1103
+ msg.usageMetadata.responseTokensDetails.length === 0)) {
1104
+ index_js_1.costTracker.addUsage({
1105
+ model: model,
1106
+ input_tokens: msg.usageMetadata.promptTokenCount || 0,
1107
+ output_tokens: msg.usageMetadata.responseTokenCount || 0,
1108
+ input_modality: 'audio',
1109
+ output_modality: 'text',
1110
+ metadata: {
1111
+ totalTokens: msg.usageMetadata.totalTokenCount || 0,
1112
+ source: 'gemini-live-transcription',
1113
+ },
1114
+ });
1115
+ }
1116
+ }
1117
+ },
1118
+ onerror: (err) => {
1119
+ console.error('[Gemini] Live API error:', {
1120
+ code: err.code,
1121
+ reason: err.reason,
1122
+ wasClean: err.wasClean,
1123
+ });
1124
+ connectionError = err;
1125
+ },
1126
+ onclose: (event) => {
1127
+ console.log('[Gemini] Live session closed');
1128
+ if (event) {
1129
+ console.log('[Gemini] Close event details:', {
1130
+ code: event.code,
1131
+ reason: event.reason,
1132
+ wasClean: event.wasClean,
1133
+ });
1134
+ }
1135
+ isConnected = false;
1136
+ },
1137
+ },
1138
+ })
1139
+ .then(async (s) => {
1140
+ session = s;
1141
+ });
1142
+ });
1143
+ const transcriptEvents = [];
1144
+ let connectionError = null;
1145
+ await connectionPromise;
1146
+ const audioStream = normalizeAudioSource(audio);
1147
+ const reader = audioStream.getReader();
1148
+ const sendAudioChunk = async (chunk) => {
1149
+ try {
1150
+ const base64Data = chunk.toString('base64');
1151
+ await session.sendRealtimeInput({
1152
+ media: {
1153
+ mimeType: 'audio/pcm;rate=16000',
1154
+ data: base64Data,
1155
+ },
1156
+ });
1157
+ }
1158
+ catch (err) {
1159
+ console.error('[Gemini] Error sending audio chunk:', err);
1160
+ connectionError = err;
1161
+ throw err;
1162
+ }
1163
+ };
1164
+ try {
1165
+ while (true) {
1166
+ const { done, value } = await reader.read();
1167
+ if (done)
1168
+ break;
1169
+ if (value && session && isConnected) {
1170
+ const chunk = value instanceof Buffer ? value : Buffer.from(value);
1171
+ await sendAudioChunk(chunk);
1172
+ }
1173
+ if (transcriptEvents.length > 0) {
1174
+ const events = transcriptEvents.splice(0, transcriptEvents.length);
1175
+ for (const event of events) {
1176
+ yield event;
1177
+ }
1178
+ }
1179
+ if (connectionError) {
1180
+ throw connectionError;
1181
+ }
1182
+ }
1183
+ await new Promise(resolve => setTimeout(resolve, 1000));
1184
+ if (transcriptEvents.length > 0) {
1185
+ const events = transcriptEvents.splice(0, transcriptEvents.length);
1186
+ for (const event of events) {
1187
+ yield event;
1188
+ }
1189
+ }
1190
+ }
1191
+ finally {
1192
+ reader.releaseLock();
1193
+ if (session) {
1194
+ session.close();
1195
+ }
1196
+ }
1197
+ }
1198
+ catch (error) {
1199
+ console.error('[Gemini] Transcription error:', error);
1200
+ const errorEvent = {
1201
+ type: 'error',
1202
+ timestamp: new Date().toISOString(),
1203
+ error: error instanceof Error ? error.message : 'Transcription failed',
1204
+ };
1205
+ yield errorEvent;
1206
+ }
1207
+ }
1208
+ async createLiveSession(config, agent, model, opts) {
1209
+ console.log(`[Gemini] Creating Live session with model ${model}`);
1210
+ const liveModels = [
1211
+ 'gemini-2.0-flash-live-001',
1212
+ 'gemini-live-2.5-flash-preview',
1213
+ 'gemini-2.5-flash-preview-native-audio-dialog',
1214
+ 'gemini-2.5-flash-exp-native-audio-thinking-dialog',
1215
+ 'gemini-2.0-flash-exp',
1216
+ ];
1217
+ if (!liveModels.some(m => model.includes(m))) {
1218
+ throw new Error(`Model ${model} does not support Live API. Supported models: ${liveModels.join(', ')}`);
1219
+ }
1220
+ const sessionId = (0, uuid_1.v4)();
1221
+ const liveSession = new GeminiLiveSession(sessionId, this.client, model, config, agent, opts);
1222
+ await liveSession.initialize();
1223
+ return liveSession;
1224
+ }
1225
+ }
1226
+ exports.GeminiProvider = GeminiProvider;
1227
/**
 * Convert any supported audio source into a ReadableStream of chunks.
 *
 * Supported sources:
 * - a ReadableStream (returned as is);
 * - an async iterable (wrapped; chunks are pulled on demand and the iterator
 *   is finished via `return()` when the stream is cancelled);
 * - a function returning any supported source;
 * - an ArrayBuffer or any ArrayBuffer view (emitted as a single Uint8Array chunk;
 *   non-Uint8Array views are re-viewed as bytes over the same memory).
 *
 * @param {*} source - Audio source.
 * @returns {ReadableStream} Stream over the source's chunks.
 * @throws {Error} When the source type is not supported.
 */
function normalizeAudioSource(source) {
    if (source instanceof ReadableStream) {
        return source;
    }
    if (typeof source === 'object' && source !== null && Symbol.asyncIterator in source) {
        let iterator = null;
        return new ReadableStream({
            // Pull one chunk per request so a slow consumer applies backpressure
            // instead of the whole source being buffered up front. An error from
            // the iterator rejects this promise, which errors the stream.
            async pull(controller) {
                if (iterator === null) {
                    iterator = source[Symbol.asyncIterator]();
                }
                const { done, value } = await iterator.next();
                if (done) {
                    controller.close();
                }
                else {
                    controller.enqueue(value);
                }
            },
            // Let the producer run its cleanup when the consumer stops early.
            async cancel(reason) {
                if (iterator !== null && typeof iterator.return === 'function') {
                    await iterator.return(reason);
                }
            },
        });
    }
    if (typeof source === 'function') {
        return normalizeAudioSource(source());
    }
    let bytes = null;
    if (source instanceof ArrayBuffer) {
        bytes = new Uint8Array(source);
    }
    else if (source instanceof Uint8Array) {
        bytes = source;
    }
    else if (ArrayBuffer.isView(source)) {
        // e.g. Int16Array PCM samples or a DataView: expose the underlying bytes.
        bytes = new Uint8Array(source.buffer, source.byteOffset, source.byteLength);
    }
    if (bytes !== null) {
        return new ReadableStream({
            start(controller) {
                controller.enqueue(bytes);
                controller.close();
            },
        });
    }
    throw new Error(`Unsupported audio source type: ${typeof source}`);
}
1261
/**
 * One Gemini Live API connection exposed as an event queue.
 *
 * Server messages are translated into plain event objects (each with a `type`
 * and ISO `timestamp`) that consumers read through `getEventStream()`. Audio,
 * text and tool results are sent with `sendAudio`, `sendText` and
 * `sendToolResponse`. Token usage reported by the server is forwarded to the
 * cost tracker.
 */
class GeminiLiveSession {
    sessionId;
    ai;
    model;
    config;
    agent;
    options;
    // Live session handle returned by `ai.live.connect`; null until connected.
    session = null;
    // Events produced while no consumer was waiting.
    eventQueue = [];
    // Resolvers of consumers currently waiting for the next event.
    eventResolvers = [];
    _isActive = true;
    sessionClosed = false;
    // Completed turns, in order.
    messageHistory = [];
    // Turn currently being accumulated: `{ role, text }` or null.
    currentTurn = null;
    /**
     * @param {string} sessionId - Identifier used in log messages.
     * @param {object} ai - Client exposing `live.connect`.
     * @param {string} model - Live model id.
     * @param {object} config - Live configuration (tools, modalities, speech, ...).
     * @param {object} agent - Agent definition; `instructions` becomes the system instruction.
     * @param {object} [options] - Stored as given; not read inside this class.
     */
    constructor(sessionId, ai, model, config, agent, options) {
        this.sessionId = sessionId;
        this.ai = ai;
        this.model = model;
        this.config = config;
        this.agent = agent;
        this.options = options;
    }
    /**
     * Open the Live connection.
     *
     * Resolves once the socket has opened AND the session handle is stored, so
     * the send methods can be used as soon as this returns.
     *
     * @returns {Promise<void>}
     * @throws {Error} When the connection is not ready within 30 seconds, when
     *   connecting fails, or when the socket errors or closes before it is ready.
     */
    async initialize() {
        const tools = [];
        for (const toolGroup of this.config.tools || []) {
            if (toolGroup.functionDeclarations) {
                const functionDeclarations = toolGroup.functionDeclarations.map(func => ({
                    name: func.name,
                    description: func.description,
                    parameters: convertParameterToGeminiFormat(func.parameters),
                }));
                tools.push({ functionDeclarations });
            }
            if (toolGroup.codeExecution) {
                tools.push({ codeExecution: {} });
            }
            if (toolGroup.googleSearch) {
                tools.push({ googleSearch: {} });
            }
        }
        const systemInstruction = this.agent.instructions
            ? { parts: [{ text: this.agent.instructions }] }
            : undefined;
        // Anything other than an explicit 'AUDIO' first modality means text.
        const wantsAudio = this.config.responseModalities?.[0] === 'AUDIO';
        const config = {
            responseModalities: wantsAudio ? [genai_1.Modality.AUDIO] : [genai_1.Modality.TEXT],
            systemInstruction,
            tools: tools.length > 0 ? tools : undefined,
        };
        if (wantsAudio && this.config.speechConfig) {
            config.speechConfig = {
                voiceConfig: this.config.speechConfig.voiceConfig,
            };
        }
        if (this.config.realtimeInputConfig) {
            config.realtimeInputConfig = {
                automaticActivityDetection: this.config.realtimeInputConfig.automaticActivityDetection
                    ? {
                        disabled: this.config.realtimeInputConfig.automaticActivityDetection.disabled,
                    }
                    : undefined,
            };
        }
        // The transcription settings are configuration objects; an empty object
        // enables the feature (the same form createTranscription uses).
        if (this.config.inputAudioTranscription) {
            config.inputAudioTranscription = {};
        }
        if (this.config.outputAudioTranscription) {
            config.outputAudioTranscription = {};
        }
        if (this.config.enableAffectiveDialog) {
            config.enableAffectiveDialog = true;
        }
        if (this.config.proactivity) {
            config.proactivity = this.config.proactivity;
        }
        console.log('[Gemini] Connecting with config:', JSON.stringify(config, null, 2));
        await new Promise((resolve, reject) => {
            let opened = false;
            let settled = false;
            let failed = false;
            const succeedWhenReady = () => {
                if (settled || !opened || !this.session) {
                    return;
                }
                settled = true;
                clearTimeout(timeout);
                resolve();
            };
            const fail = (err) => {
                if (settled) {
                    return;
                }
                settled = true;
                failed = true;
                clearTimeout(timeout);
                reject(err instanceof Error ? err : new Error(err?.message || String(err)));
            };
            const timeout = setTimeout(() => fail(new Error('Connection timeout')), 30000);
            try {
                this.ai.live
                    .connect({
                    model: this.model,
                    config,
                    callbacks: {
                        onopen: () => {
                            console.log('[Gemini] Live session connected');
                            opened = true;
                            this.pushEvent({
                                type: 'live_ready',
                                timestamp: new Date().toISOString(),
                            });
                            succeedWhenReady();
                        },
                        onmessage: (msg) => {
                            this.handleMessage(msg);
                        },
                        onerror: (err) => {
                            console.error('[Gemini] Live API error:', err);
                            console.error('[Gemini] Error details:', JSON.stringify(err, null, 2));
                            this.pushEvent({
                                type: 'error',
                                timestamp: new Date().toISOString(),
                                error: err.message || String(err),
                                code: err.code,
                                recoverable: true,
                            });
                            // Before the session is ready an error means the
                            // connection attempt failed; no-op afterwards.
                            fail(err);
                        },
                        onclose: (event) => {
                            console.log('[Gemini] Live session closed', event);
                            if (event) {
                                console.log('[Gemini] Close event details:', {
                                    code: event.code,
                                    reason: event.reason,
                                    wasClean: event.wasClean,
                                });
                            }
                            this._isActive = false;
                            this.sessionClosed = true;
                            this.resolveAllWaitingEvents();
                            fail(new Error('Live session closed before it was ready'));
                        },
                    },
                })
                    .then(s => {
                    if (failed) {
                        // initialize() already rejected; do not leak the socket.
                        s.close();
                        return;
                    }
                    this.session = s;
                    succeedWhenReady();
                })
                    .catch(fail);
            }
            catch (err) {
                fail(err);
            }
        });
    }
    /**
     * Translate one server message into events, history and usage records.
     *
     * @param {object} msg - Message received from the Live API.
     */
    handleMessage(msg) {
        console.log('[Gemini] Received message:', JSON.stringify(msg, null, 2));
        if (msg.error) {
            console.error('[Gemini] Error in message:', msg.error);
            this.pushEvent({
                type: 'error',
                timestamp: new Date().toISOString(),
                error: msg.error.message || JSON.stringify(msg.error),
                code: msg.error.code || 'UNKNOWN_ERROR',
                recoverable: false,
            });
            return;
        }
        // Keep the id supplied by the server so the tool response can be
        // correlated with the call; generate one only when none is given.
        const toToolCall = (fc) => ({
            id: fc.id || (0, uuid_1.v4)(),
            type: 'function',
            function: {
                name: fc.name,
                arguments: JSON.stringify(fc.args || {}),
            },
        });
        const parts = msg.serverContent?.modelTurn?.parts;
        if (parts) {
            for (const part of parts) {
                if (part.inlineData?.mimeType?.startsWith('audio/')) {
                    this.pushEvent({
                        type: 'audio_output',
                        timestamp: new Date().toISOString(),
                        data: part.inlineData.data,
                        // NOTE(review): fixed format descriptor; presumably matches the
                        // Live API's audio output — confirm against the API docs.
                        format: {
                            sampleRate: 24000,
                            channels: 1,
                            encoding: 'pcm',
                        },
                    });
                }
                if (part.text) {
                    if (!this.currentTurn || this.currentTurn.role !== 'model') {
                        this.currentTurn = { role: 'model', text: '' };
                        this.pushEvent({
                            type: 'turn_start',
                            timestamp: new Date().toISOString(),
                            role: 'model',
                        });
                    }
                    this.currentTurn.text += part.text;
                    // The same text is emitted under both event names.
                    this.pushEvent({
                        type: 'text_delta',
                        timestamp: new Date().toISOString(),
                        delta: part.text,
                    });
                    this.pushEvent({
                        type: 'message_delta',
                        timestamp: new Date().toISOString(),
                        delta: part.text,
                    });
                }
            }
            // Function calls are reported after all audio/text of the message.
            for (const part of parts) {
                if (part.functionCall) {
                    this.pushEvent({
                        type: 'tool_call',
                        timestamp: new Date().toISOString(),
                        toolCalls: [toToolCall(part.functionCall)],
                    });
                }
            }
        }
        // Tool calls can also arrive in a dedicated top-level `toolCall` message.
        const liveFunctionCalls = msg.toolCall?.functionCalls;
        if (Array.isArray(liveFunctionCalls) && liveFunctionCalls.length > 0) {
            this.pushEvent({
                type: 'tool_call',
                timestamp: new Date().toISOString(),
                toolCalls: liveFunctionCalls.map(toToolCall),
            });
        }
        // `inputTranscription` is the field createTranscription reads; the older
        // `inputAudioTranscription` name is still accepted.
        const inputTranscription = msg.serverContent?.inputTranscription ||
            msg.serverContent?.inputAudioTranscription;
        if (inputTranscription) {
            const text = inputTranscription.text ||
                inputTranscription.transcript ||
                '';
            if (text) {
                this.pushEvent({
                    type: 'transcription_input',
                    timestamp: new Date().toISOString(),
                    text,
                });
            }
        }
        if (msg.serverContent?.outputTranscription) {
            const text = msg.serverContent.outputTranscription.text || '';
            if (text) {
                this.pushEvent({
                    type: 'transcription_output',
                    timestamp: new Date().toISOString(),
                    text,
                });
            }
        }
        if (msg.serverContent?.turnComplete) {
            if (this.currentTurn) {
                const message = this.currentTurn.role === 'model'
                    ? {
                        type: 'message',
                        role: 'assistant',
                        content: this.currentTurn.text,
                        status: 'completed',
                    }
                    : {
                        type: 'message',
                        role: 'user',
                        content: this.currentTurn.text,
                    };
                this.messageHistory.push(message);
                this.pushEvent({
                    type: 'turn_complete',
                    timestamp: new Date().toISOString(),
                    role: this.currentTurn.role,
                    message,
                });
                this.currentTurn = null;
            }
        }
        if (msg.serverContent?.interrupted) {
            const cancelledToolCalls = [];
            if (msg.serverContent.cancelledFunctionCalls) {
                cancelledToolCalls.push(...msg.serverContent.cancelledFunctionCalls.map((fc) => fc.id));
            }
            this.pushEvent({
                type: 'interrupted',
                timestamp: new Date().toISOString(),
                cancelledToolCalls,
            });
        }
        if (msg.usageMetadata) {
            const usage = msg.usageMetadata;
            const inputTokens = usage.promptTokenCount || 0;
            // Live usage reports output as `responseTokenCount` (as read by
            // createTranscription); `candidatesTokenCount` is kept as a fallback.
            const outputTokens = usage.responseTokenCount || usage.candidatesTokenCount || 0;
            const totalTokens = usage.totalTokenCount || 0;
            index_js_1.costTracker.addUsage({
                model: this.model,
                input_tokens: inputTokens,
                output_tokens: outputTokens,
                cached_tokens: usage.cachedContentTokenCount || 0,
                metadata: {
                    total_tokens: totalTokens,
                    source: 'gemini-live',
                },
            });
            // Costs are not computed here; the keys are kept with undefined values.
            this.pushEvent({
                type: 'cost_update',
                timestamp: new Date().toISOString(),
                usage: {
                    inputTokens,
                    outputTokens,
                    totalTokens,
                    inputCost: undefined,
                    outputCost: undefined,
                    totalCost: undefined,
                },
            });
        }
    }
    /**
     * Send one chunk of base64-encoded audio and emit an `audio_input` event.
     *
     * @param {{ data: string, mimeType: string }} audio - Base64 audio and its MIME type.
     * @returns {Promise<void>}
     * @throws {Error} When the session is not active or sending fails.
     */
    async sendAudio(audio) {
        if (!this.session || !this._isActive) {
            console.error(`[GeminiLiveSession ${this.sessionId}] Cannot send audio - session not active`);
            throw new Error('Session is not active');
        }
        console.log(`[GeminiLiveSession ${this.sessionId}] Sending audio: ${audio.data.length} chars (base64), mimeType: ${audio.mimeType}`);
        try {
            await this.session.sendRealtimeInput({
                media: {
                    mimeType: audio.mimeType,
                    data: audio.data,
                },
            });
            console.log(`[GeminiLiveSession ${this.sessionId}] Audio sent successfully`);
        }
        catch (error) {
            console.error(`[GeminiLiveSession ${this.sessionId}] Error sending audio:`, error);
            throw error;
        }
        // Approximate decoded size: 3 bytes per 4 base64 characters.
        const size = Math.ceil((audio.data.length * 3) / 4);
        this.pushEvent({
            type: 'audio_input',
            timestamp: new Date().toISOString(),
            size,
        });
    }
    /**
     * Send a text turn and emit a `turn_start` event for it.
     *
     * @param {string} text - Turn content.
     * @param {string} [role='user'] - 'assistant' is sent as 'model'; anything else as 'user'.
     * @returns {Promise<void>}
     * @throws {Error} When the session is not active.
     */
    async sendText(text, role = 'user') {
        if (!this.session || !this._isActive) {
            throw new Error('Session is not active');
        }
        const geminiRole = role === 'assistant' ? 'model' : 'user';
        await this.session.sendClientContent({
            turns: [{ role: geminiRole, parts: [{ text }] }],
        });
        this.pushEvent({
            type: 'turn_start',
            timestamp: new Date().toISOString(),
            role: geminiRole,
        });
    }
    /**
     * Send tool results back to the model.
     *
     * @param {Array<object>} toolResults - Each result provides `call_id` or `id`,
     *   `toolCall.function.name`, and either `error` or `output`.
     * @returns {Promise<void>}
     * @throws {Error} When the session is not active.
     */
    async sendToolResponse(toolResults) {
        if (!this.session || !this._isActive) {
            throw new Error('Session is not active');
        }
        const functionResponses = toolResults.map(result => ({
            id: result.call_id || result.id,
            name: result.toolCall.function.name,
            response: result.error ? { error: result.error } : { result: result.output },
        }));
        await this.session.sendToolResponse({ functionResponses });
    }
    /**
     * Iterate over session events in arrival order.
     *
     * Queued events are drained first; afterwards the iterator waits for new
     * events and ends once the session is closed and nothing is left to deliver.
     *
     * @yields {object} Session events.
     */
    async *getEventStream() {
        while (this._isActive || this.eventQueue.length > 0) {
            if (this.eventQueue.length > 0) {
                yield this.eventQueue.shift();
                continue;
            }
            const result = await new Promise(resolve => {
                if (this.sessionClosed && this.eventQueue.length === 0) {
                    resolve({ done: true, value: undefined });
                }
                else {
                    this.eventResolvers.push(resolve);
                }
            });
            if (result.done)
                break;
            if (result.value)
                yield result.value;
        }
    }
    /**
     * Close the session. Consumers waiting in `getEventStream()` are released
     * even if the underlying close never triggers the `onclose` callback.
     *
     * @returns {Promise<void>}
     */
    async close() {
        if (this.session && this._isActive) {
            this._isActive = false;
            try {
                await this.session.close();
            }
            finally {
                this.sessionClosed = true;
                this.resolveAllWaitingEvents();
            }
        }
    }
    /**
     * @returns {boolean} False once the session has been closed locally or remotely.
     */
    isActive() {
        return this._isActive;
    }
    /**
     * Deliver an event to the longest-waiting consumer, or queue it when nobody is waiting.
     *
     * @param {object} event - Event to deliver.
     */
    pushEvent(event) {
        if (this.eventResolvers.length > 0) {
            const resolver = this.eventResolvers.shift();
            resolver({ value: event, done: false });
        }
        else {
            this.eventQueue.push(event);
        }
    }
    /**
     * Release every waiting consumer with an end-of-stream result.
     */
    resolveAllWaitingEvents() {
        for (const resolver of this.eventResolvers) {
            resolver({ done: true, value: undefined });
        }
        this.eventResolvers = [];
    }
}
1653
+ exports.geminiProvider = new GeminiProvider();
1654
+ //# sourceMappingURL=gemini.js.map