@robota-sdk/agent-provider 3.0.0-beta.64
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/browser/index.d.ts +1104 -0
- package/dist/browser/index.d.ts.map +1 -0
- package/dist/browser/index.js +7 -0
- package/dist/browser/index.js.map +1 -0
- package/dist/loggers/index.cjs +1 -0
- package/dist/loggers/index.d.ts +151 -0
- package/dist/loggers/index.d.ts.map +1 -0
- package/dist/loggers/index.js +2 -0
- package/dist/loggers/index.js.map +1 -0
- package/dist/node/anthropic/index.cjs +1 -0
- package/dist/node/anthropic/index.d.ts +158 -0
- package/dist/node/anthropic/index.d.ts.map +1 -0
- package/dist/node/anthropic/index.js +1 -0
- package/dist/node/anthropic--1vgLC-e.js +5 -0
- package/dist/node/anthropic--1vgLC-e.js.map +1 -0
- package/dist/node/anthropic-BFQ6DSCP.cjs +4 -0
- package/dist/node/bytedance/index.cjs +1 -0
- package/dist/node/bytedance/index.d.ts +74 -0
- package/dist/node/bytedance/index.d.ts.map +1 -0
- package/dist/node/bytedance/index.js +1 -0
- package/dist/node/bytedance-C_0sF_pJ.js +2 -0
- package/dist/node/bytedance-C_0sF_pJ.js.map +1 -0
- package/dist/node/bytedance-DVPxqEiC.cjs +1 -0
- package/dist/node/chunk-Bmb41Sf3.cjs +1 -0
- package/dist/node/deepseek/index.cjs +1 -0
- package/dist/node/deepseek/index.d.ts +2 -0
- package/dist/node/deepseek/index.js +1 -0
- package/dist/node/deepseek-_8Ixx7rA.js +2 -0
- package/dist/node/deepseek-_8Ixx7rA.js.map +1 -0
- package/dist/node/deepseek-oA2Y6bD0.cjs +1 -0
- package/dist/node/gemini/index.cjs +1 -0
- package/dist/node/gemini/index.d.ts +173 -0
- package/dist/node/gemini/index.d.ts.map +1 -0
- package/dist/node/gemini/index.js +1 -0
- package/dist/node/gemini-Bh2U87MY.js +4 -0
- package/dist/node/gemini-Bh2U87MY.js.map +1 -0
- package/dist/node/gemini-DSaNCxZj.cjs +3 -0
- package/dist/node/gemma/index.cjs +1 -0
- package/dist/node/gemma/index.d.ts +2 -0
- package/dist/node/gemma/index.js +1 -0
- package/dist/node/gemma-Dp_AfCUR.js +2 -0
- package/dist/node/gemma-Dp_AfCUR.js.map +1 -0
- package/dist/node/gemma-G-Pf_PnX.cjs +1 -0
- package/dist/node/google/index.cjs +1 -0
- package/dist/node/google/index.d.ts +14 -0
- package/dist/node/google/index.d.ts.map +1 -0
- package/dist/node/google/index.js +2 -0
- package/dist/node/google/index.js.map +1 -0
- package/dist/node/index-B6PnlDMd.d.ts +82 -0
- package/dist/node/index-B6PnlDMd.d.ts.map +1 -0
- package/dist/node/index-B7UvPJcI.d.ts +315 -0
- package/dist/node/index-B7UvPJcI.d.ts.map +1 -0
- package/dist/node/index-BLPOTNb5.d.ts +98 -0
- package/dist/node/index-BLPOTNb5.d.ts.map +1 -0
- package/dist/node/index-BqixM_XD.d.ts +231 -0
- package/dist/node/index-BqixM_XD.d.ts.map +1 -0
- package/dist/node/index-C3beaqKO.d.ts +231 -0
- package/dist/node/index-C3beaqKO.d.ts.map +1 -0
- package/dist/node/index-Cp2XRh9G.d.ts +82 -0
- package/dist/node/index-Cp2XRh9G.d.ts.map +1 -0
- package/dist/node/index-DSv5xruI.d.ts +98 -0
- package/dist/node/index-DSv5xruI.d.ts.map +1 -0
- package/dist/node/index-w0bV1uaP.d.ts +315 -0
- package/dist/node/index-w0bV1uaP.d.ts.map +1 -0
- package/dist/node/index.cjs +1 -0
- package/dist/node/index.d.ts +8 -0
- package/dist/node/index.js +1 -0
- package/dist/node/openai/index.cjs +1 -0
- package/dist/node/openai/index.d.ts +2 -0
- package/dist/node/openai/index.js +1 -0
- package/dist/node/openai-CRQjg4xF.js +2 -0
- package/dist/node/openai-CRQjg4xF.js.map +1 -0
- package/dist/node/openai-compatible-BYfyY5lb.cjs +1 -0
- package/dist/node/openai-compatible-Dm4Sof9e.js +2 -0
- package/dist/node/openai-compatible-Dm4Sof9e.js.map +1 -0
- package/dist/node/openai-xWC6pY7r.cjs +1 -0
- package/dist/node/qwen/index.cjs +1 -0
- package/dist/node/qwen/index.d.ts +2 -0
- package/dist/node/qwen/index.js +1 -0
- package/dist/node/qwen-ChUZobTL.js +2 -0
- package/dist/node/qwen-ChUZobTL.js.map +1 -0
- package/dist/node/qwen-CjT71vSM.cjs +1 -0
- package/package.json +157 -0
- package/src/anthropic/__tests__/abort-streaming.test.ts +199 -0
- package/src/anthropic/__tests__/model-catalog-refresh.test.ts +92 -0
- package/src/anthropic/__tests__/provider-definition.test.ts +55 -0
- package/src/anthropic/__tests__/provider.test.ts +1357 -0
- package/src/anthropic/__tests__/response-parser.test.ts +326 -0
- package/src/anthropic/index.ts +22 -0
- package/src/anthropic/message-converter.ts +181 -0
- package/src/anthropic/model-catalog-refresh.ts +128 -0
- package/src/anthropic/parsers/response-parser.ts +184 -0
- package/src/anthropic/provider-definition.ts +93 -0
- package/src/anthropic/provider.ts +290 -0
- package/src/anthropic/streaming-handler.ts +204 -0
- package/src/anthropic/types/api-types.ts +158 -0
- package/src/anthropic/types.ts +79 -0
- package/src/bytedance/http-client.test.ts +288 -0
- package/src/bytedance/http-client.ts +163 -0
- package/src/bytedance/index.ts +2 -0
- package/src/bytedance/provider.spec.ts +320 -0
- package/src/bytedance/provider.ts +171 -0
- package/src/bytedance/status-mapper.test.ts +299 -0
- package/src/bytedance/status-mapper.ts +141 -0
- package/src/bytedance/types.ts +68 -0
- package/src/deepseek/defaults.ts +4 -0
- package/src/deepseek/index.ts +22 -0
- package/src/deepseek/model-catalog-refresh.test.ts +57 -0
- package/src/deepseek/model-catalog-refresh.ts +105 -0
- package/src/deepseek/model-catalog.ts +55 -0
- package/src/deepseek/provider-definition.test.ts +109 -0
- package/src/deepseek/provider-definition.ts +132 -0
- package/src/deepseek/provider.test.ts +324 -0
- package/src/deepseek/provider.ts +298 -0
- package/src/deepseek/types.ts +37 -0
- package/src/gemini/execution-helpers.ts +233 -0
- package/src/gemini/genai-transport.test.ts +208 -0
- package/src/gemini/image-operations.test.ts +448 -0
- package/src/gemini/image-operations.ts +261 -0
- package/src/gemini/index.ts +11 -0
- package/src/gemini/message-converter.test.ts +616 -0
- package/src/gemini/message-converter.ts +140 -0
- package/src/gemini/model-catalog-refresh.test.ts +107 -0
- package/src/gemini/model-catalog-refresh.ts +92 -0
- package/src/gemini/provider-definition.test.ts +70 -0
- package/src/gemini/provider-definition.ts +78 -0
- package/src/gemini/provider-extended.test.ts +898 -0
- package/src/gemini/provider.spec.ts +216 -0
- package/src/gemini/provider.ts +279 -0
- package/src/gemini/request-converter.ts +226 -0
- package/src/gemini/tool-schema-converter.ts +78 -0
- package/src/gemini/types/api-types.ts +235 -0
- package/src/gemini/types.ts +121 -0
- package/src/gemma/index.ts +5 -0
- package/src/gemma/message-factory.ts +38 -0
- package/src/gemma/provider-definition.test.ts +43 -0
- package/src/gemma/provider-definition.ts +84 -0
- package/src/gemma/provider-projection.ts +49 -0
- package/src/gemma/provider.test.ts +628 -0
- package/src/gemma/provider.ts +308 -0
- package/src/gemma/pseudo-command-envelope.ts +58 -0
- package/src/gemma/pseudo-tool-call-projector.ts +243 -0
- package/src/gemma/pseudo-tool-call-tag-parser.ts +153 -0
- package/src/gemma/pseudo-tool-call-types.ts +31 -0
- package/src/gemma/reasoning-projector.test.ts +52 -0
- package/src/gemma/reasoning-projector.ts +144 -0
- package/src/gemma/streaming-projection.ts +79 -0
- package/src/gemma/tool-call-argument-parser.ts +126 -0
- package/src/gemma/tool-call-projector.test.ts +227 -0
- package/src/gemma/tool-call-projector.ts +264 -0
- package/src/gemma/types.ts +27 -0
- package/src/google/index.ts +11 -0
- package/src/google/provider-compat.test.ts +19 -0
- package/src/google/provider-definition.ts +6 -0
- package/src/google/provider.ts +10 -0
- package/src/google/types.ts +5 -0
- package/src/index.ts +9 -0
- package/src/openai/adapter.test.ts +494 -0
- package/src/openai/adapter.ts +145 -0
- package/src/openai/chat-completions-chat.ts +189 -0
- package/src/openai/executor-integration.test.ts +206 -0
- package/src/openai/index.ts +21 -0
- package/src/openai/interfaces/payload-logger.ts +48 -0
- package/src/openai/loggers/console-payload-logger.test.ts +173 -0
- package/src/openai/loggers/console-payload-logger.ts +94 -0
- package/src/openai/loggers/console.ts +9 -0
- package/src/openai/loggers/file-payload-logger.test.ts +238 -0
- package/src/openai/loggers/file-payload-logger.ts +112 -0
- package/src/openai/loggers/file.ts +9 -0
- package/src/openai/loggers/index.ts +12 -0
- package/src/openai/loggers/sanitize-openai-log-data.test.ts +89 -0
- package/src/openai/loggers/sanitize-openai-log-data.ts +14 -0
- package/src/openai/message-converter.ts +22 -0
- package/src/openai/model-catalog-refresh.test.ts +92 -0
- package/src/openai/model-catalog-refresh.ts +115 -0
- package/src/openai/openai-request-format.ts +92 -0
- package/src/openai/parsers/response-parser.test.ts +407 -0
- package/src/openai/parsers/response-parser.ts +47 -0
- package/src/openai/provider-definition.test.ts +75 -0
- package/src/openai/provider-definition.ts +132 -0
- package/src/openai/provider.test.ts +1402 -0
- package/src/openai/provider.ts +237 -0
- package/src/openai/responses-chat.ts +258 -0
- package/src/openai/responses-converter.ts +112 -0
- package/src/openai/responses-parser.ts +285 -0
- package/src/openai/responses-stream-utils.ts +45 -0
- package/src/openai/responses-types.ts +195 -0
- package/src/openai/streaming/stream-assembler.ts +3 -0
- package/src/openai/streaming/stream-handler.test.ts +367 -0
- package/src/openai/streaming/stream-handler.ts +119 -0
- package/src/openai/types/api-types.ts +112 -0
- package/src/openai/types.ts +194 -0
- package/src/qwen/defaults.ts +26 -0
- package/src/qwen/index.ts +5 -0
- package/src/qwen/model-catalog-refresh.test.ts +91 -0
- package/src/qwen/model-catalog-refresh.ts +97 -0
- package/src/qwen/provider-capabilities.ts +34 -0
- package/src/qwen/provider-definition.test.ts +139 -0
- package/src/qwen/provider-definition.ts +173 -0
- package/src/qwen/provider-streaming-assembly.ts +40 -0
- package/src/qwen/provider.test.ts +640 -0
- package/src/qwen/provider.ts +293 -0
- package/src/qwen/responses-chat.ts +194 -0
- package/src/qwen/responses-converter.ts +104 -0
- package/src/qwen/responses-parser.ts +299 -0
- package/src/qwen/responses-stream-utils.ts +38 -0
- package/src/qwen/types.ts +228 -0
- package/src/shared/openai-compatible/endpoint-probe.test.ts +52 -0
- package/src/shared/openai-compatible/endpoint-probe.ts +43 -0
- package/src/shared/openai-compatible/index.ts +6 -0
- package/src/shared/openai-compatible/message-converter.test.ts +111 -0
- package/src/shared/openai-compatible/message-converter.ts +84 -0
- package/src/shared/openai-compatible/native-payload-observer.test.ts +43 -0
- package/src/shared/openai-compatible/native-payload-observer.ts +26 -0
- package/src/shared/openai-compatible/response-parser.test.ts +172 -0
- package/src/shared/openai-compatible/response-parser.ts +180 -0
- package/src/shared/openai-compatible/stream-assembler.test.ts +266 -0
- package/src/shared/openai-compatible/stream-assembler.ts +248 -0
- package/src/shared/openai-compatible/types.ts +59 -0
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
import { describe, expect, it, vi, beforeEach } from 'vitest';
|
|
2
|
+
import { GeminiProvider } from './provider';
|
|
3
|
+
import type { TUniversalMessage } from '@robota-sdk/agent-core';
|
|
4
|
+
|
|
5
|
+
/** Inline binary payload (MIME type plus data string) carried by a request part. */
interface IGenerateContentInputInlineData {
  mimeType: string;
  data: string;
}

/** Function-call payload carried by a request part. */
interface IGenerateContentInputFunctionCall {
  id?: string;
  name: string;
  args: Record<string, string | number | boolean | object>;
}

/** One part of a request turn; every field is optional. */
interface IGenerateContentInputPart {
  text?: string;
  inlineData?: IGenerateContentInputInlineData;
  functionCall?: IGenerateContentInputFunctionCall;
}

/** One conversation turn in the request payload. */
interface IGenerateContentInputTurn {
  role: 'user' | 'model';
  parts: Array<IGenerateContentInputPart>;
}

/** Optional generation settings attached to the request payload. */
interface IGenerateContentInputConfig {
  temperature?: number;
  maxOutputTokens?: number;
  responseModalities?: Array<'TEXT' | 'IMAGE'>;
}

/**
 * Payload shape the tests expect the provider to hand to the mocked
 * `generateContent` call.
 */
interface IGenerateContentInput {
  model: string;
  contents: Array<IGenerateContentInputTurn>;
  config?: IGenerateContentInputConfig;
}
|
|
28
|
+
|
|
29
|
+
/** Response shape the tests resolve the mocked `generateContent` call with. */
interface IGenerateContentMockResponse {
  candidates: Array<{
    content: {
      parts: Array<{
        text?: string;
        inlineData?: {
          mimeType: string;
          data: string;
        };
      }>;
    };
  }>;
}

/**
 * Shared mock standing in for `models.generateContent`. Each test configures
 * its resolved value and inspects the recorded call arguments.
 */
const generateContentMock = vi.fn<
  [IGenerateContentInput],
  Promise<IGenerateContentMockResponse>
>();
|
|
45
|
+
|
|
46
|
+
// Swap the '@google/genai' module for a minimal stand-in that exposes only the
// `GoogleGenAI` class and the `Type` constants.
vi.mock('@google/genai', () => {
  // Built once per instance so `generateContentMock` is only read when a
  // client is actually constructed, not when this factory runs.
  const createModels = () => ({
    generateContent: generateContentMock,
    async *generateContentStream(): AsyncIterable<{ text: string }> {
      yield { text: '' };
    },
  });

  const Type = {
    STRING: 'STRING',
    NUMBER: 'NUMBER',
    INTEGER: 'INTEGER',
    BOOLEAN: 'BOOLEAN',
    ARRAY: 'ARRAY',
    OBJECT: 'OBJECT',
  };

  class GoogleGenAI {
    public readonly models = createModels();

    // The options argument is accepted to mirror the constructor call made by
    // the provider; the stand-in ignores it.
    public constructor(_options: { apiKey: string }) {}
  }

  return { GoogleGenAI, Type };
});
|
|
69
|
+
|
|
70
|
+
describe('GeminiProvider image support', () => {
  // Reset the shared SDK mock so recorded calls and resolved values from one
  // test never leak into the next.
  beforeEach(() => {
    generateContentMock.mockReset();
  });

  it('maps inline image output from Gemini response to assistant parts', async () => {
    generateContentMock.mockResolvedValue({
      candidates: [
        {
          content: {
            parts: [
              { text: 'created' },
              {
                inlineData: {
                  mimeType: 'image/png',
                  data: 'ZmFrZS1pbWFnZS1kYXRh',
                },
              },
            ],
          },
        },
      ],
    });

    const provider = new GeminiProvider({ apiKey: 'test-key' });
    const response = await provider.chat(
      [
        {
          id: 'msg-1',
          state: 'complete' as const,
          role: 'user',
          content: 'create an image',
          timestamp: new Date(),
        },
      ],
      {
        model: 'gemini-2.5-flash-image',
        google: { responseModalities: ['TEXT', 'IMAGE'] },
      },
    );

    expect(response.role).toBe('assistant');
    expect(response.parts).toBeDefined();
    expect(response.parts?.some((part) => part.type === 'image_inline')).toBe(true);
  });

  it('maps inline image input parts into Gemini inlineData request parts', async () => {
    generateContentMock.mockResolvedValue({
      candidates: [
        {
          content: {
            parts: [
              { text: 'ok' },
              {
                inlineData: {
                  mimeType: 'image/png',
                  data: 'ZmFrZS1pbWFnZS1kYXRh',
                },
              },
            ],
          },
        },
      ],
    });

    const provider = new GeminiProvider({ apiKey: 'test-key' });
    const messages: TUniversalMessage[] = [
      {
        id: 'msg-1',
        state: 'complete' as const,
        role: 'user',
        content: '',
        parts: [
          {
            type: 'image_inline',
            mimeType: 'image/png',
            data: 'ZmFrZS1pbWFnZS1pbnB1dA==',
          },
          {
            type: 'text',
            text: 'edit this image',
          },
        ],
        timestamp: new Date(),
      },
    ];

    await provider.chat(messages, {
      model: 'gemini-2.5-flash-image',
    });

    // Assert the SDK was reached before inspecting the payload, so a missing
    // call fails with a readable assertion instead of a TypeError on undefined.
    expect(generateContentMock).toHaveBeenCalled();
    const calledPayload = generateContentMock.mock.calls[0]?.[0];
    expect(calledPayload).toBeDefined();
    expect(calledPayload?.contents[0]?.parts[0]?.inlineData?.mimeType).toBe('image/png');
    expect(calledPayload?.contents[0]?.parts[1]?.text).toBe('edit this image');
    expect(calledPayload?.config?.responseModalities).toEqual(['TEXT', 'IMAGE']);
  });

  it('throws when image modality is requested with non-image model', async () => {
    const provider = new GeminiProvider({ apiKey: 'test-key' });

    await expect(
      provider.chat(
        [
          {
            id: 'msg-1',
            state: 'complete' as const,
            role: 'user',
            content: 'generate image',
            timestamp: new Date(),
          },
        ],
        {
          model: 'gemini-1.5-pro',
          google: { responseModalities: ['IMAGE'] },
        },
      ),
    ).rejects.toThrow('Google chat failed:');
  });

  it('throws when image uri message part is used directly', async () => {
    const provider = new GeminiProvider({ apiKey: 'test-key' });

    await expect(
      provider.chat(
        [
          {
            id: 'msg-1',
            state: 'complete' as const,
            role: 'user',
            content: '',
            parts: [
              {
                type: 'image_uri',
                uri: 'asset://example',
                mimeType: 'image/png',
              },
            ],
            timestamp: new Date(),
          },
        ],
        {
          model: 'gemini-2.5-flash-image',
        },
      ),
    ).rejects.toThrow('Google chat failed:');
  });
});
|
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
import { randomUUID } from 'node:crypto';
|
|
2
|
+
import { GoogleGenAI } from '@google/genai';
|
|
3
|
+
import type { IGeminiProviderOptions } from './types';
|
|
4
|
+
import { AbstractAIProvider } from '@robota-sdk/agent-core';
|
|
5
|
+
import type {
|
|
6
|
+
TUniversalMessage,
|
|
7
|
+
IChatOptions,
|
|
8
|
+
TTextDeltaCallback,
|
|
9
|
+
TUniversalMessagePart,
|
|
10
|
+
IImageGenerationProvider,
|
|
11
|
+
IImageGenerationRequest,
|
|
12
|
+
IImageEditRequest,
|
|
13
|
+
IImageComposeRequest,
|
|
14
|
+
IImageGenerationResult,
|
|
15
|
+
TProviderMediaResult,
|
|
16
|
+
} from '@robota-sdk/agent-core';
|
|
17
|
+
import { mapImageInputSourceToPart } from './image-operations';
|
|
18
|
+
import { executeDirect, executeDirectStream, runImageRequest } from './execution-helpers';
|
|
19
|
+
|
|
20
|
+
/** Builds the failed media result returned when an image request is rejected before any API call. */
function createInvalidImageRequestResult(
  message: string,
): TProviderMediaResult<IImageGenerationResult> {
  return {
    ok: false,
    error: {
      code: 'PROVIDER_INVALID_REQUEST',
      message,
    },
  };
}

/**
 * Checks the prompt and model fields shared by every image request.
 *
 * @param operationLabel - Prefix used in the error message (e.g. `Image edit`).
 * @param request - Object carrying the `prompt` and `model` strings to check.
 * @returns A failed result when either field is blank after trimming (prompt is
 *   checked first), otherwise `undefined`.
 */
function validateImagePromptAndModel(
  operationLabel: string,
  request: { prompt: string; model: string },
): TProviderMediaResult<IImageGenerationResult> | undefined {
  if (request.prompt.trim().length === 0) {
    return createInvalidImageRequestResult(`${operationLabel} requires a non-empty prompt.`);
  }
  if (request.model.trim().length === 0) {
    return createInvalidImageRequestResult(`${operationLabel} requires a non-empty model.`);
  }
  return undefined;
}

/** Wraps a prompt and its message parts into a complete user message with a fresh id and timestamp. */
function createImagePromptMessage(
  prompt: string,
  parts: TUniversalMessagePart[],
): TUniversalMessage {
  return {
    id: randomUUID(),
    role: 'user',
    content: prompt,
    state: 'complete',
    parts,
    timestamp: new Date(),
  };
}

/**
 * Gemini provider implementation for Robota
 *
 * IMPORTANT PROVIDER-SPECIFIC RULES:
 * 1. This provider MUST extend AbstractAIProvider from @robota-sdk/agent-core
 * 2. Content handling for Google Gemini API:
 *    - Function calls can have content (text) along with function calls
 *    - Content can be empty string or actual text, NOT null
 * 3. Use override keyword for all methods inherited from AbstractAIProvider
 * 4. Provider-specific API behavior should be documented here
 *
 * @public
 */
export class GeminiProvider extends AbstractAIProvider implements IImageGenerationProvider {
  override readonly name: string = 'gemini';
  override readonly version = '1.0.0';
  /** Fallback text-delta callback, used when the per-call options do not supply one. */
  public onTextDelta?: TTextDeltaCallback;

  /** SDK client; only created when no executor is configured. */
  private readonly client?: GoogleGenAI;
  private readonly options: IGeminiProviderOptions;

  /**
   * @param options - Provider options. When `options.executor` is set it is used
   *   for all calls and no SDK client is created; otherwise a client is created
   *   from `options.apiKey`.
   */
  constructor(options: IGeminiProviderOptions) {
    super();
    this.options = options;

    if (options.executor) {
      this.executor = options.executor;
    }

    if (!this.executor) {
      this.client = new GoogleGenAI({ apiKey: options.apiKey });
    }
  }

  /**
   * Generate response using TUniversalMessage
   *
   * @throws The executor's error, logged and rethrown unchanged, when an executor is configured.
   * @throws Error prefixed with `Google chat failed:` when the direct SDK path fails.
   */
  override async chat(
    messages: TUniversalMessage[],
    options?: IChatOptions,
  ): Promise<TUniversalMessage> {
    this.validateMessages(messages);

    if (this.executor) {
      try {
        return await this.executeViaExecutorOrDirect(messages, options);
      } catch (error) {
        this.logger.error(
          'Gemini Provider executor chat error:',
          error instanceof Error ? error.message : String(error),
        );
        throw error;
      }
    }

    if (!this.client) {
      throw new Error('Google client not available. Either provide apiKey or use an executor.');
    }

    try {
      return await executeDirect(
        this.client,
        this.options,
        messages,
        this.withProviderCallbacks(options),
        this.name,
      );
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : 'Google API request failed';
      throw new Error(`Google chat failed: ${errorMessage}`);
    }
  }

  /**
   * Generate streaming response using TUniversalMessage
   *
   * @throws The executor's error, logged and rethrown unchanged, when an executor is configured.
   * @throws Error prefixed with `Google stream failed:` when the direct SDK path fails.
   */
  override async *chatStream(
    messages: TUniversalMessage[],
    options?: IChatOptions,
  ): AsyncIterable<TUniversalMessage> {
    this.validateMessages(messages);
    if (this.executor) {
      try {
        yield* this.executeStreamViaExecutorOrDirect(messages, options);
        return;
      } catch (error) {
        this.logger.error(
          'Gemini Provider executor stream error:',
          error instanceof Error ? error.message : String(error),
        );
        throw error;
      }
    }

    if (!this.client) {
      throw new Error('Google client not available. Either provide apiKey or use an executor.');
    }

    try {
      yield* executeDirectStream(
        this.client,
        this.options,
        messages,
        this.withProviderCallbacks(options),
        this.name,
      );
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : 'Google API request failed';
      throw new Error(`Google stream failed: ${errorMessage}`);
    }
  }

  /**
   * Generate an image from a text prompt using the Gemini API.
   *
   * @returns A failed `PROVIDER_INVALID_REQUEST` result when the prompt or model
   *   is blank; otherwise the result of `runImageRequest`.
   */
  public async generateImage(
    request: IImageGenerationRequest,
  ): Promise<TProviderMediaResult<IImageGenerationResult>> {
    const invalid = validateImagePromptAndModel('Image generation', request);
    if (invalid) {
      return invalid;
    }

    const message = createImagePromptMessage(request.prompt, [
      { type: 'text', text: request.prompt },
    ]);
    return runImageRequest(this.chat.bind(this), [message], request.model);
  }

  /**
   * Edit an existing image based on a text prompt using the Gemini API.
   *
   * @returns A failed result when the prompt or model is blank or the input
   *   image cannot be mapped to a message part; otherwise the result of
   *   `runImageRequest`.
   */
  public async editImage(
    request: IImageEditRequest,
  ): Promise<TProviderMediaResult<IImageGenerationResult>> {
    const invalid = validateImagePromptAndModel('Image edit', request);
    if (invalid) {
      return invalid;
    }

    const inputPartResult = mapImageInputSourceToPart(request.image);
    if (!inputPartResult.ok) {
      return inputPartResult;
    }

    // The image part precedes the instruction text in the outgoing message.
    const message = createImagePromptMessage(request.prompt, [
      inputPartResult.value,
      { type: 'text', text: request.prompt },
    ]);
    return runImageRequest(this.chat.bind(this), [message], request.model);
  }

  /**
   * Compose multiple images together based on a text prompt using the Gemini API.
   *
   * @returns A failed result when the prompt or model is blank, fewer than two
   *   images are supplied, or any image cannot be mapped to a message part;
   *   otherwise the result of `runImageRequest`.
   */
  public async composeImage(
    request: IImageComposeRequest,
  ): Promise<TProviderMediaResult<IImageGenerationResult>> {
    const invalid = validateImagePromptAndModel('Image compose', request);
    if (invalid) {
      return invalid;
    }
    if (request.images.length < 2) {
      return createInvalidImageRequestResult('Image compose requires at least two input images.');
    }

    const messageParts: TUniversalMessagePart[] = [];
    for (const imageSource of request.images) {
      const mappedPartResult = mapImageInputSourceToPart(imageSource);
      if (!mappedPartResult.ok) {
        // Stop at the first image that cannot be mapped and surface its error.
        return mappedPartResult;
      }
      messageParts.push(mappedPartResult.value);
    }
    messageParts.push({ type: 'text', text: request.prompt });

    const message = createImagePromptMessage(request.prompt, messageParts);
    return runImageRequest(this.chat.bind(this), [message], request.model);
  }

  /** This provider always reports tool support. */
  override supportsTools(): boolean {
    return true;
  }

  /**
   * Delegates to the executor when one is configured; otherwise requires both
   * an SDK client and a truthy `apiKey`.
   */
  override validateConfig(): boolean {
    if (this.executor) {
      return this.executor.validateConfig();
    }
    return !!this.client && !!this.options && !!this.options.apiKey;
  }

  override async dispose(): Promise<void> {
    // Google client does not need explicit cleanup
  }

  /**
   * Returns chat options with a text-delta callback filled in: the per-call
   * callback wins, otherwise the provider-level `onTextDelta` is used. When
   * neither exists the options are returned untouched.
   */
  private withProviderCallbacks(options?: IChatOptions): IChatOptions | undefined {
    const onTextDelta = options?.onTextDelta ?? this.onTextDelta;
    if (!onTextDelta) {
      return options;
    }
    return {
      ...options,
      onTextDelta,
    };
  }
}
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
import type { Content, Part } from '@google/genai';
|
|
2
|
+
import type {
|
|
3
|
+
IAssistantMessage,
|
|
4
|
+
ISystemMessage,
|
|
5
|
+
IToolMessage,
|
|
6
|
+
IUserMessage,
|
|
7
|
+
TUniversalMessage,
|
|
8
|
+
} from '@robota-sdk/agent-core';
|
|
9
|
+
|
|
10
|
+
/** JSON object with read-only string keys whose values are JSON values. */
interface IGoogleJsonObject {
  readonly [key: string]: TGoogleJsonValue;
}

/** Any JSON value: a primitive, `null`, an array of JSON values, or a JSON object. */
type TGoogleJsonValue =
  | string
  | number
  | boolean
  | null
  | TGoogleJsonValue[]
  | IGoogleJsonObject;
|
|
15
|
+
|
|
16
|
+
/** Output of converting universal messages into the pieces of a Gemini request. */
export interface IGeminiMessageConversionResult {
  /** Conversation turns in Gemini `Content` form. */
  contents: Array<Content>;
  /** Request-level system instruction text, when one was produced. */
  systemInstruction?: string;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Translates a universal message's parts into Gemini `Part` objects.
 *
 * Text parts become `{ text }` and inline images become `{ inlineData }`. Any
 * other part is rejected with an error that names its URI. When the message
 * yields no parts but has a non-empty string `content`, that content is used
 * as a single text part.
 *
 * @param message - Message whose `parts` (or fallback `content`) are converted.
 * @returns The Gemini parts, possibly empty.
 * @throws Error when a part is neither text nor an inline image.
 */
export function mapMessagePartsToGeminiParts(
  message: IUserMessage | IAssistantMessage | ISystemMessage | IToolMessage,
): Part[] {
  const converted: Part[] = (message.parts ?? []).map((part): Part => {
    if (part.type === 'image_inline') {
      const { mimeType, data } = part;
      return { inlineData: { mimeType, data } };
    }
    if (part.type === 'text') {
      return { text: part.text };
    }
    throw new Error(`Google provider does not support image URI parts directly: ${part.uri}`);
  });

  // Fall back to the plain content only when no explicit parts were produced.
  const hasTextContent = typeof message.content === 'string' && message.content.length > 0;
  if (converted.length === 0 && hasTextContent) {
    converted.push({ text: message.content as string });
  }
  return converted;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Converts universal messages to Gemini `Content` turns only.
 *
 * Thin wrapper over `convertToGeminiRequestFormat` that discards everything
 * except the `contents` field of its result.
 *
 * IMPORTANT: Google Gemini allows content with function calls.
 * Content can be empty string or text, but NOT null.
 */
export function convertToGeminiFormat(messages: TUniversalMessage[]): Content[] {
  const { contents } = convertToGeminiRequestFormat(messages);
  return contents;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Splits universal messages into Gemini conversation turns and a request-level
 * system instruction.
 *
 * User, assistant and tool messages each produce one `Content` entry in input
 * order. Every other message is treated as a system message: its extracted
 * text, when non-empty, is collected and the collected texts are joined with
 * newlines. `systemInstruction` is left off the result when nothing was
 * collected.
 */
export function convertToGeminiRequestFormat(
  messages: TUniversalMessage[],
): IGeminiMessageConversionResult {
  const turns: Content[] = [];
  const systemTexts: string[] = [];

  for (const message of messages) {
    switch (message.role) {
      case 'user':
        turns.push({
          role: 'user',
          parts: mapMessagePartsToGeminiParts(message as IUserMessage),
        });
        break;
      case 'assistant':
        turns.push(convertAssistantMessage(message as IAssistantMessage));
        break;
      case 'tool':
        turns.push(convertToolMessage(message as IToolMessage));
        break;
      default: {
        // Anything that is not a conversation turn feeds the system instruction.
        const text = extractSystemInstructionText(message as ISystemMessage);
        if (text.length > 0) {
          systemTexts.push(text);
        }
      }
    }
  }

  const result: IGeminiMessageConversionResult = { contents: turns };
  if (systemTexts.length > 0) {
    result.systemInstruction = systemTexts.join('\n');
  }
  return result;
}
|
|
102
|
+
|
|
103
|
+
/**
 * Contents-only conversion that keeps system messages in the conversation.
 *
 * Produces exactly one `Content` per input message: assistant messages go
 * through `convertAssistantMessage`, while user, tool and system messages all
 * become `user` turns built from their mapped parts. Kept for callers that
 * still need a contents-only value.
 */
export function convertToGeminiFormatWithInlineSystem(messages: TUniversalMessage[]): Content[] {
  const asUserTurn = (parts: Part[]): Content => ({ role: 'user', parts });

  return messages.map((message): Content => {
    switch (message.role) {
      case 'assistant':
        return convertAssistantMessage(message as IAssistantMessage);
      case 'user':
        return asUserTurn(mapMessagePartsToGeminiParts(message as IUserMessage));
      case 'tool':
        return asUserTurn(mapMessagePartsToGeminiParts(message as IToolMessage));
      default: {
        const systemMessage = message as ISystemMessage;
        const systemParts = mapMessagePartsToGeminiParts(systemMessage);
        // NOTE(review): the mapper already turns non-empty string content into a
        // text part, so this "System:" placeholder appears to be reached only
        // when the message has no usable content — confirm that is intended.
        if (systemParts.length === 0) {
          systemParts.push({ text: `System: ${systemMessage.content || ''}` });
        }
        return asUserTurn(systemParts);
      }
    }
  });
}
|
|
137
|
+
|
|
138
|
+
/**
 * Builds a Gemini `model` content entry from an assistant message.
 *
 * Parts are assembled in this order: the message's mapped parts, then (only if
 * none were mapped and `content` is truthy) a single text part holding
 * `content`, then one `functionCall` part per entry in `toolCalls`.
 *
 * @param assistantMsg - Assistant message to convert.
 * @returns Content with role `model` and the assembled parts.
 * @throws Whatever `parseToolCallArguments` throws for a tool call's arguments.
 */
function convertAssistantMessage(assistantMsg: IAssistantMessage): Content {
  const parts: Part[] = [...mapMessagePartsToGeminiParts(assistantMsg)];

  // Plain text content is only used when no structured parts were mapped.
  if (parts.length === 0 && assistantMsg.content) {
    parts.push({ text: assistantMsg.content });
  }

  assistantMsg.toolCalls?.forEach((call) => {
    const functionCall = {
      id: call.id,
      name: call.function.name,
      args: parseToolCallArguments(call.function.arguments),
    };
    parts.push({ functionCall });
  });

  return { role: 'model', parts };
}
|
|
163
|
+
|
|
164
|
+
/**
 * Builds a Gemini `user` content entry carrying a single `functionResponse`
 * part for a tool message.
 *
 * @param toolMessage - Tool message providing `toolCallId`, `name` and `content`.
 * @returns Content with role `user` and one `functionResponse` part.
 * @throws Error when the tool message has no usable function name.
 */
function convertToolMessage(toolMessage: IToolMessage): Content {
  const id = toolMessage.toolCallId;
  // Name validation runs before the response payload is parsed.
  const name = requireToolMessageName(toolMessage);
  const response = parseToolResponseContent(toolMessage.content);

  return {
    role: 'user',
    parts: [{ functionResponse: { id, name, response } }],
  };
}
|
|
175
|
+
|
|
176
|
+
/**
 * Produces the plain-text system instruction for a system message.
 *
 * When the message maps to no Gemini parts, its `content` is returned as-is.
 * Otherwise the `text` of every mapped part is joined with `\n`.
 *
 * @param systemMessage - System message to read.
 * @returns The instruction text.
 * @throws Error when any mapped part does not carry a string `text`.
 */
function extractSystemInstructionText(systemMessage: ISystemMessage): string {
  const geminiParts = mapMessagePartsToGeminiParts(systemMessage);
  if (geminiParts.length === 0) {
    return systemMessage.content;
  }

  const lines = geminiParts.map((geminiPart) => {
    // Only text parts are accepted; the first other part aborts the conversion.
    if (typeof geminiPart.text !== 'string') {
      throw new Error('Google provider system instructions support only text parts.');
    }
    return geminiPart.text;
  });

  return lines.join('\n');
}
|
|
191
|
+
|
|
192
|
+
/**
 * Returns the tool message's function name with surrounding whitespace removed.
 *
 * @param toolMessage - Tool message whose `name` is read.
 * @returns The trimmed, non-empty function name.
 * @throws Error when `name` is missing or is empty after trimming.
 */
function requireToolMessageName(toolMessage: IToolMessage): string {
  const trimmedName = toolMessage.name?.trim();
  if (trimmedName === undefined || trimmedName === '') {
    throw new Error('Google provider tool message requires a function name.');
  }
  return trimmedName;
}
|
|
199
|
+
|
|
200
|
+
/**
 * Parses the serialized arguments of a tool call into a JSON object.
 *
 * @param serializedArguments - JSON text holding the tool call arguments.
 * @returns The parsed arguments object.
 * @throws SyntaxError when the text is not valid JSON. The message names the
 * provider and includes the parser's own detail so the failure is traceable.
 * @throws Error when the parsed value is not a plain JSON object (for example
 * an array, a primitive or `null`).
 */
function parseToolCallArguments(serializedArguments: string): IGoogleJsonObject {
  let parsedArguments: TGoogleJsonValue;
  try {
    parsedArguments = JSON.parse(serializedArguments) as TGoogleJsonValue;
  } catch (error) {
    // Keep the SyntaxError class that JSON.parse raises, but add provider context
    // instead of leaking a bare "Unexpected token ..." message.
    const detail = error instanceof Error ? error.message : String(error);
    throw new SyntaxError(`Google provider tool call arguments must be valid JSON: ${detail}`);
  }

  if (!isJsonObject(parsedArguments)) {
    throw new Error('Google provider tool call arguments must be a JSON object.');
  }
  return parsedArguments;
}
|
|
207
|
+
|
|
208
|
+
/**
 * Turns a tool message's textual content into the object sent as a Gemini
 * function response.
 *
 * - Blank content (empty after trimming) yields `{ output: null }`.
 * - Content that parses to a JSON object is returned as that object.
 * - Content that parses to any other JSON value is wrapped as `{ output: value }`.
 * - Content that is not valid JSON is wrapped, untrimmed, as `{ output: content }`.
 *
 * @param content - Raw tool result text.
 * @returns A JSON object describing the tool result.
 */
function parseToolResponseContent(content: string): IGoogleJsonObject {
  const candidate = content.trim();
  if (candidate === '') {
    return { output: null };
  }

  let decoded: TGoogleJsonValue;
  try {
    decoded = JSON.parse(candidate) as TGoogleJsonValue;
  } catch {
    // Not JSON: pass the original (untrimmed) text through as the output.
    return { output: content };
  }

  return isJsonObject(decoded) ? decoded : { output: decoded };
}
|
|
223
|
+
|
|
224
|
+
/**
 * Type guard for plain JSON objects.
 *
 * @param value - Parsed JSON value to inspect.
 * @returns `true` when `value` has type `object` and is neither `null` nor an array.
 */
function isJsonObject(value: TGoogleJsonValue): value is IGoogleJsonObject {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}
|