@umituz/react-native-ai-gemini-provider 1.0.3 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@umituz/react-native-ai-gemini-provider",
3
- "version": "1.0.3",
3
+ "version": "1.0.6",
4
4
  "description": "Google Gemini AI provider for React Native applications",
5
5
  "main": "src/index.ts",
6
6
  "types": "src/index.ts",
@@ -18,7 +18,8 @@
18
18
  "gemini",
19
19
  "google",
20
20
  "generation",
21
- "llm"
21
+ "llm",
22
+ "image-generation"
22
23
  ],
23
24
  "author": "umituz",
24
25
  "license": "MIT",
@@ -27,9 +28,11 @@
27
28
  "url": "git+https://github.com/umituz/react-native-ai-gemini-provider.git"
28
29
  },
29
30
  "peerDependencies": {
30
- "react": ">=18.0.0"
31
+ "react": ">=18.0.0",
32
+ "@google/generative-ai": ">=0.21.0"
31
33
  },
32
34
  "devDependencies": {
35
+ "@google/generative-ai": "^0.21.0",
33
36
  "@types/react": "^19.0.0",
34
37
  "@typescript-eslint/eslint-plugin": "^7.0.0",
35
38
  "@typescript-eslint/parser": "^7.0.0",
@@ -11,6 +11,8 @@ export interface GeminiConfig {
11
11
  maxDelay?: number;
12
12
  defaultTimeoutMs?: number;
13
13
  defaultModel?: string;
14
+ /** Model used for image generation (default: gemini-2.0-flash-exp) */
15
+ imageModel?: string;
14
16
  }
15
17
 
16
18
  export interface GeminiGenerationConfig {
@@ -101,3 +103,27 @@ export interface GeminiModel {
101
103
  outputTokenLimit?: number;
102
104
  supportedCapabilities?: string[];
103
105
  }
106
+
107
+ /**
108
+ * Result from image generation
109
+ */
110
+ export interface GeminiImageGenerationResult {
111
+ /** Generated text (story, caption, etc.) */
112
+ text?: string;
113
+ /** Data URL of the generated image (data:image/png;base64,...) */
114
+ imageUrl?: string;
115
+ /** Raw base64 image data */
116
+ imageBase64?: string;
117
+ /** MIME type of the generated image */
118
+ mimeType?: string;
119
+ }
120
+
121
+ /**
122
+ * Input for image generation
123
+ */
124
+ export interface GeminiImageInput {
125
+ /** Base64 encoded image data (with or without data URL prefix) */
126
+ base64: string;
127
+ /** MIME type (e.g., "image/png", "image/jpeg") */
128
+ mimeType: string;
129
+ }
package/src/index.ts CHANGED
@@ -31,6 +31,8 @@ export type {
31
31
  GeminiPromptFeedback,
32
32
  GeminiUsageMetadata,
33
33
  GeminiModel,
34
+ GeminiImageGenerationResult,
35
+ GeminiImageInput,
34
36
  } from "./domain/entities";
35
37
 
36
38
  export { GeminiErrorType } from "./domain/entities";
@@ -47,6 +49,7 @@ export type {
47
49
  export {
48
50
  geminiClientService,
49
51
  geminiProviderService,
52
+ createGeminiProvider,
50
53
  } from "./infrastructure/services";
51
54
 
52
55
  export type {
package/src/infrastructure/services/gemini-client.service.ts CHANGED
@@ -1,40 +1,43 @@
1
1
  /**
2
2
  * Gemini Client Service
3
- * Google Gemini AI client wrapper
3
+ * Google Gemini AI client using official SDK
4
4
  */
5
5
 
6
+ import { GoogleGenerativeAI, type GenerativeModel } from "@google/generative-ai";
6
7
  import type {
7
8
  GeminiConfig,
8
- GeminiRequest,
9
- GeminiResponse,
10
9
  GeminiContent,
11
10
  GeminiGenerationConfig,
11
+ GeminiResponse,
12
+ GeminiImageGenerationResult,
13
+ GeminiPart,
14
+ GeminiFinishReason,
12
15
  } from "../../domain/entities";
13
16
 
14
17
  declare const __DEV__: boolean;
15
18
 
16
19
  const DEFAULT_CONFIG: Partial<GeminiConfig> = {
17
- baseUrl: "https://generativelanguage.googleapis.com/v1beta",
18
20
  maxRetries: 3,
19
21
  baseDelay: 1000,
20
22
  maxDelay: 10000,
21
23
  defaultTimeoutMs: 60000,
22
24
  defaultModel: "gemini-1.5-flash",
25
+ imageModel: "gemini-2.0-flash-exp",
23
26
  };
24
27
 
25
28
  class GeminiClientService {
26
- private apiKey: string | null = null;
29
+ private client: GoogleGenerativeAI | null = null;
27
30
  private config: GeminiConfig | null = null;
28
31
  private initialized = false;
29
32
 
30
33
  initialize(config: GeminiConfig): void {
31
- this.apiKey = config.apiKey;
34
+ this.client = new GoogleGenerativeAI(config.apiKey);
32
35
  this.config = { ...DEFAULT_CONFIG, ...config };
33
36
  this.initialized = true;
34
37
 
35
38
  if (typeof __DEV__ !== "undefined" && __DEV__) {
36
39
  // eslint-disable-next-line no-console
37
- console.log("[Gemini] Client initialized");
40
+ console.log("[Gemini] Client initialized with official SDK");
38
41
  }
39
42
  }
40
43
 
@@ -47,81 +50,90 @@ class GeminiClientService {
47
50
  }
48
51
 
49
52
  private validateInitialization(): void {
50
- if (!this.apiKey || !this.initialized) {
53
+ if (!this.client || !this.initialized) {
51
54
  throw new Error(
52
55
  "Gemini client not initialized. Call initialize() first.",
53
56
  );
54
57
  }
55
58
  }
56
59
 
57
- private getEndpoint(model: string, action: string): string {
58
- const baseUrl = this.config?.baseUrl ?? DEFAULT_CONFIG.baseUrl;
59
- return `${baseUrl}/models/${model}:${action}?key=${this.apiKey}`;
60
- }
61
-
62
- private async request<T>(
63
- endpoint: string,
64
- body: unknown,
65
- timeoutMs?: number,
66
- ): Promise<T> {
67
- const timeout = timeoutMs ?? this.config?.defaultTimeoutMs ?? 60000;
68
-
69
- const controller = new AbortController();
70
- const timeoutId = setTimeout(() => controller.abort(), timeout);
71
-
72
- try {
73
- const response = await fetch(endpoint, {
74
- method: "POST",
75
- headers: {
76
- "Content-Type": "application/json",
77
- },
78
- body: JSON.stringify(body),
79
- signal: controller.signal,
80
- });
81
-
82
- if (!response.ok) {
83
- let errorData: { error?: { message?: string } } = {};
84
- try {
85
- errorData = (await response.json()) as { error?: { message?: string } };
86
- } catch {
87
- // Ignore JSON parse errors for error responses
88
- }
89
- const errorMessage = errorData?.error?.message ?? `HTTP ${response.status}`;
90
- const error = new Error(errorMessage);
91
- (error as unknown as Record<string, unknown>).status = response.status;
92
- (error as unknown as Record<string, unknown>).response = errorData;
93
- throw error;
94
- }
95
-
96
- return response.json() as Promise<T>;
97
- } finally {
98
- clearTimeout(timeoutId);
99
- }
60
+ private getModel(modelName?: string): GenerativeModel {
61
+ this.validateInitialization();
62
+ const effectiveModel = modelName || this.config?.defaultModel || "gemini-1.5-flash";
63
+ return this.client!.getGenerativeModel({ model: effectiveModel });
100
64
  }
101
65
 
66
+ /**
67
+ * Generate content (text, with optional images)
68
+ */
102
69
  async generateContent(
103
70
  model: string,
104
71
  contents: GeminiContent[],
105
72
  generationConfig?: GeminiGenerationConfig,
106
73
  ): Promise<GeminiResponse> {
107
- this.validateInitialization();
108
-
109
- const effectiveModel = model || this.config?.defaultModel || "gemini-1.5-flash";
110
- const endpoint = this.getEndpoint(effectiveModel, "generateContent");
111
-
112
- const body: GeminiRequest = {
113
- contents,
114
- generationConfig,
115
- };
74
+ const genModel = this.getModel(model);
116
75
 
117
76
  if (typeof __DEV__ !== "undefined" && __DEV__) {
118
77
  // eslint-disable-next-line no-console
119
- console.log("[Gemini] Generate content:", { model: effectiveModel });
78
+ console.log("[Gemini] Generate content:", { model });
120
79
  }
121
80
 
122
- return this.request<GeminiResponse>(endpoint, body);
81
+ // Convert our content format to SDK format
82
+ const sdkContents = contents.map((content) => ({
83
+ role: content.role || "user",
84
+ parts: content.parts.map((part) => {
85
+ if ("text" in part) {
86
+ return { text: part.text };
87
+ }
88
+ if ("inlineData" in part) {
89
+ return {
90
+ inlineData: {
91
+ mimeType: part.inlineData.mimeType,
92
+ data: part.inlineData.data,
93
+ },
94
+ };
95
+ }
96
+ // fileData parts
97
+ return part;
98
+ }),
99
+ }));
100
+
101
+ const result = await genModel.generateContent({
102
+ contents: sdkContents as Parameters<typeof genModel.generateContent>[0] extends { contents: infer C } ? C : never,
103
+ generationConfig,
104
+ });
105
+
106
+ const response = result.response;
107
+
108
+ return {
109
+ candidates: response.candidates?.map((candidate) => ({
110
+ content: {
111
+ parts: candidate.content.parts
112
+ .map((part): GeminiPart | null => {
113
+ if ("text" in part && part.text !== undefined) {
114
+ return { text: part.text };
115
+ }
116
+ if ("inlineData" in part && part.inlineData) {
117
+ return {
118
+ inlineData: {
119
+ mimeType: part.inlineData.mimeType,
120
+ data: part.inlineData.data,
121
+ },
122
+ };
123
+ }
124
+ return null;
125
+ })
126
+ .filter((p): p is GeminiPart => p !== null),
127
+ role: (candidate.content.role || "model") as "user" | "model",
128
+ },
129
+ finishReason: candidate.finishReason as GeminiFinishReason | undefined,
130
+ })),
131
+ };
123
132
  }
124
133
 
134
+ /**
135
+ * Generate text from prompt
136
+ */
125
137
  async generateText(
126
138
  model: string,
127
139
  prompt: string,
@@ -135,139 +147,170 @@ class GeminiClientService {
135
147
  return this.extractTextFromResponse(response);
136
148
  }
137
149
 
138
- async generateWithImage(
150
+ /**
151
+ * Generate content with images (multimodal)
152
+ */
153
+ async generateWithImages(
139
154
  model: string,
140
155
  prompt: string,
141
- imageBase64: string,
142
- mimeType: string,
156
+ images: Array<{ base64: string; mimeType: string }>,
143
157
  config?: GeminiGenerationConfig,
144
- ): Promise<string> {
145
- const contents: GeminiContent[] = [
146
- {
147
- parts: [
148
- { text: prompt },
149
- { inlineData: { mimeType, data: imageBase64 } },
150
- ],
151
- role: "user",
152
- },
153
- ];
158
+ ): Promise<GeminiResponse> {
159
+ const parts: GeminiContent["parts"] = [{ text: prompt }];
160
+
161
+ for (const image of images) {
162
+ // Remove data URL prefix if present
163
+ const base64Data = image.base64.includes(",")
164
+ ? image.base64.split(",")[1]
165
+ : image.base64;
166
+
167
+ parts.push({
168
+ inlineData: {
169
+ mimeType: image.mimeType,
170
+ data: base64Data,
171
+ },
172
+ });
173
+ }
154
174
 
155
- const response = await this.generateContent(model, contents, config);
156
- return this.extractTextFromResponse(response);
175
+ const contents: GeminiContent[] = [{ parts, role: "user" }];
176
+
177
+ return this.generateContent(model, contents, config);
157
178
  }
158
179
 
159
- private extractTextFromResponse(response: GeminiResponse): string {
160
- const candidate = response.candidates?.[0];
180
+ /**
181
+ * Generate image from prompt and/or input images
182
+ * Uses Gemini 2.0 Flash with image generation capability
183
+ */
184
+ async generateImage(
185
+ prompt: string,
186
+ images?: Array<{ base64: string; mimeType: string }>,
187
+ config?: GeminiGenerationConfig,
188
+ ): Promise<GeminiImageGenerationResult> {
189
+ const imageModel = this.config?.imageModel || "gemini-2.0-flash-exp";
161
190
 
162
- if (!candidate) {
163
- throw new Error("No response candidates");
191
+ if (typeof __DEV__ !== "undefined" && __DEV__) {
192
+ // eslint-disable-next-line no-console
193
+ console.log("[Gemini] Generate image:", { model: imageModel, hasInputImages: !!images?.length });
164
194
  }
165
195
 
166
- if (candidate.finishReason === "SAFETY") {
167
- throw new Error("Content blocked by safety filters");
196
+ const parts: GeminiContent["parts"] = [{ text: prompt }];
197
+
198
+ // Add input images if provided (for image-to-image generation)
199
+ if (images && images.length > 0) {
200
+ for (const image of images) {
201
+ const base64Data = image.base64.includes(",")
202
+ ? image.base64.split(",")[1]
203
+ : image.base64;
204
+
205
+ parts.push({
206
+ inlineData: {
207
+ mimeType: image.mimeType,
208
+ data: base64Data,
209
+ },
210
+ });
211
+ }
168
212
  }
169
213
 
170
- const textPart = candidate.content.parts.find(
171
- (p): p is { text: string } => "text" in p,
172
- );
214
+ const contents: GeminiContent[] = [{ parts, role: "user" }];
215
+ const response = await this.generateContent(imageModel, contents, config);
173
216
 
174
- if (!textPart) {
175
- throw new Error("No text in response");
217
+ // Extract generated image from response
218
+ const result: GeminiImageGenerationResult = {
219
+ text: undefined,
220
+ imageUrl: undefined,
221
+ imageBase64: undefined,
222
+ mimeType: undefined,
223
+ };
224
+
225
+ if (response.candidates && response.candidates.length > 0) {
226
+ const candidate = response.candidates[0];
227
+
228
+ for (const part of candidate.content.parts) {
229
+ if ("text" in part && part.text) {
230
+ result.text = part.text;
231
+ }
232
+ if ("inlineData" in part && part.inlineData) {
233
+ result.imageBase64 = part.inlineData.data;
234
+ result.mimeType = part.inlineData.mimeType;
235
+ result.imageUrl = `data:${part.inlineData.mimeType};base64,${part.inlineData.data}`;
236
+ }
237
+ }
176
238
  }
177
239
 
178
- return textPart.text;
240
+ return result;
179
241
  }
180
242
 
243
+ /**
244
+ * Stream content generation
245
+ */
181
246
  async streamContent(
182
247
  model: string,
183
248
  contents: GeminiContent[],
184
249
  onChunk: (text: string) => void,
185
250
  generationConfig?: GeminiGenerationConfig,
186
251
  ): Promise<string> {
187
- this.validateInitialization();
252
+ const genModel = this.getModel(model);
188
253
 
189
- const effectiveModel = model || this.config?.defaultModel || "gemini-1.5-flash";
190
- const endpoint = this.getEndpoint(effectiveModel, "streamGenerateContent");
254
+ const sdkContents = contents.map((content) => ({
255
+ role: content.role || "user",
256
+ parts: content.parts.map((part) => {
257
+ if ("text" in part) {
258
+ return { text: part.text };
259
+ }
260
+ if ("inlineData" in part) {
261
+ return {
262
+ inlineData: {
263
+ mimeType: part.inlineData.mimeType,
264
+ data: part.inlineData.data,
265
+ },
266
+ };
267
+ }
268
+ return part;
269
+ }),
270
+ }));
191
271
 
192
- const body: GeminiRequest = {
193
- contents,
272
+ const result = await genModel.generateContentStream({
273
+ contents: sdkContents as Parameters<typeof genModel.generateContentStream>[0] extends { contents: infer C } ? C : never,
194
274
  generationConfig,
195
- };
275
+ });
196
276
 
197
- const timeout = this.config?.defaultTimeoutMs ?? 60000;
198
- const controller = new AbortController();
199
- const timeoutId = setTimeout(() => controller.abort(), timeout);
277
+ let fullText = "";
200
278
 
201
- try {
202
- const response = await fetch(endpoint, {
203
- method: "POST",
204
- headers: { "Content-Type": "application/json" },
205
- body: JSON.stringify(body),
206
- signal: controller.signal,
207
- });
208
-
209
- if (!response.ok) {
210
- throw new Error(`HTTP ${response.status}`);
211
- }
212
-
213
- const reader = response.body?.getReader();
214
- if (!reader) {
215
- throw new Error("No response body");
279
+ for await (const chunk of result.stream) {
280
+ const chunkText = chunk.text();
281
+ if (chunkText) {
282
+ fullText += chunkText;
283
+ onChunk(chunkText);
216
284
  }
285
+ }
217
286
 
218
- const decoder = new TextDecoder();
219
- let fullText = "";
220
- let reading = true;
221
-
222
- while (reading) {
223
- const { done, value } = await reader.read();
224
-
225
- if (done) {
226
- reading = false;
227
- continue;
228
- }
287
+ return fullText;
288
+ }
229
289
 
230
- const chunk = decoder.decode(value, { stream: true });
231
- const text = this.parseStreamChunk(chunk);
290
+ private extractTextFromResponse(response: GeminiResponse): string {
291
+ const candidate = response.candidates?.[0];
232
292
 
233
- if (text) {
234
- fullText += text;
235
- onChunk(text);
236
- }
237
- }
293
+ if (!candidate) {
294
+ throw new Error("No response candidates");
295
+ }
238
296
 
239
- return fullText;
240
- } finally {
241
- clearTimeout(timeoutId);
297
+ if (candidate.finishReason === "SAFETY") {
298
+ throw new Error("Content blocked by safety filters");
242
299
  }
243
- }
244
300
 
245
- private parseStreamChunk(chunk: string): string {
246
- try {
247
- const lines = chunk.split("\n").filter((l) => l.trim());
248
- let text = "";
249
-
250
- for (const line of lines) {
251
- if (line.startsWith("data: ")) {
252
- const data = JSON.parse(line.slice(6)) as GeminiResponse;
253
- const candidate = data.candidates?.[0];
254
- const textPart = candidate?.content.parts.find(
255
- (p): p is { text: string } => "text" in p,
256
- );
257
- if (textPart) {
258
- text += textPart.text;
259
- }
260
- }
261
- }
301
+ const textPart = candidate.content.parts.find(
302
+ (p): p is { text: string } => "text" in p && typeof p.text === "string",
303
+ );
262
304
 
263
- return text;
264
- } catch {
265
- return "";
305
+ if (!textPart) {
306
+ throw new Error("No text in response");
266
307
  }
308
+
309
+ return textPart.text;
267
310
  }
268
311
 
269
312
  reset(): void {
270
- this.apiKey = null;
313
+ this.client = null;
271
314
  this.config = null;
272
315
  this.initialized = false;
273
316
  }
package/src/infrastructure/services/gemini-provider.service.ts CHANGED
@@ -3,7 +3,12 @@
3
3
  * IAIProvider implementation for Google Gemini
4
4
  */
5
5
 
6
- import type { GeminiConfig, GeminiContent } from "../../domain/entities";
6
+ import type {
7
+ GeminiConfig,
8
+ GeminiContent,
9
+ GeminiImageInput,
10
+ GeminiImageGenerationResult,
11
+ } from "../../domain/entities";
7
12
  import { geminiClientService } from "./gemini-client.service";
8
13
 
9
14
  declare const __DEV__: boolean;
@@ -14,6 +19,8 @@ export interface AIProviderConfig {
14
19
  baseDelay?: number;
15
20
  maxDelay?: number;
16
21
  defaultTimeoutMs?: number;
22
+ /** Model used for image generation */
23
+ imageModel?: string;
17
24
  }
18
25
 
19
26
  export interface JobSubmission {
@@ -58,6 +65,7 @@ class GeminiProviderService {
58
65
  baseDelay: config.baseDelay,
59
66
  maxDelay: config.maxDelay,
60
67
  defaultTimeoutMs: config.defaultTimeoutMs,
68
+ imageModel: config.imageModel,
61
69
  };
62
70
 
63
71
  geminiClientService.initialize(geminiConfig);
@@ -134,9 +142,11 @@ class GeminiProviderService {
134
142
  options?: SubscribeOptions<T>,
135
143
  ): Promise<T> {
136
144
  options?.onQueueUpdate?.({ status: "IN_QUEUE" });
145
+ options?.onProgress?.(10);
137
146
 
138
147
  const result = await this.executeGeneration<T>(model, input);
139
148
 
149
+ options?.onProgress?.(100);
140
150
  options?.onQueueUpdate?.({ status: "COMPLETED" });
141
151
  options?.onResult?.(result);
142
152
 
@@ -150,6 +160,39 @@ class GeminiProviderService {
150
160
  return this.executeGeneration<T>(model, input);
151
161
  }
152
162
 
163
+ /**
164
+ * Generate image with optional input images
165
+ * Convenience method for image generation
166
+ */
167
+ async generateImage(
168
+ prompt: string,
169
+ images?: GeminiImageInput[],
170
+ ): Promise<GeminiImageGenerationResult> {
171
+ return geminiClientService.generateImage(prompt, images);
172
+ }
173
+
174
+ /**
175
+ * Generate content with images (multimodal)
176
+ */
177
+ async generateWithImages(
178
+ model: string,
179
+ prompt: string,
180
+ images: GeminiImageInput[],
181
+ ): Promise<{ text: string; response: unknown }> {
182
+ const response = await geminiClientService.generateWithImages(
183
+ model,
184
+ prompt,
185
+ images,
186
+ );
187
+
188
+ const text = response.candidates?.[0]?.content.parts
189
+ .filter((p): p is { text: string } => "text" in p)
190
+ .map((p) => p.text)
191
+ .join("") || "";
192
+
193
+ return { text, response };
194
+ }
195
+
153
196
  reset(): void {
154
197
  geminiClientService.reset();
155
198
  this.pendingJobs.clear();
@@ -182,6 +225,15 @@ class GeminiProviderService {
182
225
  model: string,
183
226
  input: Record<string, unknown>,
184
227
  ): Promise<T> {
228
+ // Check if this is an image generation request
229
+ if (input.generateImage === true || input.type === "image") {
230
+ const prompt = String(input.prompt || "");
231
+ const images = input.images as GeminiImageInput[] | undefined;
232
+ const result = await geminiClientService.generateImage(prompt, images);
233
+ return result as T;
234
+ }
235
+
236
+ // Regular content generation
185
237
  const contents = this.buildContents(input);
186
238
 
187
239
  const response = await geminiClientService.generateContent(
@@ -199,16 +251,24 @@ class GeminiProviderService {
199
251
  if (typeof input.prompt === "string") {
200
252
  const parts: GeminiContent["parts"] = [{ text: input.prompt }];
201
253
 
254
+ // Handle single image
202
255
  if (input.image_url && typeof input.image_url === "string") {
203
- const base64Match = input.image_url.match(
204
- /^data:([^;]+);base64,(.+)$/,
205
- );
256
+ const imageData = this.parseImageUrl(input.image_url);
257
+ if (imageData) {
258
+ parts.push({ inlineData: imageData });
259
+ }
260
+ }
206
261
 
207
- if (base64Match) {
262
+ // Handle multiple images
263
+ if (Array.isArray(input.images)) {
264
+ for (const img of input.images as GeminiImageInput[]) {
265
+ const base64Data = img.base64.includes(",")
266
+ ? img.base64.split(",")[1]
267
+ : img.base64;
208
268
  parts.push({
209
269
  inlineData: {
210
- mimeType: base64Match[1],
211
- data: base64Match[2],
270
+ mimeType: img.mimeType,
271
+ data: base64Data,
212
272
  },
213
273
  });
214
274
  }
@@ -224,27 +284,69 @@ class GeminiProviderService {
224
284
  return contents;
225
285
  }
226
286
 
287
+ private parseImageUrl(
288
+ imageUrl: string,
289
+ ): { mimeType: string; data: string } | null {
290
+ const base64Match = imageUrl.match(/^data:([^;]+);base64,(.+)$/);
291
+ if (base64Match) {
292
+ return {
293
+ mimeType: base64Match[1],
294
+ data: base64Match[2],
295
+ };
296
+ }
297
+ return null;
298
+ }
299
+
227
300
  private formatResponse<T>(
228
301
  response: unknown,
229
302
  input: Record<string, unknown>,
230
303
  ): T {
231
304
  const resp = response as {
232
305
  candidates?: Array<{
233
- content: { parts: Array<{ text?: string }> };
306
+ content: {
307
+ parts: Array<{
308
+ text?: string;
309
+ inlineData?: { mimeType: string; data: string };
310
+ }>;
311
+ };
234
312
  }>;
235
313
  };
236
314
 
237
- const text = resp.candidates?.[0]?.content.parts.find((p) => p.text)?.text;
315
+ const candidate = resp.candidates?.[0];
316
+ const parts = candidate?.content.parts || [];
317
+
318
+ // Extract text
319
+ const text = parts.find((p) => p.text)?.text;
320
+
321
+ // Extract image if present
322
+ const imagePart = parts.find((p) => p.inlineData);
323
+ const imageData = imagePart?.inlineData;
238
324
 
239
325
  if (input.outputFormat === "text") {
240
326
  return text as T;
241
327
  }
242
328
 
243
- return {
329
+ // Return full result with image data if available
330
+ const result: Record<string, unknown> = {
244
331
  text,
245
332
  response,
246
- } as T;
333
+ };
334
+
335
+ if (imageData) {
336
+ result.imageUrl = `data:${imageData.mimeType};base64,${imageData.data}`;
337
+ result.imageBase64 = imageData.data;
338
+ result.mimeType = imageData.mimeType;
339
+ }
340
+
341
+ return result as T;
247
342
  }
248
343
  }
249
344
 
250
345
  export const geminiProviderService = new GeminiProviderService();
346
+
347
+ /**
348
+ * Factory function to create a new Gemini provider instance
349
+ */
350
+ export function createGeminiProvider(): GeminiProviderService {
351
+ return new GeminiProviderService();
352
+ }
package/src/infrastructure/services/index.ts CHANGED
@@ -3,7 +3,10 @@
3
3
  */
4
4
 
5
5
  export { geminiClientService } from "./gemini-client.service";
6
- export { geminiProviderService } from "./gemini-provider.service";
6
+ export {
7
+ geminiProviderService,
8
+ createGeminiProvider,
9
+ } from "./gemini-provider.service";
7
10
  export type {
8
11
  AIProviderConfig,
9
12
  JobSubmission,
@@ -78,16 +78,22 @@ export function useGemini(options: UseGeminiOptions = {}): UseGeminiReturn {
78
78
 
79
79
  try {
80
80
  const model = options.model ?? "gemini-1.5-flash";
81
- const text = await geminiClientService.generateWithImage(
81
+ const response = await geminiClientService.generateWithImages(
82
82
  model,
83
83
  prompt,
84
- imageBase64,
85
- mimeType,
84
+ [{ base64: imageBase64, mimeType }],
86
85
  options.generationConfig,
87
86
  );
88
87
 
89
88
  if (abortRef.current) return;
90
89
 
90
+ // Extract text from response
91
+ const text =
92
+ response.candidates?.[0]?.content.parts
93
+ .filter((p): p is { text: string } => "text" in p)
94
+ .map((p) => p.text)
95
+ .join("") || "";
96
+
91
97
  setResult(text);
92
98
  options.onSuccess?.(text);
93
99
  } catch (err) {