@sschepis/robodev 1.0.0

@@ -0,0 +1,579 @@
+ // AI Provider abstraction layer
+ // Handles provider detection, endpoint configuration, and request/response translation
+ // Supports: OpenAI, Google Gemini (via @google/genai SDK), and local (LMStudio, Ollama, etc.)
+
+ import { config } from '../config.mjs';
+
+ /**
+  * Supported AI providers
+  */
+ export const AI_PROVIDERS = {
+   LOCAL: 'local',
+   OPENAI: 'openai',
+   GEMINI: 'gemini',
+ };
+
+ /**
+  * Default endpoints for each provider
+  */
+ const PROVIDER_ENDPOINTS = {
+   [AI_PROVIDERS.LOCAL]: 'http://localhost:1234/v1/chat/completions',
+   [AI_PROVIDERS.OPENAI]: 'https://api.openai.com/v1/chat/completions',
+   // Gemini uses SDK, not REST endpoint — this is only a fallback
+   [AI_PROVIDERS.GEMINI]: null,
+ };
+
+ // Lazy-loaded Gemini SDK instance
+ let _geminiAI = null;
+
+ /**
+  * Get or create the Gemini SDK client
+  * @returns {Object} GoogleGenAI instance
+  */
+ async function getGeminiClient() {
+   if (_geminiAI) return _geminiAI;
+
+   const { GoogleGenAI } = await import('@google/genai');
+   _geminiAI = new GoogleGenAI({ apiKey: config.keys.google });
+   return _geminiAI;
+ }
+
+ /**
+  * Detect the AI provider from the model name
+  * @param {string} model - The model identifier
+  * @returns {string} The detected provider key
+  */
+ export function detectProvider(model) {
+   // Explicit provider override takes priority
+   const explicitProvider = config.ai.provider;
+   if (explicitProvider && Object.values(AI_PROVIDERS).includes(explicitProvider)) {
+     return explicitProvider;
+   }
+
+   if (!model) return AI_PROVIDERS.LOCAL;
+
+   const m = model.toLowerCase();
+
+   // Google Gemini models
+   if (m.startsWith('gemini-') || m.startsWith('models/gemini-')) {
+     return AI_PROVIDERS.GEMINI;
+   }
+
+   // OpenAI models
+   if (m.startsWith('gpt-') || m.startsWith('o1') || m.startsWith('o3') || m.startsWith('o4')) {
+     return AI_PROVIDERS.OPENAI;
+   }
+
+   // Default: local server (LMStudio, Ollama, etc.)
+   return AI_PROVIDERS.LOCAL;
+ }
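+
+ // Usage sketch (illustrative; the model strings below are examples chosen
+ // for this sketch, not values the package requires):
+ //
+ //   detectProvider('gemini-2.0-flash');  // -> 'gemini'
+ //   detectProvider('gpt-4o-mini');       // -> 'openai'
+ //   detectProvider('llama-3.1-8b');      // -> 'local' (default fallback)
+ //
+ // If config.ai.provider is set to a valid provider key, it overrides
+ // model-name detection entirely.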
+
+ /**
+  * Get the appropriate endpoint URL for a provider
+  * @param {string} provider - The provider key
+  * @returns {string|null} The endpoint URL (null for SDK-based providers)
+  */
+ export function getEndpoint(provider) {
+   // If the user has configured a custom endpoint (anything other than the
+   // default local URL), always use it
+   const configuredEndpoint = config.ai.endpoint;
+   if (configuredEndpoint && configuredEndpoint !== 'http://localhost:1234/v1/chat/completions') {
+     return configuredEndpoint;
+   }
+
+   return PROVIDER_ENDPOINTS[provider] || PROVIDER_ENDPOINTS[AI_PROVIDERS.LOCAL];
+ }
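+
+ // Resolution sketch (illustrative), assuming config.ai.endpoint is unset or
+ // still the default local URL:
+ //
+ //   getEndpoint(AI_PROVIDERS.OPENAI);  // -> 'https://api.openai.com/v1/chat/completions'
+ //   getEndpoint(AI_PROVIDERS.LOCAL);   // -> 'http://localhost:1234/v1/chat/completions'
+ //   getEndpoint(AI_PROVIDERS.GEMINI);  // -> falls through to the local default
+ //                                      //    (unused; Gemini calls go through the SDK)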
+
+ /**
+  * Get the appropriate authorization headers for a provider (REST-based only)
+  * @param {string} provider - The provider key
+  * @returns {Object} Headers object
+  */
+ export function getAuthHeaders(provider) {
+   switch (provider) {
+     case AI_PROVIDERS.OPENAI:
+       if (config.keys.openai) {
+         return { 'Authorization': `Bearer ${config.keys.openai}` };
+       }
+       return {};
+
+     case AI_PROVIDERS.LOCAL:
+     default:
+       // Local servers may still use an API key for compatibility
+       if (config.keys.openai) {
+         return { 'Authorization': `Bearer ${config.keys.openai}` };
+       }
+       return {};
+   }
+ }
+
+ // ─── OpenAI ↔ Gemini Format Translation ──────────────────────────────────
+
+ /**
+  * Convert OpenAI-format tools to Gemini functionDeclarations
+  * OpenAI: [{type: "function", function: {name, description, parameters}}]
+  * Gemini: [{functionDeclarations: [{name, description, parameters}]}]
+  */
+ function openaiToolsToGemini(tools) {
+   if (!tools || tools.length === 0) return undefined;
+
+   const declarations = tools
+     .filter(t => t.type === 'function' && t.function)
+     .map(t => {
+       const decl = {
+         name: t.function.name,
+         description: t.function.description || '',
+         parameters: sanitizeSchemaForGemini(t.function.parameters),
+       };
+       return decl;
+     });
+
+   return [{ functionDeclarations: declarations }];
+ }
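+
+ // Shape sketch (illustrative; the "read_file" tool is a made-up example):
+ //
+ //   openaiToolsToGemini([{
+ //     type: 'function',
+ //     function: { name: 'read_file', description: 'Read a file', parameters: { type: 'object' } },
+ //   }]);
+ //   // -> [{ functionDeclarations: [{ name: 'read_file', description: 'Read a file',
+ //   //        parameters: { type: 'object' } }] }]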
+
+ /**
+  * Sanitize a JSON Schema to be compatible with Gemini's expectations.
+  * Gemini does not support some JSON Schema constructs that OpenAI does.
+  */
+ function sanitizeSchemaForGemini(schema) {
+   if (!schema) return undefined;
+
+   const clean = { ...schema };
+
+   // Remove unsupported keywords
+   delete clean.default;
+   delete clean.minimum;
+   delete clean.maximum;
+   delete clean.$schema;
+
+   // Gemini expects `required` only as an array of property names at the object level.
+   // Remove boolean `required` values on individual properties (OpenAI allows this).
+   // Also remove empty required arrays — Gemini may reject these.
+   if (typeof clean.required === 'boolean') {
+     delete clean.required;
+   } else if (Array.isArray(clean.required) && clean.required.length === 0) {
+     delete clean.required;
+   }
+
+   // Gemini doesn't support "any" type — convert to "string"
+   if (clean.type === 'any') {
+     clean.type = 'string';
+   }
+
+   // Recursively clean nested properties
+   if (clean.properties) {
+     const cleanProps = {};
+     for (const [key, value] of Object.entries(clean.properties)) {
+       cleanProps[key] = sanitizeSchemaForGemini(value);
+     }
+     clean.properties = cleanProps;
+   }
+
+   // Clean array items
+   if (clean.items) {
+     clean.items = sanitizeSchemaForGemini(clean.items);
+   }
+
+   return clean;
+ }
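+
+ // Before/after sketch (illustrative schema):
+ //
+ //   sanitizeSchemaForGemini({
+ //     type: 'object',
+ //     properties: { path: { type: 'string', default: '.' } },
+ //     required: [],
+ //   });
+ //   // -> { type: 'object', properties: { path: { type: 'string' } } }
+ //   // (`default` removed, empty `required` array dropped)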
+
+ /**
+  * Convert OpenAI-format messages to Gemini contents + systemInstruction
+  * OpenAI: [{role: "system"|"user"|"assistant"|"tool", content, tool_calls, tool_call_id, name}]
+  * Gemini: {systemInstruction, contents: [{role: "user"|"model", parts: [...]}]}
+  */
+ function openaiMessagesToGemini(messages) {
+   let systemInstruction = undefined;
+   const contents = [];
+
+   for (const msg of messages) {
+     if (msg.role === 'system') {
+       // Extract system instruction (use the last one if multiple)
+       systemInstruction = msg.content;
+       continue;
+     }
+
+     if (msg.role === 'user') {
+       contents.push({
+         role: 'user',
+         parts: [{ text: msg.content }],
+       });
+       continue;
+     }
+
+     if (msg.role === 'assistant') {
+       // If we have preserved Gemini parts (from a previous round-trip),
+       // use them directly to preserve thought/thoughtSignature fields
+       if (msg._geminiParts && Array.isArray(msg._geminiParts)) {
+         contents.push({ role: 'model', parts: msg._geminiParts });
+         continue;
+       }
+
+       const parts = [];
+
+       // Text content
+       if (msg.content) {
+         parts.push({ text: msg.content });
+       }
+
+       // Tool calls → functionCall parts (with optional thoughtSignature)
+       if (msg.tool_calls && msg.tool_calls.length > 0) {
+         for (const tc of msg.tool_calls) {
+           let args;
+           try {
+             args = typeof tc.function.arguments === 'string'
+               ? JSON.parse(tc.function.arguments)
+               : tc.function.arguments;
+           } catch {
+             args = {};
+           }
+           const fcPart = {
+             functionCall: {
+               name: tc.function.name,
+               args: args,
+             },
+           };
+           // Preserve thoughtSignature if stored from prior Gemini response
+           if (tc._thoughtSignature) {
+             fcPart.thoughtSignature = tc._thoughtSignature;
+           }
+           parts.push(fcPart);
+         }
+       }
+
+       if (parts.length > 0) {
+         contents.push({ role: 'model', parts });
+       }
+       continue;
+     }
+
+     if (msg.role === 'tool') {
+       // Tool result → functionResponse part
+       let responseObj;
+       try {
+         responseObj = typeof msg.content === 'string'
+           ? { result: msg.content }
+           : msg.content;
+       } catch {
+         responseObj = { result: String(msg.content) };
+       }
+
+       contents.push({
+         role: 'user',
+         parts: [{
+           functionResponse: {
+             name: msg.name || 'unknown_tool',
+             response: responseObj,
+           },
+         }],
+       });
+       continue;
+     }
+   }
+
+   return { systemInstruction, contents };
+ }
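+
+ // Conversion sketch (illustrative messages):
+ //
+ //   openaiMessagesToGemini([
+ //     { role: 'system', content: 'Be terse.' },
+ //     { role: 'user', content: 'Hi' },
+ //   ]);
+ //   // -> { systemInstruction: 'Be terse.',
+ //   //      contents: [{ role: 'user', parts: [{ text: 'Hi' }] }] }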
+
+ /**
+  * Convert a Gemini generateContent response to OpenAI-compatible format
+  * so the rest of the app can process it uniformly.
+  *
+  * IMPORTANT: For thinking models (gemini-3-*), the response may include
+  * `thought` and `thoughtSignature` fields in parts. These MUST be preserved
+  * and sent back in subsequent turns for function calling to work.
+  * We store them as `_geminiParts` on the message object.
+  */
+ function geminiResponseToOpenai(geminiResponse) {
+   const candidate = geminiResponse.candidates?.[0];
+   if (!candidate) {
+     return { choices: [] };
+   }
+
+   const parts = candidate.content?.parts || [];
+   const message = { role: 'assistant' };
+
+   // Collect text parts (exclude thought parts from visible content)
+   const textParts = parts.filter(p => p.text && !p.thought).map(p => p.text);
+   if (textParts.length > 0) {
+     message.content = textParts.join('');
+   } else {
+     message.content = null;
+   }
+
+   // Collect function calls — preserve thoughtSignature for thinking models
+   const functionCalls = parts.filter(p => p.functionCall);
+   if (functionCalls.length > 0) {
+     message.tool_calls = functionCalls.map((p, i) => ({
+       id: `call_gemini_${Date.now()}_${i}`,
+       type: 'function',
+       function: {
+         name: p.functionCall.name,
+         arguments: JSON.stringify(p.functionCall.args || {}),
+       },
+       // Preserve Gemini-specific fields for round-tripping
+       _thoughtSignature: p.thoughtSignature || undefined,
+     }));
+   }
+
+   // Preserve the FULL original parts array for faithful reconstruction
+   // This ensures thought parts and thoughtSignatures survive the round-trip
+   message._geminiParts = parts;
+
+   return {
+     choices: [{
+       index: 0,
+       message,
+       finish_reason: candidate.finishReason || 'stop',
+     }],
+     usage: geminiResponse.usageMetadata ? {
+       prompt_tokens: geminiResponse.usageMetadata.promptTokenCount || 0,
+       completion_tokens: geminiResponse.usageMetadata.candidatesTokenCount || 0,
+       total_tokens: geminiResponse.usageMetadata.totalTokenCount || 0,
+     } : undefined,
+   };
+ }
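+
+ // Translation sketch (illustrative Gemini response):
+ //
+ //   geminiResponseToOpenai({
+ //     candidates: [{ content: { parts: [{ text: 'Hello' }] }, finishReason: 'STOP' }],
+ //   });
+ //   // -> { choices: [{ index: 0,
+ //   //       message: { role: 'assistant', content: 'Hello', _geminiParts: [...] },
+ //   //       finish_reason: 'STOP' }],
+ //   //      usage: undefined }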
+
+ // ─── Provider Context ────────────────────────────────────────────────────
+
+ /**
+  * Transform request body for provider-specific quirks (REST providers only)
+  * @param {string} provider - The provider key
+  * @param {Object} body - The OpenAI-compatible request body
+  * @returns {Object} The transformed request body
+  */
+ export function transformRequestBody(provider, body) {
+   const transformed = { ...body };
+
+   switch (provider) {
+     case AI_PROVIDERS.OPENAI:
+       // OpenAI doesn't support reasoning_effort for most models
+       // Keep it for models that might support it (o1, etc.)
+       break;
+
+     case AI_PROVIDERS.LOCAL:
+     default:
+       // Local servers (LMStudio) typically support all OpenAI params
+       break;
+   }
+
+   return transformed;
+ }
+
+ /**
+  * Create a fully configured provider context for making API calls
+  * @param {string} [model] - Optional model override; defaults to config.ai.model
+  * @returns {{ provider: string, endpoint: string|null, headers: Object, model: string }}
+  */
+ export function createProviderContext(model) {
+   const activeModel = model || config.ai.model;
+   const provider = detectProvider(activeModel);
+   const endpoint = getEndpoint(provider);
+   const authHeaders = provider !== AI_PROVIDERS.GEMINI ? getAuthHeaders(provider) : {};
+
+   return {
+     provider,
+     endpoint,
+     headers: {
+       'Content-Type': 'application/json',
+       ...authHeaders,
+     },
+     model: activeModel,
+   };
+ }
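+
+ // Usage sketch (illustrative; assumes config.ai.provider is unset and
+ // config.keys.openai is configured):
+ //
+ //   const ctx = createProviderContext('gpt-4o-mini');
+ //   // ctx.provider === 'openai'
+ //   // ctx.endpoint === 'https://api.openai.com/v1/chat/completions'
+ //   // ctx.headers -> { 'Content-Type': 'application/json',
+ //   //                  Authorization: 'Bearer <config.keys.openai>' }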
+
+ // ─── Unified API Call Functions ──────────────────────────────────────────
+
+ /**
+  * Make an API call using the provider abstraction.
+  * For Gemini: uses @google/genai SDK with format translation
+  * For OpenAI/Local: uses REST fetch with OpenAI-compatible format
+  *
+  * @param {Object} requestBody - OpenAI-compatible request body (model, messages, tools, etc.)
+  * @param {Object} [options] - Optional overrides
+  * @param {string} [options.model] - Model override
+  * @returns {Promise<Object>} OpenAI-compatible parsed JSON response
+  */
+ export async function callProvider(requestBody, options = {}) {
+   const ctx = createProviderContext(options.model || requestBody.model);
+
+   // ── Gemini: use native SDK ──
+   if (ctx.provider === AI_PROVIDERS.GEMINI) {
+     return await callGeminiSDK(ctx, requestBody);
+   }
+
+   // ── OpenAI / Local: use REST fetch ──
+   return await callOpenAIREST(ctx, requestBody);
+ }
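+
+ // Caller sketch (illustrative; prompt and model are example values):
+ //
+ //   const result = await callProvider({
+ //     model: 'gemini-2.0-flash',
+ //     messages: [{ role: 'user', content: 'Summarize this repo.' }],
+ //   });
+ //   const reply = result.choices?.[0]?.message?.content;
+ //
+ // The same OpenAI-shaped request body works for all three providers.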
+
+ /**
+  * Make a streaming API call using the provider abstraction.
+  * For Gemini: streams via the @google/genai SDK, wrapped as a synthetic SSE Response
+  * For OpenAI/Local: uses REST SSE streaming
+  *
+  * @param {Object} requestBody - OpenAI-compatible request body
+  * @param {Object} [options] - Optional overrides
+  * @returns {Promise<Response>} Raw fetch Response for streaming (or synthetic for Gemini)
+  */
+ export async function callProviderStream(requestBody, options = {}) {
+   const ctx = createProviderContext(options.model || requestBody.model);
+
+   // ── Gemini: use SDK streaming, wrapped as a synthetic SSE Response ──
+   if (ctx.provider === AI_PROVIDERS.GEMINI) {
+     return await callGeminiSDKStream(ctx, requestBody);
+   }
+
+   // ── OpenAI / Local: use REST SSE ──
+   const body = transformRequestBody(ctx.provider, { ...requestBody, stream: true });
+
+   const response = await fetch(ctx.endpoint, {
+     method: 'POST',
+     headers: ctx.headers,
+     body: JSON.stringify(body),
+   });
+
+   if (!response.ok) {
+     const providerLabel = ctx.provider === AI_PROVIDERS.LOCAL
+       ? 'Local AI server (is LMStudio running?)'
+       : `${ctx.provider} API`;
+     throw new Error(`${providerLabel} Error: ${response.status} - ${response.statusText}`);
+   }
+
+   return response;
+ }
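+
+ // Consumer sketch (illustrative): both branches return a Response whose body
+ // is OpenAI-style SSE, so a caller can read it the same way:
+ //
+ //   const response = await callProviderStream({ model, messages });
+ //   const reader = response.body.getReader();
+ //   const decoder = new TextDecoder();
+ //   for (let r = await reader.read(); !r.done; r = await reader.read()) {
+ //     const chunk = decoder.decode(r.value);  // "data: {...}\n\n" lines
+ //     // parse the "data: " payloads; stop at "data: [DONE]"
+ //   }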
+
+ // ─── Gemini SDK Calls ────────────────────────────────────────────────────
+
+ /**
+  * Call Gemini using the @google/genai SDK
+  */
+ async function callGeminiSDK(ctx, requestBody) {
+   const ai = await getGeminiClient();
+   const { systemInstruction, contents } = openaiMessagesToGemini(requestBody.messages);
+   const geminiTools = openaiToolsToGemini(requestBody.tools);
+
+   const generateConfig = {};
+   if (requestBody.temperature !== undefined) {
+     generateConfig.temperature = requestBody.temperature;
+   }
+   if (requestBody.max_tokens) {
+     generateConfig.maxOutputTokens = requestBody.max_tokens;
+   }
+
+   // Build tool config for auto tool choice
+   if (geminiTools) {
+     generateConfig.tools = geminiTools;
+   }
+
+   if (systemInstruction) {
+     generateConfig.systemInstruction = systemInstruction;
+   }
+
+   const geminiResponse = await ai.models.generateContent({
+     model: ctx.model,
+     contents: contents,
+     config: generateConfig,
+   });
+
+   // Translate response to OpenAI format
+   return geminiResponseToOpenai(geminiResponse);
+ }
+
+ /**
+  * Call Gemini streaming using the @google/genai SDK.
+  * Returns a synthetic Response object that mimics SSE streaming.
+  */
+ async function callGeminiSDKStream(ctx, requestBody) {
+   const ai = await getGeminiClient();
+   const { systemInstruction, contents } = openaiMessagesToGemini(requestBody.messages);
+   const geminiTools = openaiToolsToGemini(requestBody.tools);
+
+   const generateConfig = {};
+   if (requestBody.temperature !== undefined) {
+     generateConfig.temperature = requestBody.temperature;
+   }
+   if (geminiTools) {
+     generateConfig.tools = geminiTools;
+   }
+   if (systemInstruction) {
+     generateConfig.systemInstruction = systemInstruction;
+   }
+
+   // Use generateContentStream for real streaming
+   const streamResult = await ai.models.generateContentStream({
+     model: ctx.model,
+     contents: contents,
+     config: generateConfig,
+   });
+
+   // Create a ReadableStream that wraps the Gemini async iterator
+   const stream = new ReadableStream({
+     async start(controller) {
+       const encoder = new TextEncoder();
+       try {
+         for await (const chunk of streamResult) {
+           const text = chunk.text || '';
+           if (text) {
+             // Emit SSE-formatted data matching OpenAI streaming format
+             const sseData = JSON.stringify({
+               choices: [{
+                 delta: { content: text },
+                 index: 0,
+               }],
+             });
+             controller.enqueue(encoder.encode(`data: ${sseData}\n\n`));
+           }
+         }
+         controller.enqueue(encoder.encode('data: [DONE]\n\n'));
+         controller.close();
+       } catch (err) {
+         controller.error(err);
+       }
+     },
+   });
+
+   // Return a synthetic Response object
+   return new Response(stream, {
+     status: 200,
+     headers: { 'Content-Type': 'text/event-stream' },
+   });
+ }
+
+ // ─── OpenAI/Local REST Calls ─────────────────────────────────────────────
+
+ /**
+  * Call OpenAI or local server using REST
+  */
+ async function callOpenAIREST(ctx, requestBody) {
+   const body = transformRequestBody(ctx.provider, requestBody);
+
+   const response = await fetch(ctx.endpoint, {
+     method: 'POST',
+     headers: ctx.headers,
+     body: JSON.stringify(body),
+   });
+
+   if (!response.ok) {
+     const providerLabel = ctx.provider === AI_PROVIDERS.LOCAL
+       ? 'Local AI server (is LMStudio running?)'
+       : `${ctx.provider} API`;
+     throw new Error(`${providerLabel} Error: ${response.status} - ${response.statusText}`);
+   }
+
+   return response.json();
+ }
+
+ // ─── Utility Functions ───────────────────────────────────────────────────
+
+ /**
+  * Get a human-readable label for the current provider setup
+  * @param {string} [model] - Optional model override
+  * @returns {string} Description like "Gemini (gemini-2.0-flash)"
+  */
+ export function getProviderLabel(model) {
+   const ctx = createProviderContext(model);
+   const labels = {
+     [AI_PROVIDERS.LOCAL]: 'Local',
+     [AI_PROVIDERS.OPENAI]: 'OpenAI',
+     [AI_PROVIDERS.GEMINI]: 'Gemini',
+   };
+   return `${labels[ctx.provider] || ctx.provider} (${ctx.model})`;
+ }
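+
+ // Label sketch (illustrative): getProviderLabel('gemini-2.0-flash')
+ // -> 'Gemini (gemini-2.0-flash)'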