banana-code 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/LICENSE +21 -0
  2. package/README.md +246 -0
  3. package/banana.js +5464 -0
  4. package/lib/agenticRunner.js +1884 -0
  5. package/lib/borderRenderer.js +41 -0
  6. package/lib/commandRunner.js +205 -0
  7. package/lib/completer.js +286 -0
  8. package/lib/config.js +301 -0
  9. package/lib/contextBuilder.js +324 -0
  10. package/lib/diffViewer.js +295 -0
  11. package/lib/fileManager.js +224 -0
  12. package/lib/historyManager.js +124 -0
  13. package/lib/hookManager.js +1143 -0
  14. package/lib/imageHandler.js +268 -0
  15. package/lib/inlineComplete.js +192 -0
  16. package/lib/interactivePicker.js +254 -0
  17. package/lib/lmStudio.js +226 -0
  18. package/lib/markdownRenderer.js +423 -0
  19. package/lib/mcpClient.js +288 -0
  20. package/lib/modelRegistry.js +350 -0
  21. package/lib/monkeyModels.js +97 -0
  22. package/lib/oauthOpenAI.js +167 -0
  23. package/lib/parser.js +134 -0
  24. package/lib/promptManager.js +96 -0
  25. package/lib/providerClients.js +1014 -0
  26. package/lib/providerManager.js +130 -0
  27. package/lib/providerStore.js +413 -0
  28. package/lib/statusBar.js +283 -0
  29. package/lib/streamHandler.js +306 -0
  30. package/lib/subAgentManager.js +406 -0
  31. package/lib/tokenCounter.js +132 -0
  32. package/lib/visionAnalyzer.js +163 -0
  33. package/lib/watcher.js +138 -0
  34. package/models.json +57 -0
  35. package/package.json +42 -0
  36. package/prompts/base.md +23 -0
  37. package/prompts/code-agent-glm.md +16 -0
  38. package/prompts/code-agent-gptoss.md +25 -0
  39. package/prompts/code-agent-nemotron.md +17 -0
  40. package/prompts/code-agent-qwen.md +20 -0
  41. package/prompts/code-agent.md +70 -0
  42. package/prompts/plan.md +44 -0
@@ -0,0 +1,406 @@
1
+ /**
2
+ * Sub-Agent Manager for Banana Code
3
+ * Orchestrates sub-agent lifecycle: spawn, track, cancel.
4
+ * Any model on any provider can spawn sub-agents on any other provider/model.
5
+ */
6
+
7
+ const { AgenticRunner, TOOLS, READ_ONLY_TOOLS } = require('./agenticRunner');
8
+ const crypto = require('crypto');
9
+
10
// Maximum sub-agent nesting depth (1 = first sub-agent, 2 = sub-sub-agent).
const MAX_DEPTH = 2;
// Default cap on agentic-loop iterations for a single sub-agent run.
const DEFAULT_MAX_ITERATIONS_SUBAGENT = 25;
// Default wall-clock timeouts: cloud providers get 2 minutes, local models 5.
const DEFAULT_TIMEOUT_CLOUD_MS = 120_000;
const DEFAULT_TIMEOUT_LOCAL_MS = 300_000;
// Default completion-token budget passed to the runner for sub-agent requests.
const DEFAULT_MAX_TOKENS_SUBAGENT = 16384;
// Batch size for concurrent cloud sub-agents in spawnParallel().
const MAX_CONCURRENT_CLOUD = 5;

// Directive appended to every sub-agent system prompt (see _assembleSystemPrompt):
// keeps sub-agents focused, autonomous, and terse in their final report.
const SUB_AGENT_DIRECTIVE = [
  'You are a sub-agent spawned to perform a specific task. Focus exclusively on the task.',
  'You have full tool access. Complete the task and report findings concisely.',
  'Do not ask follow-up questions. Do not suggest next steps. Just do the work and report.'
].join('\n');
22
+
23
class SubAgentManager {
  /**
   * @param {Object} options
   * @param {Object} options.providerManager - Resolves provider clients for models
   * @param {Object} options.modelRegistry - Resolves "provider:alias" model keys
   * @param {Object} options.promptManager - Named system-prompt store
   * @param {Object} options.config - Project config (used for instruction injection)
   * @param {string} [options.projectDir] - Working directory for tools and file reads
   */
  constructor(options = {}) {
    this.providerManager = options.providerManager;
    this.modelRegistry = options.modelRegistry;
    this.promptManager = options.promptManager;
    this.config = options.config;
    this.projectDir = options.projectDir || process.cwd();

    // Lifecycle callbacks (all optional; default to no-ops)
    this.onAgentStart = options.onAgentStart || (() => {});
    this.onAgentToolCall = options.onAgentToolCall || (() => {});
    this.onAgentToolResult = options.onAgentToolResult || (() => {});
    this.onAgentComplete = options.onAgentComplete || (() => {});
    this.onAgentError = options.onAgentError || (() => {});

    // Active agents map: id -> SubAgent record
    this.agents = new Map();
  }

  /**
   * Spawn a sub-agent on a given model/provider.
   *
   * @param {string} modelKey - "provider:alias" format, e.g. "anthropic:claude-sonnet-4.6", "local:current"
   * @param {string} task - Task description
   * @param {Object} options
   * @param {string} options.context - Optional context string
   * @param {string[]} options.files - File paths to pre-read and inject
   * @param {boolean} options.readOnly - Restrict to read-only tools
   * @param {number} options.maxIterations - Default 25
   * @param {number} options.timeout - Timeout in ms
   * @param {number} options.depth - Recursion depth (0 = main, 1 = first sub-agent)
   * @param {AbortSignal} options.signal - Parent abort signal
   * @returns {Object} - { id, result, error, tokens, toolCalls, elapsed }
   */
  async spawn(modelKey, task, options = {}) {
    // Spawned agents are always at least depth 1 (depth 0 is the main agent),
    // so || is intentional here: a passed-in 0 is coerced up to 1.
    const depth = options.depth || 1;

    // Resolve model - fall back to current model if requested model not found
    let resolved = this.modelRegistry.resolveModelKey(modelKey);
    if (!resolved) {
      resolved = this.modelRegistry.getCurrentModel();
      if (!resolved) {
        return {
          error: `Could not resolve model "${modelKey}" and no current model available. Use /agent models to see available models.`,
          tokens: 0,
          toolCalls: 0
        };
      }
    }

    const id = crypto.randomBytes(4).toString('hex');
    const isLocal = (resolved.provider || 'local') === 'local';
    // Local models get a longer default timeout than cloud providers.
    const timeout = options.timeout || (isLocal ? DEFAULT_TIMEOUT_LOCAL_MS : DEFAULT_TIMEOUT_CLOUD_MS);
    const maxIterations = options.maxIterations || DEFAULT_MAX_ITERATIONS_SUBAGENT;

    // Tracking record for /agent status UIs and callbacks.
    const agent = {
      id,
      model: resolved.key,
      provider: resolved.provider || 'local',
      providerModelId: resolved.providerModelId || resolved.id,
      task,
      status: 'running',
      result: null,
      error: null,
      startedAt: Date.now(),
      completedAt: null,
      toolCalls: [],
      tokens: { prompt: 0, completion: 0, total: 0 },
      depth,
      parentId: options.parentId || null
    };

    this.agents.set(id, agent);
    this.onAgentStart(agent);

    try {
      // Get provider client for this model
      const client = await this.providerManager.getClientForModel(resolved);

      // Build system prompt
      const basePrompt = this._buildSystemPrompt(resolved);
      const systemContent = this._assembleSystemPrompt(basePrompt, task, options, resolved);

      // Build messages
      const messages = [
        { role: 'system', content: systemContent }
      ];

      // Pre-read files if requested
      if (options.files && options.files.length > 0) {
        const fileContents = this._readFiles(options.files);
        if (fileContents) {
          messages.push({ role: 'user', content: `Here are the relevant files:\n\n${fileContents}` });
        }
      }

      // Main task message
      messages.push({ role: 'user', content: task });

      // Create runner; sub-agents never stream to the terminal, so all
      // streaming callbacks are no-ops and only tool activity is forwarded.
      const runner = new AgenticRunner(client, {
        onToolCall: (tool, args) => {
          agent.toolCalls.push({ tool, args, timestamp: Date.now() });
          this.onAgentToolCall(agent, tool, args);
        },
        onToolResult: (tool, success, result) => {
          this.onAgentToolResult(agent, tool, success, result);
        },
        onToken: () => {}, // sub-agents don't stream to terminal
        onContent: () => {},
        onReasoning: () => {},
        onWarning: () => {},
        onIntermediateContent: () => {}
      });

      // Set up abort with timeout. The controller is stored on the record so
      // cancel() can stop a running agent.
      const abortController = new AbortController();
      agent.abortController = abortController;
      const timeoutId = setTimeout(() => abortController.abort(), timeout);

      // Forward parent signal so cancelling the parent cancels this agent too.
      if (options.signal) {
        if (options.signal.aborted) {
          abortController.abort();
        } else {
          options.signal.addEventListener('abort', () => abortController.abort(), { once: true });
        }
      }

      // Build tool list and custom executors. Only allow further spawning
      // while below MAX_DEPTH, to bound recursion.
      const baseTools = options.readOnly ? READ_ONLY_TOOLS : TOOLS;
      let customTools = [];
      const customToolExecutors = new Map();
      if (depth < MAX_DEPTH) {
        customTools = [SPAWN_AGENT_TOOL_DEF];
        customToolExecutors.set('spawn_agent', async (args, opts) => {
          return await this.spawn(args.model, args.task, {
            context: args.context,
            readOnly: args.read_only,
            depth: depth + 1,
            parentId: id,
            signal: opts.signal
          });
        });
      }

      try {
        const result = await runner.run(messages, this.projectDir, {
          model: resolved.providerModelId || resolved.id,
          temperature: resolved.inferenceSettings?.temperature ?? 0.5,
          topP: resolved.inferenceSettings?.topP,
          reasoningEffort: resolved.reasoningEffort,
          maxTokens: DEFAULT_MAX_TOKENS_SUBAGENT,
          // FIX: maxIterations was computed above but never forwarded, so the
          // documented options.maxIterations / 25-iteration default had no effect.
          maxIterations,
          tools: [...baseTools, ...customTools],
          readOnly: options.readOnly || false,
          signal: abortController.signal,
          customToolExecutors,
          customTools
        });

        clearTimeout(timeoutId);

        agent.status = 'completed';
        agent.result = result;
        agent.completedAt = Date.now();
        agent.tokens = {
          prompt: runner.totalPromptTokens,
          completion: runner.totalCompletionTokens,
          total: runner.totalTokens
        };

        this.onAgentComplete(agent);

        return {
          id,
          result: result || '(No response from sub-agent)',
          tokens: agent.tokens.total,
          toolCalls: agent.toolCalls.length,
          elapsed: agent.completedAt - agent.startedAt,
          model: resolved.key,
          provider: resolved.provider
        };

      } catch (err) {
        // Always clear the timeout before rethrowing to the outer handler.
        clearTimeout(timeoutId);
        throw err;
      }

    } catch (err) {
      // FIX: cancel() marks the record 'cancelled' and then aborts, which makes
      // runner.run throw and land here. Previously this branch unconditionally
      // overwrote the status with 'failed', losing the cancellation.
      const wasCancelled = agent.status === 'cancelled';
      if (!wasCancelled) {
        agent.status = 'failed';
        agent.completedAt = Date.now();
      }
      agent.error = err.message || String(err);
      this.onAgentError(agent, err);

      return {
        id,
        error: wasCancelled
          ? `Sub-agent cancelled: ${agent.error}`
          : `Sub-agent failed: ${agent.error}`,
        tokens: agent.tokens?.total || 0,
        toolCalls: agent.toolCalls?.length || 0,
        elapsed: (agent.completedAt || Date.now()) - agent.startedAt,
        model: modelKey,
        provider: agent.provider
      };
    }
  }

  /**
   * Spawn multiple agents. Cloud agents run concurrently (up to MAX_CONCURRENT_CLOUD),
   * local agents run sequentially (a local backend serves one model at a time).
   *
   * @param {Array<{model: string, task: string, context?: string, read_only?: boolean, files?: string[]}>} specs
   * @param {Object} options - Shared spawn options (signal, depth, ...)
   * @returns {Promise<Object[]>} - One spawn() result per spec
   */
  async spawnParallel(specs, options = {}) {
    const cloudSpecs = [];
    const localSpecs = [];

    // Partition specs by provider locality; unresolvable models are treated as
    // cloud here and will fall back inside spawn().
    for (const spec of specs) {
      const resolved = this.modelRegistry.resolveModelKey(spec.model);
      if (resolved && (resolved.provider || 'local') === 'local') {
        localSpecs.push(spec);
      } else {
        cloudSpecs.push(spec);
      }
    }

    const results = [];

    // Cloud agents: run concurrently in batches
    if (cloudSpecs.length > 0) {
      for (let i = 0; i < cloudSpecs.length; i += MAX_CONCURRENT_CLOUD) {
        const batch = cloudSpecs.slice(i, i + MAX_CONCURRENT_CLOUD);
        const batchResults = await Promise.all(
          batch.map(spec => this.spawn(spec.model, spec.task, {
            ...options,
            context: spec.context,
            readOnly: spec.read_only,
            files: spec.files
          }))
        );
        results.push(...batchResults);
      }
    }

    // Local agents: sequential
    for (const spec of localSpecs) {
      const result = await this.spawn(spec.model, spec.task, {
        ...options,
        context: spec.context,
        readOnly: spec.read_only,
        files: spec.files
      });
      results.push(result);
    }

    return results;
  }

  /**
   * Get a specific agent record, or null if unknown.
   */
  getAgent(id) {
    return this.agents.get(id) || null;
  }

  /**
   * List all agent records created in this session (any status).
   */
  listAgents() {
    return Array.from(this.agents.values());
  }

  /**
   * Cancel a running agent (best-effort via AbortController).
   * @returns {boolean} true if the agent existed and was running
   */
  cancel(id) {
    const agent = this.agents.get(id);
    if (!agent || agent.status !== 'running') return false;
    agent.status = 'cancelled';
    agent.completedAt = Date.now();
    // Signal the abort controller to actually stop the runner; spawn()'s error
    // path preserves the 'cancelled' status set above.
    if (agent.abortController) {
      agent.abortController.abort();
    }
    return true;
  }

  // ─── Private Helpers ─────────────────────────────────────────────────────

  /**
   * Pick the base system prompt for a model: the model's configured prompt,
   * then 'code-agent', then empty string.
   */
  _buildSystemPrompt(modelMeta) {
    const promptName = modelMeta.prompt || 'code-agent';
    if (this.promptManager && this.promptManager.has(promptName)) {
      return this.promptManager.get(promptName);
    }
    if (this.promptManager && this.promptManager.has('code-agent')) {
      return this.promptManager.get('code-agent');
    }
    return '';
  }

  /**
   * Assemble the full system prompt: base + sub-agent directive + project
   * instructions + parent context, trimmed to roughly half the model's
   * context window (chars ≈ tokens * 4, so limit * 2 chars ≈ limit / 2 tokens).
   */
  _assembleSystemPrompt(basePrompt, task, options, modelMeta) {
    let prompt = basePrompt;
    prompt += `\n\n${SUB_AGENT_DIRECTIVE}`;

    // Add project instructions
    if (this.config) {
      const instructions = this.config.getInstructions();
      if (instructions) {
        prompt += `\n\n## Project Instructions (from ${instructions.source})\n\n${instructions.content}`;
      }
    }

    // Add context
    if (options.context) {
      prompt += `\n\n## Context from Parent Agent\n\n${options.context}`;
    }

    // Context budget: trim if exceeding ~50% of model's context limit
    const contextLimit = modelMeta.contextLimit || 32768;
    const maxPromptChars = Math.floor(contextLimit * 2); // rough char-to-token ratio
    if (prompt.length > maxPromptChars) {
      prompt = prompt.slice(0, maxPromptChars) + '\n\n[...context trimmed to fit model context window]';
    }

    return prompt;
  }

  /**
   * Read and format files for prompt injection. Each file is capped at 10k
   * chars; unreadable or non-file paths are skipped silently (best-effort).
   * @returns {string|null} Markdown-formatted file contents, or null if none
   */
  _readFiles(filePaths) {
    const fs = require('fs');
    const path = require('path');
    const parts = [];

    for (const filePath of filePaths) {
      try {
        const fullPath = path.resolve(this.projectDir, filePath);
        if (fs.existsSync(fullPath) && fs.statSync(fullPath).isFile()) {
          const content = fs.readFileSync(fullPath, 'utf-8');
          // Limit each file to 10k chars
          const trimmed = content.length > 10000
            ? content.slice(0, 10000) + '\n[...truncated]'
            : content;
          parts.push(`### ${filePath}\n\`\`\`\n${trimmed}\n\`\`\``);
        }
      } catch {
        // Skip unreadable files (deliberate best-effort)
      }
    }

    return parts.length > 0 ? parts.join('\n\n') : null;
  }
}
372
+
373
+ /**
374
+ * Tool definition for spawn_agent, exported for use by banana.js
375
+ */
376
+ const SPAWN_AGENT_TOOL_DEF = {
377
+ type: 'function',
378
+ function: {
379
+ name: 'spawn_agent',
380
+ description: 'Spawn a sub-agent on a different model. Use for code review, parallel research, complex reasoning, or delegating to specialized models. Use "local:current" to spawn on the same model you are running on.',
381
+ parameters: {
382
+ type: 'object',
383
+ properties: {
384
+ model: {
385
+ type: 'string',
386
+ description: 'Model in "provider:alias" format. Use "local:current" for the same model (recommended default). Cloud examples: "anthropic:claude-sonnet-4-6", "openai:gpt-4o"'
387
+ },
388
+ task: {
389
+ type: 'string',
390
+ description: 'Clear task description for the sub-agent'
391
+ },
392
+ context: {
393
+ type: 'string',
394
+ description: 'Optional context (relevant code, findings, constraints)'
395
+ },
396
+ read_only: {
397
+ type: 'boolean',
398
+ description: 'Restrict to read-only tools. Default: false'
399
+ }
400
+ },
401
+ required: ['model', 'task']
402
+ }
403
+ }
404
+ };
405
+
406
+ module.exports = { SubAgentManager, SPAWN_AGENT_TOOL_DEF, MAX_DEPTH };
@@ -0,0 +1,132 @@
1
+ /**
2
+ * Token counting and cost estimation for Banana Code
3
+ */
4
+
5
class TokenCounter {
  /**
   * @param {Object} config - Config store exposing get(key); this class reads
   *   'maxTokens' and 'tokenWarningThreshold'.
   */
  constructor(config) {
    this.config = config;
    // Running totals for the current session (estimated or API-reported).
    this.sessionTokens = {
      input: 0,
      output: 0
    };

    // Rough cost estimates per 1M tokens (adjust based on your model)
    this.costs = {
      local: { input: 0, output: 0 }, // Free for local models
      'gpt-4': { input: 30, output: 60 },
      'gpt-3.5-turbo': { input: 0.5, output: 1.5 },
      'claude-3-opus': { input: 15, output: 75 },
      'claude-3-sonnet': { input: 3, output: 15 }
    };
  }

  /**
   * Estimate tokens from text (rough approximation).
   * More accurate would be to use tiktoken, but this works for estimates.
   * @param {string} text
   * @returns {number} Estimated token count (0 for empty/missing text)
   */
  estimateTokens(text) {
    if (!text) return 0;

    // Average English: ~4 chars per token.
    // Code tends to be more tokenized: ~3.5 chars per token.
    const charCount = text.length;

    // Heuristic: any common code punctuation/keyword marks the text as code.
    const codeIndicators = ['{', '}', '(', ')', ';', '=>', 'function', 'const', 'import'];
    const isCode = codeIndicators.some(indicator => text.includes(indicator));

    const charsPerToken = isCode ? 3.5 : 4;
    return Math.ceil(charCount / charsPerToken);
  }

  /**
   * Track estimated usage for a request/response pair.
   * @returns {{inputTokens: number, outputTokens: number}} The estimates added
   */
  trackUsage(inputText, outputText) {
    const inputTokens = this.estimateTokens(inputText);
    const outputTokens = this.estimateTokens(outputText);

    this.sessionTokens.input += inputTokens;
    this.sessionTokens.output += outputTokens;

    return { inputTokens, outputTokens };
  }

  /**
   * Track exact API-reported usage when available.
   * Negative or non-numeric inputs are clamped to 0.
   * @returns {{inputTokens: number, outputTokens: number}} The clamped values added
   */
  addUsage(inputTokens = 0, outputTokens = 0) {
    const inTokens = Math.max(0, Number(inputTokens) || 0);
    const outTokens = Math.max(0, Number(outputTokens) || 0);
    this.sessionTokens.input += inTokens;
    this.sessionTokens.output += outTokens;
    return { inputTokens: inTokens, outputTokens: outTokens };
  }

  /** Get session totals: { input, output, total }. */
  getSessionUsage() {
    return {
      input: this.sessionTokens.input,
      output: this.sessionTokens.output,
      total: this.sessionTokens.input + this.sessionTokens.output
    };
  }

  /** Reset session tracking to zero. */
  resetSession() {
    this.sessionTokens = { input: 0, output: 0 };
  }

  /**
   * Estimate session cost for non-local models. Unknown model names fall back
   * to the free 'local' rates.
   * @param {string} model - Key into this.costs
   * @returns {{input: number, output: number, total: number, formatted: string}}
   */
  estimateCost(model = 'local') {
    const rates = this.costs[model] || this.costs.local;
    const inputCost = (this.sessionTokens.input / 1000000) * rates.input;
    const outputCost = (this.sessionTokens.output / 1000000) * rates.output;

    return {
      input: inputCost,
      output: outputCost,
      total: inputCost + outputCost,
      formatted: `$${(inputCost + outputCost).toFixed(4)}`
    };
  }

  /**
   * Check whether session usage is approaching the configured token limit.
   * @param {number} additionalTokens - Tokens about to be added to the total
   * @returns {{current, max, percentage, isWarning, isExceeded, remaining}}
   */
  checkLimit(additionalTokens = 0) {
    const maxTokens = this.config.get('maxTokens') || 32000;
    // FIX: use ?? (not ||) so an explicitly configured threshold of 0
    // ("always warn") is honored instead of silently becoming 0.8.
    const threshold = this.config.get('tokenWarningThreshold') ?? 0.8;
    const current = this.sessionTokens.input + this.sessionTokens.output + additionalTokens;

    const percentage = current / maxTokens;

    return {
      current,
      max: maxTokens,
      percentage,
      isWarning: percentage >= threshold,
      isExceeded: percentage >= 1,
      remaining: maxTokens - current // may go negative when exceeded
    };
  }

  /** Format a token count for display: "999", "1.5K", "2.50M". */
  formatCount(count) {
    if (count >= 1000000) {
      return `${(count / 1000000).toFixed(2)}M`;
    } else if (count >= 1000) {
      return `${(count / 1000).toFixed(1)}K`;
    }
    return count.toString();
  }

  /** Build a one-line usage summary, appending a warning near the limit. */
  getSummary() {
    const usage = this.getSessionUsage();
    const limit = this.checkLimit();

    let summary = `Tokens: ${this.formatCount(usage.total)} `;
    summary += `(${this.formatCount(usage.input)} in / ${this.formatCount(usage.output)} out)`;

    if (limit.isWarning) {
      summary += ` ⚠️ ${Math.round(limit.percentage * 100)}% of limit`;
    }

    return summary;
  }
}
+
132
+ module.exports = TokenCounter;
@@ -0,0 +1,163 @@
1
+ /**
2
+ * Vision Analyzer for Banana Code
3
+ *
4
+ * Primary: Send images directly to local vision models (Qwen3 VL) via LM Studio
5
+ * Fallback: Use Gemini API to analyze images when non-vision model is loaded
6
+ */
7
+
8
+ const fs = require('fs');
9
+ const path = require('path');
10
+
11
class VisionAnalyzer {
  /**
   * @param {Object} options
   * @param {string} [options.apiKey] - Gemini API key; falls back to the
   *   GEMINI_API_KEY or GOOGLE_API_KEY environment variables.
   */
  constructor(options = {}) {
    // Gemini API endpoint - fallback for non-vision models
    this.apiKey = options.apiKey || process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY;
    this.apiUrl = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent';
    // Gemini fallback is only usable when a key was found.
    this.geminiEnabled = !!this.apiKey;
    // NOTE: this.activeProvider stays undefined until setActiveProvider() is called.
  }

  /**
   * Set the active provider so vision checks work correctly.
   * Monkey Models provider handles vision server-side via Gemini proxy.
   * @param {string} provider - Provider identifier (e.g. 'monkey')
   */
  setActiveProvider(provider) {
    this.activeProvider = provider;
  }

  /**
   * Check if any vision capability is available.
   * Monkey Models handles vision server-side, so it's always available.
   * @returns {boolean}
   */
  isEnabled() {
    if (this.activeProvider === 'monkey') return true;
    return this.geminiEnabled;
  }

  /**
   * Build a multimodal message for LM Studio (OpenAI-compatible format).
   * Use this when the current model supports vision natively.
   * @param {string} text - The user's message text
   * @param {Array} images - Array of {base64, mimeType, path} objects
   * @returns {Object} - A message object with multimodal content array
   */
  buildMultimodalMessage(text, images) {
    // Text first, then one data-URL image_url part per image.
    const content = [
      { type: 'text', text }
    ];
    for (const img of images) {
      content.push({
        type: 'image_url',
        image_url: {
          url: `data:${img.mimeType};base64,${img.base64}`
        }
      });
    }
    return { role: 'user', content };
  }

  /**
   * Analyze an image via Gemini API (fallback when local model isn't vision-capable).
   * Returns null (never throws) on any failure so callers can degrade gracefully.
   * @param {Object} imageData - Image data with base64 and mimeType
   * @param {string} context - Optional context about what user wants to know
   * @returns {Promise<string|null>} - Analysis text, or null on failure/disabled
   */
  async analyzeImage(imageData, context = '') {
    if (!this.geminiEnabled) {
      return null;
    }

    // Two prompt variants: targeted (user supplied a question) vs. a generic
    // developer-oriented full description.
    const prompt = context
      ? `Analyze this image in detail. The user wants to know: "${context}"\n\nProvide a comprehensive description including:\n1. What type of content this is (UI screenshot, code, diagram, photo, etc.)\n2. All visible text, labels, and content\n3. Layout and structure\n4. Colors, styling, and design elements\n5. Any issues, errors, or notable observations\n6. Specific details relevant to the user's question`
      : `Analyze this image in comprehensive detail for a software developer. Include:\n1. What type of content this is (UI screenshot, code, diagram, error message, etc.)\n2. All visible text, labels, buttons, and content (transcribe exactly)\n3. Layout structure and hierarchy\n4. Colors, styling, fonts, spacing\n5. Any visible errors, warnings, or issues\n6. Technical observations (framework hints, patterns, accessibility concerns)\n7. What a developer would need to know to work with or fix this`;

    try {
      // NOTE(review): the API key is sent as a URL query parameter; Gemini also
      // accepts an `x-goog-api-key` header, which keeps keys out of logged URLs
      // — consider switching. Confirm against current Gemini REST docs.
      const response = await fetch(`${this.apiUrl}?key=${this.apiKey}`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          contents: [{
            parts: [
              { text: prompt },
              {
                inline_data: {
                  mime_type: imageData.mimeType,
                  data: imageData.base64
                }
              }
            ]
          }],
          generationConfig: {
            temperature: 0.2,
            maxOutputTokens: 2048
          }
        })
      });

      if (!response.ok) {
        const error = await response.text();
        console.error('Vision API error:', error);
        return null;
      }

      const data = await response.json();
      // First candidate, first text part — deeper candidates are ignored.
      const analysis = data.candidates?.[0]?.content?.parts?.[0]?.text;

      return analysis || null;
    } catch (error) {
      // Network/parse failures degrade to "no analysis" rather than throwing.
      console.error('Vision analysis failed:', error.message);
      return null;
    }
  }

  /**
   * Analyze multiple images via Gemini (fallback). Images are analyzed
   * sequentially, one request each; failed images are skipped.
   * @param {Array} images - Array of image data objects
   * @param {string} context - User's question/context
   * @returns {Promise<string|null>} - Combined analysis, or null if none succeeded
   */
  async analyzeImages(images, context = '') {
    if (!this.geminiEnabled || images.length === 0) {
      return null;
    }

    const analyses = [];

    for (let i = 0; i < images.length; i++) {
      const img = images[i];
      // Tell the model which image it is looking at when there are several.
      const imgContext = images.length > 1
        ? `Image ${i + 1} of ${images.length}. ${context}`
        : context;

      const analysis = await this.analyzeImage(img, imgContext);
      if (analysis) {
        analyses.push(`**[Image ${i + 1}: ${img.path || 'uploaded'}]**\n${analysis}`);
      }
    }

    if (analyses.length === 0) {
      return null;
    }

    return analyses.join('\n\n---\n\n');
  }

  /**
   * Format analysis for inclusion in prompt.
   * @param {string} analysis - The image analysis
   * @returns {string} - Formatted for prompt injection ('' when analysis is empty)
   */
  formatForPrompt(analysis) {
    if (!analysis) return '';

    return `
<image_analysis>
The following is an AI-generated analysis of the attached image(s). Use this information to understand what the user is showing you:

${analysis}
</image_analysis>

`;
  }
}

// CommonJS export of the class itself (no named wrapper object).
module.exports = VisionAnalyzer;