copilot-liku-cli 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/ARCHITECTURE.md +411 -0
  2. package/CONFIGURATION.md +302 -0
  3. package/CONTRIBUTING.md +225 -0
  4. package/ELECTRON_README.md +121 -0
  5. package/INSTALLATION.md +350 -0
  6. package/LICENSE.md +1 -0
  7. package/PROJECT_STATUS.md +229 -0
  8. package/QUICKSTART.md +255 -0
  9. package/README.md +167 -0
  10. package/TESTING.md +274 -0
  11. package/package.json +61 -0
  12. package/scripts/start.js +30 -0
  13. package/src/assets/tray-icon.png +0 -0
  14. package/src/cli/commands/agent.js +327 -0
  15. package/src/cli/commands/click.js +108 -0
  16. package/src/cli/commands/drag.js +85 -0
  17. package/src/cli/commands/find.js +109 -0
  18. package/src/cli/commands/keys.js +132 -0
  19. package/src/cli/commands/mouse.js +79 -0
  20. package/src/cli/commands/repl.js +290 -0
  21. package/src/cli/commands/screenshot.js +72 -0
  22. package/src/cli/commands/scroll.js +74 -0
  23. package/src/cli/commands/start.js +67 -0
  24. package/src/cli/commands/type.js +57 -0
  25. package/src/cli/commands/wait.js +84 -0
  26. package/src/cli/commands/window.js +104 -0
  27. package/src/cli/liku.js +249 -0
  28. package/src/cli/util/output.js +174 -0
  29. package/src/main/agents/base-agent.js +410 -0
  30. package/src/main/agents/builder.js +484 -0
  31. package/src/main/agents/index.js +62 -0
  32. package/src/main/agents/orchestrator.js +362 -0
  33. package/src/main/agents/researcher.js +511 -0
  34. package/src/main/agents/state-manager.js +344 -0
  35. package/src/main/agents/supervisor.js +365 -0
  36. package/src/main/agents/verifier.js +452 -0
  37. package/src/main/ai-service.js +1633 -0
  38. package/src/main/index.js +2208 -0
  39. package/src/main/inspect-service.js +467 -0
  40. package/src/main/system-automation.js +1186 -0
  41. package/src/main/ui-automation/config.js +76 -0
  42. package/src/main/ui-automation/core/helpers.js +41 -0
  43. package/src/main/ui-automation/core/index.js +15 -0
  44. package/src/main/ui-automation/core/powershell.js +82 -0
  45. package/src/main/ui-automation/elements/finder.js +274 -0
  46. package/src/main/ui-automation/elements/index.js +14 -0
  47. package/src/main/ui-automation/elements/wait.js +66 -0
  48. package/src/main/ui-automation/index.js +164 -0
  49. package/src/main/ui-automation/interactions/element-click.js +211 -0
  50. package/src/main/ui-automation/interactions/high-level.js +230 -0
  51. package/src/main/ui-automation/interactions/index.js +47 -0
  52. package/src/main/ui-automation/keyboard/index.js +15 -0
  53. package/src/main/ui-automation/keyboard/input.js +179 -0
  54. package/src/main/ui-automation/mouse/click.js +186 -0
  55. package/src/main/ui-automation/mouse/drag.js +88 -0
  56. package/src/main/ui-automation/mouse/index.js +30 -0
  57. package/src/main/ui-automation/mouse/movement.js +51 -0
  58. package/src/main/ui-automation/mouse/scroll.js +116 -0
  59. package/src/main/ui-automation/screenshot.js +183 -0
  60. package/src/main/ui-automation/window/index.js +23 -0
  61. package/src/main/ui-automation/window/manager.js +305 -0
  62. package/src/main/utils/time.js +62 -0
  63. package/src/main/visual-awareness.js +597 -0
  64. package/src/renderer/chat/chat.js +671 -0
  65. package/src/renderer/chat/index.html +725 -0
  66. package/src/renderer/chat/preload.js +112 -0
  67. package/src/renderer/overlay/index.html +648 -0
  68. package/src/renderer/overlay/overlay.js +782 -0
  69. package/src/renderer/overlay/preload.js +90 -0
  70. package/src/shared/grid-math.js +82 -0
  71. package/src/shared/inspect-types.js +230 -0
@@ -0,0 +1,1633 @@
1
+ /**
2
+ * AI Service Module
3
+ * Handles integration with AI backends (GitHub Copilot, OpenAI, Claude, local models)
4
+ * Supports visual context for AI awareness of screen content
5
+ * Supports AGENTIC actions (mouse, keyboard, system control)
6
+ * Supports inspect mode for precision targeting
7
+ */
8
+
9
+ const https = require('https');
10
+ const http = require('http');
11
+ const fs = require('fs');
12
+ const path = require('path');
13
+ const { shell } = require('electron');
14
+ const systemAutomation = require('./system-automation');
15
+
16
+ // Lazy-load inspect service to avoid circular dependencies
17
+ let inspectService = null;
18
+ function getInspectService() {
19
+ if (!inspectService) {
20
+ inspectService = require('./inspect-service');
21
+ }
22
+ return inspectService;
23
+ }
24
+
25
+ // ===== CONFIGURATION =====
26
+
27
// Models selectable through GitHub Copilot (based on the Copilot CLI changelog).
// Keys are the short names used by callers; `id` is the identifier sent to the
// API and `vision` marks whether the model accepts image input.
const COPILOT_MODELS = {
  'claude-sonnet-4.5': {
    name: 'Claude Sonnet 4.5',
    id: 'claude-sonnet-4.5-20250929',
    vision: true
  },
  'claude-sonnet-4': {
    name: 'Claude Sonnet 4',
    id: 'claude-sonnet-4-20250514',
    vision: true
  },
  'claude-opus-4.5': {
    name: 'Claude Opus 4.5',
    id: 'claude-opus-4.5',
    vision: true
  },
  'claude-haiku-4.5': {
    name: 'Claude Haiku 4.5',
    id: 'claude-haiku-4.5',
    vision: true
  },
  'gpt-4o': {
    name: 'GPT-4o',
    id: 'gpt-4o',
    vision: true
  },
  'gpt-4o-mini': {
    name: 'GPT-4o Mini',
    id: 'gpt-4o-mini',
    vision: true
  },
  'gpt-4.1': {
    name: 'GPT-4.1',
    id: 'gpt-4.1',
    vision: true
  },
  'o1': {
    name: 'o1',
    id: 'o1',
    vision: false
  },
  'o1-mini': {
    name: 'o1 Mini',
    id: 'o1-mini',
    vision: false
  },
  'o3-mini': {
    name: 'o3 Mini',
    id: 'o3-mini',
    vision: false
  }
};

// Short name of the Copilot model selected at startup
let currentCopilotModel = 'gpt-4o';
43
+
44
// Endpoint and default-model settings for each supported backend.
// `model` is used for text-only requests, `visionModel` when an image is attached.
const AI_PROVIDERS = {
  // GitHub Copilot chat completions
  copilot: {
    baseUrl: 'api.githubcopilot.com',
    path: '/chat/completions',
    model: 'gpt-4o',
    visionModel: 'gpt-4o'
  },

  // OpenAI chat completions
  openai: {
    baseUrl: 'api.openai.com',
    path: '/v1/chat/completions',
    model: 'gpt-4o',
    visionModel: 'gpt-4o'
  },

  // Anthropic Messages API
  anthropic: {
    baseUrl: 'api.anthropic.com',
    path: '/v1/messages',
    model: 'claude-sonnet-4-20250514',
    visionModel: 'claude-sonnet-4-20250514'
  },

  // Ollama server on the local machine
  ollama: {
    baseUrl: 'localhost',
    port: 11434,
    path: '/api/chat',
    model: 'llama3.2-vision',
    visionModel: 'llama3.2-vision'
  }
};
71
+
72
// OAuth client id sent with the GitHub device-code requests
const COPILOT_CLIENT_ID = 'Iv1.b507a08c87ecfe98';

// Provider used for requests; starts out as GitHub Copilot
let currentProvider = 'copilot';

// Credentials per provider, seeded from the environment where available
let apiKeys = {
  // GitHub OAuth token
  copilot: process.env.GH_TOKEN || process.env.GITHUB_TOKEN || '',
  // Copilot session token (exchanged from the OAuth token)
  copilotSession: '',
  openai: process.env.OPENAI_API_KEY || '',
  anthropic: process.env.ANTHROPIC_API_KEY || ''
};

// Snapshot describing the currently selected model
let currentModelMetadata = {
  modelId: currentCopilotModel,
  provider: currentProvider,
  modelVersion: COPILOT_MODELS[currentCopilotModel]?.id || null,
  capabilities: COPILOT_MODELS[currentCopilotModel]?.vision ? ['vision', 'text'] : ['text'],
  lastUpdated: new Date().toISOString()
};

// Where the OAuth token is persisted: %APPDATA%, else $HOME, else the working directory
const TOKEN_FILE = path.join(
  process.env.APPDATA || process.env.HOME || '.',
  'copilot-agent',
  'copilot-token.json'
);

// Device-code flow state: whether a flow is running and who to notify when it ends
let oauthInProgress = false;
let oauthCallback = null;

// Prior chat turns replayed to the model, capped at MAX_HISTORY entries per request
let conversationHistory = [];
const MAX_HISTORY = 20;

// Most recent screenshots, capped at MAX_VISUAL_CONTEXT entries
let visualContextBuffer = [];
const MAX_VISUAL_CONTEXT = 5;
107
+
108
// ===== SYSTEM PROMPT =====
// Sent as the `system` message of every request. It describes the grid
// coordinate scheme and the JSON action format the model must answer with,
// so edits here change what the model is told about the overlay.
const SYSTEM_PROMPT = `You are Liku, an intelligent AGENTIC AI assistant integrated into a desktop overlay system with visual screen awareness AND the ability to control the user's computer.

## Your Core Capabilities

1. **Screen Vision**: When the user captures their screen, you receive it as an image. ALWAYS analyze visible content immediately.

2. **Grid Coordinate System**: The screen has a dot grid overlay:
- **Columns**: Letters A, B, C, D... (left to right), spacing 100px
- **Rows**: Numbers 0, 1, 2, 3... (top to bottom), spacing 100px
- **Start**: Grid is centered, so A0 is at (50, 50)
- **Format**: "C3" = column C (index 2), row 3 = pixel (250, 350)
- **Formula**: x = 50 + col_index * 100, y = 50 + row_index * 100
- A0 ≈ (50, 50), B0 ≈ (150, 50), A1 ≈ (50, 150)
- **Fine Grid**: Sub-labels like C3.12 refer to 25px subcells inside C3

3. **SYSTEM CONTROL - AGENTIC ACTIONS**: You can execute actions on the user's computer:
- **Click**: Click at coordinates
- **Type**: Type text into focused fields
- **Press Keys**: Press keyboard shortcuts (ctrl+c, enter, etc.)
- **Scroll**: Scroll up/down
- **Drag**: Drag from one point to another

## ACTION FORMAT - CRITICAL

When the user asks you to DO something (click, type, interact), respond with a JSON action block:

\`\`\`json
{
"thought": "Brief explanation of what I'm about to do",
"actions": [
{"type": "click", "x": 300, "y": 200, "reason": "Click the input field"},
{"type": "type", "text": "Hello world", "reason": "Type the requested text"},
{"type": "key", "key": "enter", "reason": "Submit the form"}
],
"verification": "After these actions, the text field should show 'Hello world'"
}
\`\`\`

### Action Types:
- \`{"type": "click", "x": <number>, "y": <number>}\` - Left click at pixel coordinates
- \`{"type": "double_click", "x": <number>, "y": <number>}\` - Double click
- \`{"type": "right_click", "x": <number>, "y": <number>}\` - Right click
- \`{"type": "type", "text": "<string>"}\` - Type text (types into currently focused element)
- \`{"type": "key", "key": "<key combo>"}\` - Press key (e.g., "enter", "ctrl+c", "alt+tab", "f5")
- \`{"type": "scroll", "direction": "up|down", "amount": <number>}\` - Scroll (amount = clicks)
- \`{"type": "drag", "fromX": <n>, "fromY": <n>, "toX": <n>, "toY": <n>}\` - Drag
- \`{"type": "wait", "ms": <number>}\` - Wait milliseconds
- \`{"type": "screenshot"}\` - Take screenshot to verify result

### Grid to Pixel Conversion:
- A0 → (50, 50), B0 → (150, 50), C0 → (250, 50)
- A1 → (50, 150), B1 → (150, 150), C1 → (250, 150)
- Formula: x = 50 + col_index * 100, y = 50 + row_index * 100
- Column A=0, B=1, C=2... so C3 = x: 50 + 2*100 = 250, y: 50 + 3*100 = 350
- Fine labels: C3.12 = x: 12.5 + (2*4+1)*25 = 237.5, y: 12.5 + (3*4+2)*25 = 362.5

## Response Guidelines

**For OBSERVATION requests** (what's at C3, describe the screen):
- Respond with natural language describing what you see
- Be specific about UI elements, text, buttons

**For ACTION requests** (click here, type this, open that):
- ALWAYS respond with the JSON action block
- Include your thought process
- Calculate coordinates precisely
- Add verification step to confirm success

**When executing a sequence**:
1. First action: click to focus the target element
2. Second action: perform the main task (type, etc.)
3. Optional: verify with screenshot

**IMPORTANT**: When asked to interact with something visible in the screenshot:
1. Identify the element's approximate position
2. Convert to pixel coordinates
3. Return the action JSON

Be precise, efficient, and execute actions confidently based on visual information.`;
188
+
189
/**
 * Select the AI provider used for subsequent requests.
 *
 * Only providers configured as own keys of AI_PROVIDERS are accepted, so
 * inherited names such as "constructor" or "toString" are rejected instead of
 * being stored as the active provider.
 *
 * @param {string} provider - Provider key, e.g. 'copilot', 'openai'.
 * @returns {boolean} true when the provider was switched, false when unknown.
 */
function setProvider(provider) {
  if (!Object.prototype.hasOwnProperty.call(AI_PROVIDERS, provider)) {
    return false;
  }

  currentProvider = provider;
  currentModelMetadata.provider = provider;
  currentModelMetadata.lastUpdated = new Date().toISOString();
  return true;
}
201
+
202
/**
 * Store a credential for one of the known key slots.
 *
 * @param {string} provider - Slot name; must already exist on apiKeys.
 * @param {string} key - Credential to store.
 * @returns {boolean} true when stored, false when the slot is unknown.
 */
function setApiKey(provider, key) {
  const isKnownSlot = Object.prototype.hasOwnProperty.call(apiKeys, provider);
  if (!isKnownSlot) {
    return false;
  }
  apiKeys[provider] = key;
  return true;
}
212
+
213
/**
 * Select the Copilot model and refresh the model metadata snapshot.
 *
 * Only own keys of COPILOT_MODELS are accepted; inherited names such as
 * "constructor" used to pass the truthiness check and produced metadata with
 * an undefined model version.
 *
 * @param {string} model - Short model key, e.g. 'gpt-4o'.
 * @returns {boolean} true when the model was selected, false when unknown.
 */
function setCopilotModel(model) {
  if (!Object.prototype.hasOwnProperty.call(COPILOT_MODELS, model)) {
    return false;
  }

  const selected = COPILOT_MODELS[model];
  currentCopilotModel = model;
  currentModelMetadata = {
    modelId: model,
    provider: currentProvider,
    modelVersion: selected.id,
    capabilities: selected.vision ? ['vision', 'text'] : ['text'],
    lastUpdated: new Date().toISOString()
  };
  return true;
}
230
+
231
/**
 * List the selectable Copilot models.
 *
 * @returns {Array<{id: string, name: string, vision: boolean, current: boolean}>}
 *   One entry per model in declaration order; `current` flags the active one.
 */
function getCopilotModels() {
  const models = [];
  for (const key of Object.keys(COPILOT_MODELS)) {
    const info = COPILOT_MODELS[key];
    models.push({
      id: key,
      name: info.name,
      vision: info.vision,
      current: key === currentCopilotModel
    });
  }
  return models;
}
242
+
243
/**
 * Return a copy of the current model metadata, plus a `sessionToken` field
 * that reports only whether a Copilot session token is held ('present' or
 * 'absent'), never the token itself.
 */
function getModelMetadata() {
  const sessionToken = apiKeys.copilotSession ? 'present' : 'absent';
  return Object.assign({}, currentModelMetadata, { sessionToken });
}
252
+
253
/**
 * Return the short key of the Copilot model that is currently selected.
 *
 * @returns {string}
 */
function getCurrentCopilotModel() {
  const activeModel = currentCopilotModel;
  return activeModel;
}
259
+
260
/**
 * Append a screenshot record to the visual context buffer.
 *
 * The record is copied and stamped with `addedAt` (epoch milliseconds); the
 * oldest entries are dropped once the buffer exceeds MAX_VISUAL_CONTEXT.
 *
 * @param {object} imageData - Screenshot fields to store.
 */
function addVisualContext(imageData) {
  const entry = Object.assign({}, imageData, { addedAt: Date.now() });
  visualContextBuffer.push(entry);

  // Trim from the front so only the most recent entries remain
  const overflow = visualContextBuffer.length - MAX_VISUAL_CONTEXT;
  if (overflow > 0) {
    visualContextBuffer.splice(0, overflow);
  }
}
274
+
275
/**
 * Return the most recently added visual context entry, or null when the
 * buffer is empty.
 */
function getLatestVisualContext() {
  const count = visualContextBuffer.length;
  if (count === 0) {
    return null;
  }
  return visualContextBuffer[count - 1];
}
283
+
284
/**
 * Drop all stored visual context by replacing the buffer with a new empty
 * array (the previous array object is left untouched).
 */
function clearVisualContext() {
  const emptyBuffer = [];
  visualContextBuffer = emptyBuffer;
}
290
+
291
/**
 * Build the messages array for an API call.
 *
 * The result is: the system prompt, up to MAX_HISTORY prior turns, then the
 * new user turn. When inspect mode is active and has detected regions, a
 * text summary of them is appended to the user message. When `includeVisual`
 * is set and a screenshot with a string `dataURL` is available, the image is
 * attached in the format the current provider expects; otherwise the user
 * turn is plain text.
 *
 * @param {string} userMessage - Text typed by the user.
 * @param {boolean} [includeVisual=false] - Attach the latest screenshot.
 * @returns {Array<object>} Messages in provider-specific shape.
 */
function buildMessages(userMessage, includeVisual = false) {
  // Summarise inspect-mode regions as markdown; '' when inspect mode is off,
  // nothing was detected, or the inspect service fails.
  const describeInspectContext = () => {
    try {
      const inspect = getInspectService();
      if (!inspect.isInspectModeActive()) {
        return '';
      }

      const inspectContext = inspect.generateAIContext();
      if (!inspectContext.regions || inspectContext.regions.length === 0) {
        return '';
      }

      const regionLines = inspectContext.regions
        .slice(0, 20)
        .map((r, i) =>
          `${i + 1}. **${r.label || 'Unknown'}** (${r.role}) at (${r.center.x}, ${r.center.y}) - confidence: ${Math.round(r.confidence * 100)}%`
        )
        .join('\n');

      let text = '\n\n## Detected UI Regions (Inspect Mode)\n' +
        regionLines +
        '\n\n**Note**: Use the coordinates provided above for precise targeting. If confidence is below 70%, verify with user before clicking.';

      // Window details are only reported alongside detected regions
      if (inspectContext.windowContext) {
        const win = inspectContext.windowContext;
        text += '\n\n## Active Window' +
          `\n- App: ${win.appName || 'Unknown'}` +
          `\n- Title: ${win.windowTitle || 'Unknown'}` +
          `\n- Scale Factor: ${win.scaleFactor || 1}`;
      }
      return text;
    } catch (e) {
      console.warn('[AI] Could not get inspect context:', e.message);
      return '';
    }
  };

  // Split "data:image/<type>;base64,<payload>" into media type and payload.
  // Input without that prefix is passed through unchanged and treated as PNG.
  const splitImageDataUrl = (dataURL) => {
    const match = /^data:(image\/\w+);base64,/.exec(dataURL);
    if (!match) {
      return { mediaType: 'image/png', base64Data: dataURL };
    }
    return { mediaType: match[1], base64Data: dataURL.slice(match[0].length) };
  };

  const messages = [
    { role: 'system', content: SYSTEM_PROMPT }
  ];

  // Replay recent conversation history
  conversationHistory.slice(-MAX_HISTORY).forEach(msg => {
    messages.push(msg);
  });

  const inspectContextText = describeInspectContext();
  const enhancedMessage = inspectContextText
    ? `${userMessage}${inspectContextText}`
    : userMessage;

  // Only attach an image when there is a usable data URL; a malformed entry
  // degrades to a text-only message instead of throwing.
  const candidate = includeVisual ? getLatestVisualContext() : null;
  const latestVisual = candidate && typeof candidate.dataURL === 'string' ? candidate : null;

  if (latestVisual && (currentProvider === 'copilot' || currentProvider === 'openai')) {
    // OpenAI/Copilot vision format (both use same API format)
    console.log('[AI] Including visual context in message (provider:', currentProvider, ')');
    messages.push({
      role: 'user',
      content: [
        { type: 'text', text: enhancedMessage },
        {
          type: 'image_url',
          image_url: {
            url: latestVisual.dataURL,
            detail: 'high'
          }
        }
      ]
    });
  } else if (latestVisual && currentProvider === 'anthropic') {
    // Anthropic vision format: media_type must match the actual image encoding
    const { mediaType, base64Data } = splitImageDataUrl(latestVisual.dataURL);
    messages.push({
      role: 'user',
      content: [
        {
          type: 'image',
          source: {
            type: 'base64',
            media_type: mediaType,
            data: base64Data
          }
        },
        { type: 'text', text: enhancedMessage }
      ]
    });
  } else if (latestVisual && currentProvider === 'ollama') {
    // Ollama vision format: raw base64 payloads in `images`
    const { base64Data } = splitImageDataUrl(latestVisual.dataURL);
    messages.push({
      role: 'user',
      content: enhancedMessage,
      images: [base64Data]
    });
  } else {
    messages.push({
      role: 'user',
      content: enhancedMessage
    });
  }

  return messages;
}
388
+
389
+ // ===== GITHUB COPILOT OAUTH =====
390
+
391
/**
 * Read the persisted Copilot OAuth token from TOKEN_FILE into apiKeys.copilot.
 *
 * @returns {boolean} true when a token was loaded; false when the file is
 *   missing, has no `access_token`, or cannot be read/parsed (the failure is
 *   logged in that last case).
 */
function loadCopilotToken() {
  try {
    if (!fs.existsSync(TOKEN_FILE)) {
      return false;
    }

    const raw = fs.readFileSync(TOKEN_FILE, 'utf8');
    const saved = JSON.parse(raw);
    if (!saved.access_token) {
      return false;
    }

    apiKeys.copilot = saved.access_token;
    console.log('[COPILOT] Loaded saved token');
    return true;
  } catch (e) {
    console.error('[COPILOT] Failed to load token:', e.message);
    return false;
  }
}
409
+
410
/**
 * Persist the Copilot OAuth token to TOKEN_FILE as JSON
 * (`{ access_token, saved_at }`).
 *
 * The file holds a long-lived credential, so it is written with owner-only
 * permissions (0600). Failures are logged and never thrown: persistence is
 * best-effort and must not break the login flow.
 *
 * @param {string} token - OAuth access token to store.
 */
function saveCopilotToken(token) {
  try {
    // recursive: true makes this a no-op when the directory already exists
    fs.mkdirSync(path.dirname(TOKEN_FILE), { recursive: true });

    const payload = JSON.stringify({
      access_token: token,
      saved_at: new Date().toISOString()
    });
    fs.writeFileSync(TOKEN_FILE, payload, { mode: 0o600 });

    // `mode` only applies when the file is created; tighten a pre-existing file too
    fs.chmodSync(TOKEN_FILE, 0o600);

    console.log('[COPILOT] Token saved');
  } catch (e) {
    console.error('[COPILOT] Failed to save token:', e.message);
  }
}
428
+
429
/**
 * Start the GitHub Copilot OAuth device-code flow.
 *
 * Requests a device code from GitHub, opens the verification page in the
 * browser, and starts polling for the access token.
 *
 * The in-progress flag is claimed before the request is sent, so two
 * overlapping calls cannot both start a flow. It is released again if the
 * request fails. After a successful start, pollForToken is responsible for
 * clearing it.
 *
 * @returns {Promise<{user_code: string, verification_uri: string, expires_in: number}>}
 *   Details the user needs to complete authorization.
 *   Rejects when a flow is already running, the request fails, or GitHub
 *   returns no device code.
 */
function startCopilotOAuth() {
  return new Promise((resolve, reject) => {
    if (oauthInProgress) {
      return reject(new Error('OAuth already in progress'));
    }
    oauthInProgress = true;

    // Release the flag on any failure so the user can retry
    const fail = (error) => {
      oauthInProgress = false;
      reject(error);
    };

    const data = JSON.stringify({
      client_id: COPILOT_CLIENT_ID,
      scope: 'copilot'
    });

    const req = https.request({
      hostname: 'github.com',
      path: '/login/device/code',
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
        'Content-Length': Buffer.byteLength(data)
      }
    }, (res) => {
      // Decode as UTF-8 across chunk boundaries
      res.setEncoding('utf8');
      let body = '';
      res.on('data', (chunk) => { body += chunk; });
      res.on('end', () => {
        let result;
        try {
          result = JSON.parse(body);
        } catch (e) {
          return fail(new Error('Invalid response from GitHub'));
        }

        if (!result || !result.device_code || !result.user_code) {
          return fail(new Error((result && result.error_description) || 'Failed to get device code'));
        }

        console.log('[COPILOT] OAuth started. User code:', result.user_code);

        // Open browser for user to authorize. A failure here is not fatal:
        // the caller still receives verification_uri and can open it manually.
        const warnOpenFailed = (err) => {
          console.warn('[COPILOT] Could not open browser:', err && err.message);
        };
        try {
          Promise.resolve(
            shell.openExternal(result.verification_uri_complete || result.verification_uri)
          ).catch(warnOpenFailed);
        } catch (err) {
          warnOpenFailed(err);
        }

        // Start polling for token
        pollForToken(result.device_code, result.interval || 5);

        resolve({
          user_code: result.user_code,
          verification_uri: result.verification_uri,
          expires_in: result.expires_in
        });
      });
    });

    req.on('error', fail);
    req.write(data);
    req.end();
  });
}
488
+
489
/**
 * Poll GitHub for the access token after the user has been sent to authorize.
 *
 * Outcomes per response:
 *  - access token: store and persist it, clear the in-progress flag, notify
 *    `oauthCallback` with success.
 *  - `authorization_pending`: poll again after the current interval.
 *  - `slow_down`: raise the interval for this and ALL later polls (to the
 *    interval GitHub reports, otherwise by 5 seconds), then poll again.
 *  - `expired_token` or any other error: clear the flag and notify failure.
 * Unparseable responses and network errors are retried at the current interval.
 *
 * @param {string} deviceCode - Device code returned by the device-code request.
 * @param {number} interval - Initial polling interval in seconds.
 */
function pollForToken(deviceCode, interval) {
  // Mutable so a slow_down response keeps applying to later polls
  let delaySeconds = interval;

  function scheduleNext() {
    setTimeout(poll, delaySeconds * 1000);
  }

  // End the flow and notify the listener once. A throwing listener is logged
  // rather than being mistaken for a bad response and triggering more polls.
  function finish(outcome) {
    oauthInProgress = false;
    if (!oauthCallback) {
      return;
    }
    const notify = oauthCallback;
    oauthCallback = null;
    try {
      notify(outcome);
    } catch (e) {
      console.error('[COPILOT] OAuth callback failed:', e.message);
    }
  }

  function handleResult(result) {
    if (result.access_token) {
      console.log('[COPILOT] OAuth successful!');
      apiKeys.copilot = result.access_token;
      saveCopilotToken(result.access_token);
      finish({ success: true, message: 'GitHub Copilot authenticated!' });
      return;
    }

    switch (result.error) {
      case 'authorization_pending':
        // User hasn't authorized yet, keep polling
        scheduleNext();
        break;
      case 'slow_down':
        // Rate limited: the longer interval must persist for subsequent polls
        delaySeconds = typeof result.interval === 'number' && result.interval > delaySeconds
          ? result.interval
          : delaySeconds + 5;
        scheduleNext();
        break;
      case 'expired_token':
        finish({ success: false, message: 'Authorization expired. Try /login again.' });
        break;
      default:
        finish({ success: false, message: result.error_description || 'OAuth failed' });
    }
  }

  function poll() {
    const data = JSON.stringify({
      client_id: COPILOT_CLIENT_ID,
      device_code: deviceCode,
      grant_type: 'urn:ietf:params:oauth:grant-type:device_code'
    });

    const req = https.request({
      hostname: 'github.com',
      path: '/login/oauth/access_token',
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
        'Content-Length': Buffer.byteLength(data)
      }
    }, (res) => {
      res.setEncoding('utf8');
      let body = '';
      res.on('data', (chunk) => { body += chunk; });
      res.on('end', () => {
        let result;
        try {
          result = JSON.parse(body);
        } catch (e) {
          // Parse error, retry
          return scheduleNext();
        }
        if (result === null || typeof result !== 'object') {
          // Not a usable response body, retry
          return scheduleNext();
        }
        handleResult(result);
      });
    });

    req.on('error', scheduleNext);
    req.write(data);
    req.end();
  }

  scheduleNext();
}
560
+
561
/**
 * Exchange the OAuth token for a Copilot session token.
 *
 * On success the session token is stored in apiKeys.copilotSession and
 * returned. If the exchange is refused (401/403), returns no token, cannot be
 * parsed, or the request fails, the OAuth token itself is stored as the
 * session token and returned, so the caller can still attempt the API call.
 *
 * No token material and no response body is written to the log: the response
 * body of a successful exchange contains the session token.
 *
 * @returns {Promise<string>} The token to use as the Copilot bearer token.
 *   Rejects only when no OAuth token is available.
 */
function exchangeForCopilotSession() {
  return new Promise((resolve, reject) => {
    if (!apiKeys.copilot) {
      return reject(new Error('No OAuth token available'));
    }

    // Shared fallback: use the OAuth token directly as the session token
    const useOAuthToken = () => {
      apiKeys.copilotSession = apiKeys.copilot;
      resolve(apiKeys.copilot);
    };

    console.log('[Copilot] Exchanging OAuth token for session token...');

    const options = {
      hostname: 'api.github.com',
      path: '/copilot_internal/v2/token',
      method: 'GET',
      headers: {
        'Authorization': `token ${apiKeys.copilot}`,
        'Accept': 'application/json',
        'User-Agent': 'GithubCopilot/1.155.0',
        'Editor-Version': 'vscode/1.96.0',
        'Editor-Plugin-Version': 'copilot-chat/0.22.0'
      }
    };

    const req = https.request(options, (res) => {
      res.setEncoding('utf8');
      let body = '';
      res.on('data', (chunk) => { body += chunk; });
      res.on('end', () => {
        console.log('[Copilot] Token exchange response:', res.statusCode);

        if (res.statusCode === 401 || res.statusCode === 403) {
          console.log('[Copilot] Token exchange got', res.statusCode, '- will use OAuth token directly');
          return useOAuthToken();
        }

        let result;
        try {
          result = JSON.parse(body);
        } catch (e) {
          console.log('[Copilot] Token exchange parse error:', e.message);
          return useOAuthToken();
        }

        if (result && result.token) {
          apiKeys.copilotSession = result.token;
          console.log('[Copilot] Session token obtained successfully, expires:', result.expires_at);
          return resolve(result.token);
        }

        if (result && result.message) {
          console.log('[Copilot] API message:', result.message);
        } else {
          console.log('[Copilot] Unexpected response format, using OAuth token');
        }
        useOAuthToken();
      });
    });

    req.on('error', (e) => {
      console.log('[Copilot] Token exchange network error:', e.message);
      useOAuthToken();
    });

    req.end();
  });
}
634
+
635
/**
 * Call the GitHub Copilot chat completions API.
 *
 * Uses the session token (not the OAuth token), exchanging for one when none
 * is held. Request strategy:
 *  1. POST to api.githubcopilot.com.
 *  2. On 401, re-exchange the session token and retry that endpoint once.
 *  3. On any other failure, try copilot-proxy.githubusercontent.com/v1.
 * If both endpoints fail, the error reported is derived from the PRIMARY
 * endpoint's failure.
 *
 * @param {Array<object>} messages - Chat messages; a message whose `content`
 *   is an array marks the request as a vision request.
 * @returns {Promise<string>} Content of the first completion choice.
 * @throws {Error} When not authenticated, the token exchange fails, or both
 *   endpoints fail.
 */
async function callCopilot(messages) {
  // Ensure we have an OAuth token
  if (!apiKeys.copilot && !loadCopilotToken()) {
    throw new Error('Not authenticated. Use /login to authenticate with GitHub Copilot.');
  }

  // Exchange for a session token if we don't have one
  if (!apiKeys.copilotSession) {
    try {
      await exchangeForCopilotSession();
    } catch (e) {
      throw new Error(`Session token exchange failed: ${e.message}`);
    }
  }

  const hasVision = messages.some(m => Array.isArray(m.content));
  const modelInfo = COPILOT_MODELS[currentCopilotModel] || COPILOT_MODELS['gpt-4o'];
  // A vision request on a text-only model is routed to gpt-4o instead
  const modelId = hasVision && !modelInfo.vision ? 'gpt-4o' : modelInfo.id;

  console.log(`[Copilot] Vision request: ${hasVision}, Model: ${modelId}`);

  const data = JSON.stringify({
    model: modelId,
    messages: messages,
    max_tokens: 4096,
    temperature: 0.7,
    stream: false
  });

  // Send the request to one endpoint. Headers are built per call so a
  // refreshed session token is picked up on retry.
  const tryEndpoint = (hostname, pathPrefix = '') => {
    const headers = {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${apiKeys.copilotSession}`,
      'Accept': 'application/json',
      'User-Agent': 'GithubCopilot/1.0.0',
      'Editor-Version': 'vscode/1.96.0',
      'Editor-Plugin-Version': 'copilot-chat/0.22.0',
      'Copilot-Integration-Id': 'vscode-chat',
      'X-Request-Id': `${Date.now()}-${Math.random().toString(36).slice(2, 11)}`,
      'Openai-Organization': 'github-copilot',
      'Openai-Intent': 'conversation-panel',
      'Content-Length': Buffer.byteLength(data)
    };

    // CRITICAL: Add vision header for image requests
    if (hasVision) {
      headers['Copilot-Vision-Request'] = 'true';
      console.log('[Copilot] Added Copilot-Vision-Request header');
    }

    const options = {
      hostname: hostname,
      path: pathPrefix + '/chat/completions',
      method: 'POST',
      headers: headers
    };

    console.log(`[Copilot] Calling ${hostname}${options.path} with model ${modelId}...`);

    return new Promise((resolveReq, rejectReq) => {
      const req = https.request(options, (res) => {
        // Decode as UTF-8 across chunk boundaries; concatenating raw Buffer
        // chunks corrupts multi-byte characters split between two chunks.
        res.setEncoding('utf8');
        let body = '';
        res.on('data', (chunk) => { body += chunk; });
        res.on('end', () => {
          console.log('[Copilot] API response status:', res.statusCode);

          if (res.statusCode === 401) {
            // Session token expired, clear it
            apiKeys.copilotSession = '';
            return rejectReq(new Error('SESSION_EXPIRED'));
          }

          if (res.statusCode === 403) {
            return rejectReq(new Error('ACCESS_DENIED'));
          }

          if (res.statusCode >= 400) {
            console.error('[Copilot] Error response:', body.substring(0, 300));
            return rejectReq(new Error(`API_ERROR_${res.statusCode}: ${body.substring(0, 200)}`));
          }

          try {
            const result = JSON.parse(body);
            if (result.choices && result.choices[0]) {
              resolveReq(result.choices[0].message.content);
            } else if (result.error) {
              rejectReq(new Error(result.error.message || 'Copilot API error'));
            } else {
              console.error('[Copilot] Unexpected response:', JSON.stringify(result).substring(0, 300));
              rejectReq(new Error('Invalid response format'));
            }
          } catch (e) {
            console.error('[Copilot] Parse error. Body:', body.substring(0, 300));
            rejectReq(new Error(`PARSE_ERROR: ${body.substring(0, 100)}`));
          }
        });
      });

      req.on('error', (e) => {
        console.error('[Copilot] Request error:', e.message);
        rejectReq(e);
      });

      req.write(data);
      req.end();
    });
  };

  // Try primary endpoint first
  const primaryAttempt = tryEndpoint('api.githubcopilot.com');
  let primaryErr;
  try {
    return await primaryAttempt;
  } catch (err) {
    primaryErr = err;
  }

  console.log('[Copilot] Primary endpoint failed:', primaryErr.message);

  // If session expired, re-exchange and retry once
  if (primaryErr.message === 'SESSION_EXPIRED') {
    try {
      await exchangeForCopilotSession();
      return await tryEndpoint('api.githubcopilot.com');
    } catch (retryErr) {
      throw new Error('Session expired. Please try /login again.');
    }
  }

  // Try alternate endpoint
  try {
    console.log('[Copilot] Trying alternate endpoint...');
    return await tryEndpoint('copilot-proxy.githubusercontent.com', '/v1');
  } catch (altErr) {
    console.log('[Copilot] Alternate endpoint also failed:', altErr.message);
  }

  // Return user-friendly error messages based on the primary failure
  if (primaryErr.message.includes('ACCESS_DENIED')) {
    throw new Error('Access denied. Ensure you have an active GitHub Copilot subscription.');
  }
  if (primaryErr.message.includes('PARSE_ERROR')) {
    throw new Error('API returned invalid response. You may need to re-authenticate with /login');
  }
  throw new Error(`Copilot API error: ${primaryErr.message}`);
}
788
+
789
/**
 * Call the OpenAI chat completions API.
 *
 * Uses the provider's `visionModel` when any message carries array content
 * (an attached image) and `model` otherwise.
 *
 * @param {Array<object>} messages - Chat messages in OpenAI format.
 * @returns {Promise<string>} Content of the first completion choice.
 *   Rejects with the API's error message, with the JSON parse error, with
 *   'Invalid response format' when no choice is returned, or with the
 *   network error.
 */
function callOpenAI(messages) {
  return new Promise((resolve, reject) => {
    const config = AI_PROVIDERS.openai;
    const hasVision = messages.some(m => Array.isArray(m.content));

    const data = JSON.stringify({
      model: hasVision ? config.visionModel : config.model,
      messages: messages,
      max_tokens: 2048,
      temperature: 0.7
    });

    const options = {
      hostname: config.baseUrl,
      path: config.path,
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${apiKeys.openai}`,
        'Content-Length': Buffer.byteLength(data)
      }
    };

    const req = https.request(options, (res) => {
      // Decode as UTF-8 across chunk boundaries; concatenating raw Buffer
      // chunks corrupts multi-byte characters split between two chunks.
      res.setEncoding('utf8');
      let body = '';
      res.on('data', (chunk) => { body += chunk; });
      res.on('end', () => {
        let response;
        try {
          response = JSON.parse(body);
        } catch (e) {
          return reject(e);
        }

        if (response && response.error) {
          return reject(new Error(response.error.message));
        }

        const message = response?.choices?.[0]?.message;
        if (!message) {
          return reject(new Error('Invalid response format'));
        }
        resolve(message.content);
      });
    });

    req.on('error', reject);
    req.write(data);
    req.end();
  });
}
837
+
838
/**
 * Call the Anthropic Messages API.
 *
 * The system message is lifted out of `messages` into the top-level `system`
 * field and the remaining messages are sent as-is. As in the other provider
 * callers, `visionModel` is used when a message carries array content and
 * `model` otherwise.
 *
 * @param {Array<object>} messages - Chat messages; at most the first
 *   `role: 'system'` entry is used as the system prompt.
 * @returns {Promise<string>} Text of the first `text` content block, or ''
 *   when the response has none. Rejects with the API's error message, with
 *   the JSON parse error, with 'Invalid response format' when `content` is
 *   not an array, or with the network error.
 */
function callAnthropic(messages) {
  return new Promise((resolve, reject) => {
    const config = AI_PROVIDERS.anthropic;

    // Convert messages format for Anthropic
    const systemMsg = messages.find(m => m.role === 'system');
    const otherMessages = messages.filter(m => m.role !== 'system');
    const hasVision = otherMessages.some(m => Array.isArray(m.content));

    const data = JSON.stringify({
      model: hasVision ? config.visionModel : config.model,
      max_tokens: 2048,
      system: systemMsg ? systemMsg.content : '',
      messages: otherMessages
    });

    const options = {
      hostname: config.baseUrl,
      path: config.path,
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': apiKeys.anthropic,
        'anthropic-version': '2023-06-01',
        'Content-Length': Buffer.byteLength(data)
      }
    };

    const req = https.request(options, (res) => {
      // Decode as UTF-8 across chunk boundaries; concatenating raw Buffer
      // chunks corrupts multi-byte characters split between two chunks.
      res.setEncoding('utf8');
      let body = '';
      res.on('data', (chunk) => { body += chunk; });
      res.on('end', () => {
        let response;
        try {
          response = JSON.parse(body);
        } catch (e) {
          return reject(e);
        }

        if (response && response.error) {
          return reject(new Error(response.error.message));
        }
        if (!response || !Array.isArray(response.content)) {
          return reject(new Error('Invalid response format'));
        }

        const textContent = response.content.find(c => c.type === 'text');
        resolve(textContent ? textContent.text : '');
      });
    });

    req.on('error', reject);
    req.write(data);
    req.end();
  });
}
891
+
892
/**
 * Call the Ollama chat API over plain HTTP and return the reply text.
 *
 * The vision model is selected when the last message carries a non-empty
 * `images` array. Array-style message content is flattened to text by joining
 * each part's `text` with newlines.
 *
 * @param {Array<Object>} messages - Chat messages; each may carry an `images` array.
 * @returns {Promise<string>} Resolves with `message.content` from the response,
 *   or `''` when absent. Rejects when the API reports an error, the body is not
 *   valid JSON, or the request fails (with a hint when the connection is refused).
 */
function callOllama(messages) {
  return new Promise((resolve, reject) => {
    const config = AI_PROVIDERS.ollama;

    // Check for images in the last message; tolerate an empty message list.
    const lastMsg = messages[messages.length - 1];
    const hasImages = Boolean(lastMsg?.images && lastMsg.images.length > 0);

    const data = JSON.stringify({
      model: hasImages ? config.visionModel : config.model,
      messages: messages.map(m => ({
        role: m.role,
        content: typeof m.content === 'string' ? m.content :
          Array.isArray(m.content) ? m.content.map(c => c.text || '').join('\n') : '',
        images: m.images || undefined
      })),
      stream: false
    });

    const options = {
      hostname: config.baseUrl,
      port: config.port,
      path: config.path,
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Content-Length': Buffer.byteLength(data)
      }
    };

    const req = http.request(options, (res) => {
      // Decode at the stream level: converting each Buffer chunk to a string
      // on its own corrupts a multi-byte character split across two chunks.
      res.setEncoding('utf8');

      let body = '';
      res.on('data', (chunk) => {
        body += chunk;
      });
      res.on('end', () => {
        try {
          const response = JSON.parse(body);
          if (response.error) {
            reject(new Error(response.error));
          } else {
            resolve(response.message?.content || '');
          }
        } catch (e) {
          reject(e);
        }
      });
    });

    req.on('error', (err) => {
      // Provide helpful error for Ollama
      if (err.code === 'ECONNREFUSED') {
        reject(new Error('Ollama not running. Start it with: ollama serve\nOr set a different provider with /provider openai or /provider anthropic'));
      } else {
        reject(err);
      }
    });

    req.write(data);
    req.end();
  });
}
955
+
956
/**
 * Send a user message to the currently selected provider and record the
 * exchange in the conversation history.
 *
 * @param {string} userMessage - Text entered by the user.
 * @param {Object} [options]
 * @param {boolean} [options.includeVisualContext=false] - Forwarded to `buildMessages`.
 * @param {Object|null} [options.coordinates=null] - When given, its `x`, `y`
 *   and `label` are prepended to the message as a context line.
 * @returns {Promise<Object>} `{ success: true, message, provider, hasVisualContext }`
 *   on success, or `{ success: false, error, provider }` when the provider call
 *   (or its credential check) throws.
 */
async function sendMessage(userMessage, options = {}) {
  const { includeVisualContext = false, coordinates = null } = options;

  // Prefix the message with the selected coordinates when there are any.
  const enhancedMessage = coordinates
    ? `[User selected coordinates: (${coordinates.x}, ${coordinates.y}) with label "${coordinates.label}"]\n\n${userMessage}`
    : userMessage;

  // Built outside the try block, so a failure here propagates to the caller.
  const messages = buildMessages(enhancedMessage, includeVisualContext);

  try {
    let reply;

    if (currentProvider === 'copilot') {
      // GitHub Copilot uses an OAuth token or env var; fall back to a saved token.
      if (!apiKeys.copilot && !loadCopilotToken()) {
        throw new Error('Not authenticated with GitHub Copilot.\n\nTo authenticate:\n1. Type /login and authorize in browser\n2. Or set GH_TOKEN or GITHUB_TOKEN environment variable');
      }
      reply = await callCopilot(messages);
    } else if (currentProvider === 'openai') {
      if (!apiKeys.openai) {
        throw new Error('OpenAI API key not set. Use /setkey openai <key> or set OPENAI_API_KEY environment variable.');
      }
      reply = await callOpenAI(messages);
    } else if (currentProvider === 'anthropic') {
      if (!apiKeys.anthropic) {
        throw new Error('Anthropic API key not set. Use /setkey anthropic <key> or set ANTHROPIC_API_KEY environment variable.');
      }
      reply = await callAnthropic(messages);
    } else {
      // 'ollama' and any unrecognised provider go to the local Ollama server.
      reply = await callOllama(messages);
    }

    // Record the exchange, then drop the oldest entries beyond the cap.
    conversationHistory.push(
      { role: 'user', content: enhancedMessage },
      { role: 'assistant', content: reply }
    );
    const overflow = conversationHistory.length - MAX_HISTORY * 2;
    if (overflow > 0) {
      conversationHistory.splice(0, overflow);
    }

    return {
      success: true,
      message: reply,
      provider: currentProvider,
      hasVisualContext: includeVisualContext && visualContextBuffer.length > 0
    };
  } catch (failure) {
    return {
      success: false,
      error: failure.message,
      provider: currentProvider
    };
  }
}
1030
+
1031
/**
 * Handle a slash command typed by the user.
 *
 * The command word is matched case-insensitively; arguments are the
 * whitespace-separated tokens that follow it.
 *
 * @param {string} command - Raw input line, e.g. `/provider openai`.
 * @returns {Object|Promise<Object>|null} A `{ type, message }` result. `/login`
 *   returns a Promise of such a result because the OAuth flow is asynchronous.
 *   Returns `null` when the input is not a recognised command.
 */
function handleCommand(command) {
  // Split on runs of whitespace so repeated spaces or tabs between tokens do
  // not produce empty arguments (e.g. "/provider  openai").
  const parts = command.split(/\s+/);
  const cmd = parts[0].toLowerCase();

  switch (cmd) {
    case '/provider': {
      if (parts[1]) {
        if (setProvider(parts[1])) {
          return { type: 'system', message: `Switched to ${parts[1]} provider.` };
        }
        return { type: 'error', message: `Unknown provider. Available: ${Object.keys(AI_PROVIDERS).join(', ')}` };
      }
      return { type: 'info', message: `Current provider: ${currentProvider}\nAvailable: ${Object.keys(AI_PROVIDERS).join(', ')}` };
    }

    case '/setkey': {
      if (parts[1] && parts[2]) {
        if (setApiKey(parts[1], parts[2])) {
          return { type: 'system', message: `API key set for ${parts[1]}.` };
        }
      }
      return { type: 'error', message: 'Usage: /setkey <provider> <key>' };
    }

    case '/clear': {
      conversationHistory = [];
      clearVisualContext();
      return { type: 'system', message: 'Conversation and visual context cleared.' };
    }

    case '/vision': {
      if (parts[1] === 'on') {
        return { type: 'info', message: 'Visual context will be included in next message. Use the capture button first.' };
      } else if (parts[1] === 'off') {
        clearVisualContext();
        return { type: 'system', message: 'Visual context cleared.' };
      }
      return { type: 'info', message: `Visual context buffer: ${visualContextBuffer.length} image(s)` };
    }

    case '/login': {
      // Start GitHub Copilot OAuth device code flow
      return startCopilotOAuth()
        .then(result => ({
          type: 'login',
          message: `GitHub Copilot authentication started!\n\nYour code: ${result.user_code}\n\nA browser window has opened. Enter the code to authorize.\nWaiting for authentication...`
        }))
        .catch(err => ({
          type: 'error',
          message: `Login failed: ${err.message}`
        }));
    }

    case '/logout': {
      apiKeys.copilot = '';
      apiKeys.copilotSession = '';
      try {
        if (fs.existsSync(TOKEN_FILE)) fs.unlinkSync(TOKEN_FILE);
      } catch (e) {
        // Removing the saved token is best-effort, but a failure means the
        // token is still on disk, so report it instead of hiding it.
        console.log('[AI-SERVICE] Could not remove saved Copilot token:', e.message);
      }
      return { type: 'system', message: 'Logged out from GitHub Copilot.' };
    }

    case '/model': {
      if (parts.length > 1) {
        const model = parts[1].toLowerCase();
        if (setCopilotModel(model)) {
          const modelInfo = COPILOT_MODELS[model];
          return {
            type: 'system',
            message: `Switched to ${modelInfo.name}${modelInfo.vision ? ' (supports vision)' : ''}`
          };
        }
        const available = Object.entries(COPILOT_MODELS)
          .map(([k, v]) => ` ${k} - ${v.name}`)
          .join('\n');
        return {
          type: 'error',
          message: `Unknown model. Available models:\n${available}`
        };
      }
      const models = getCopilotModels();
      const list = models.map(m =>
        `${m.current ? '→' : ' '} ${m.id} - ${m.name}${m.vision ? ' 👁' : ''}`
      ).join('\n');
      // Fall back to the raw model id, as /status does, when the current
      // model has no entry in COPILOT_MODELS.
      const currentName = COPILOT_MODELS[currentCopilotModel]?.name || currentCopilotModel;
      return {
        type: 'info',
        message: `Current model: ${currentName}\n\nAvailable models:\n${list}\n\nUse /model <name> to switch`
      };
    }

    case '/status': {
      const status = getStatus();
      return {
        type: 'info',
        message: `Provider: ${status.provider}\nModel: ${COPILOT_MODELS[currentCopilotModel]?.name || currentCopilotModel}\nCopilot: ${status.hasCopilotKey ? 'Authenticated' : 'Not authenticated'}\nOpenAI: ${status.hasOpenAIKey ? 'Key set' : 'No key'}\nAnthropic: ${status.hasAnthropicKey ? 'Key set' : 'No key'}\nHistory: ${status.historyLength} messages\nVisual: ${status.visualContextCount} captures`
      };
    }

    case '/help':
      return {
        type: 'info',
        message: `Available commands:
/login - Authenticate with GitHub Copilot (recommended)
/logout - Remove GitHub Copilot authentication
/model [name] - List or set Copilot model
/provider [name] - Get/set AI provider (copilot, openai, anthropic, ollama)
/setkey <provider> <key> - Set API key
/status - Show authentication status
/clear - Clear conversation history
/vision [on|off] - Manage visual context
/capture - Capture screen for AI analysis
/help - Show this help`
      };

    default:
      return null; // Not a command
  }
}
1147
+
1148
/**
 * Register the function stored in the module-level `oauthCallback` slot,
 * replacing any previously registered one.
 *
 * @param {Function} callback - Handler to store for OAuth completion.
 */
function setOAuthCallback(callback) {
  oauthCallback = callback;
}
1157
+
1158
/**
 * Build a snapshot of the service state: active provider and model, which
 * credentials are present, and the sizes of the history and visual buffers.
 *
 * @returns {Object} Status fields; `hasApiKey` describes the *active*
 *   provider and is always `true` for providers other than copilot, openai
 *   and anthropic.
 */
function getStatus() {
  const copilotKeyPresent = !!apiKeys.copilot;
  const openAIKeyPresent = !!apiKeys.openai;
  const anthropicKeyPresent = !!apiKeys.anthropic;

  // Credential check for whichever provider is currently selected.
  let activeKeyPresent;
  switch (currentProvider) {
    case 'copilot':
      activeKeyPresent = copilotKeyPresent;
      break;
    case 'openai':
      activeKeyPresent = openAIKeyPresent;
      break;
    case 'anthropic':
      activeKeyPresent = anthropicKeyPresent;
      break;
    default:
      activeKeyPresent = true;
  }

  const modelEntry = COPILOT_MODELS[currentCopilotModel];

  return {
    provider: currentProvider,
    model: currentCopilotModel,
    modelName: modelEntry?.name || currentCopilotModel,
    hasCopilotKey: copilotKeyPresent,
    hasApiKey: activeKeyPresent,
    hasOpenAIKey: openAIKeyPresent,
    hasAnthropicKey: anthropicKeyPresent,
    historyLength: conversationHistory.length,
    visualContextCount: visualContextBuffer.length,
    availableProviders: Object.keys(AI_PROVIDERS),
    copilotModels: getCopilotModels()
  };
}
1178
+
1179
+ // ===== SAFETY GUARDRAILS =====
1180
+
1181
/**
 * Action risk levels for safety classification, from least to most severe.
 *
 * Frozen because the object is exported and its values drive confirmation
 * decisions; an accidental write from another module must not be able to
 * change what a level means.
 */
const ActionRiskLevel = Object.freeze({
  SAFE: 'SAFE',         // Read-only, no risk (e.g., screenshot)
  LOW: 'LOW',           // Minor risk (e.g., scroll, move mouse)
  MEDIUM: 'MEDIUM',     // Moderate risk (e.g., click, type text)
  HIGH: 'HIGH',         // Significant risk (e.g., file operations, form submit)
  CRITICAL: 'CRITICAL'  // Dangerous (e.g., delete, purchase, payment)
});
1191
+
1192
/**
 * Case-insensitive, whole-word patterns for text that signals a risky
 * action. A match against an action's target text or stated reason makes the
 * action require user confirmation.
 */
const DANGER_PATTERNS = [
  // Destroying or wiping data
  /\b(delete|remove|erase|destroy|clear|reset|uninstall|format)\b/i,

  // Spending or moving money
  /\b(buy|purchase|order|checkout|pay|payment|subscribe|donate|transfer|send money)\b/i,

  // Ending a session or closing an account
  /\b(logout|log out|sign out|deactivate|close account|cancel subscription)\b/i,

  // Machine power state
  /\b(shutdown|restart|reboot|sleep|hibernate|power off)\b/i,

  // Wording typical of irreversible-confirmation dialogs
  /\b(confirm|yes,? delete|yes,? remove|permanently|irreversible|cannot be undone)\b/i,

  // Elevated privileges
  /\b(admin|administrator|root|sudo|elevated|run as)\b/i
];
1209
+
1210
/**
 * Case-insensitive, whole-word patterns for benign wording (dismissal,
 * read-only and informational actions), intended to lower an action's risk.
 * NOTE(review): no code from this declaration to the end of the file reads
 * this list; confirm whether an earlier part of the file uses it.
 */
const SAFE_PATTERNS = [
  // Backing out or postponing
  /\b(cancel|back|close|dismiss|skip|later|no thanks|maybe later)\b/i,

  // Looking at things without changing them
  /\b(search|find|view|show|display|open|read|look)\b/i,

  // Informational screens
  /\b(help|info|about|settings|preferences)\b/i
];
1218
+
1219
/**
 * The single action currently waiting for the user's confirmation, or
 * `null` when nothing is waiting.
 */
let pendingAction = null;
1223
+
1224
/**
 * Analyze the safety/risk level of an action.
 *
 * The result starts from a base risk derived from `action.type`, is raised to
 * HIGH when any danger pattern matches the target text or the action's stated
 * reason, is raised to CRITICAL when a HIGH action has two or more warnings,
 * and is raised further for low-confidence targets. Risk is only ever raised
 * by the confidence checks, never lowered.
 *
 * @param {Object} action - The action to analyze (`type`, plus type-specific
 *   fields such as `text`, `key`, `reason`).
 * @param {Object} [targetInfo={}] - Information about what's at the click
 *   target: optional `text`, `buttonText`, `label`, `nearbyText` (array) and
 *   `confidence` (compared against 0.7 and 0.5).
 * @returns {Object} Safety analysis: `actionId`, `action`, `targetInfo`,
 *   `riskLevel`, `warnings`, `requiresConfirmation`, `description`, `timestamp`.
 */
function analyzeActionSafety(action, targetInfo = {}) {
  const result = {
    actionId: `action-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`,
    action: action,
    targetInfo: targetInfo,
    riskLevel: ActionRiskLevel.SAFE,
    warnings: [],
    requiresConfirmation: false,
    description: '',
    timestamp: Date.now()
  };

  // Check action type base risk
  switch (action.type) {
    case 'screenshot':
    case 'wait':
      result.riskLevel = ActionRiskLevel.SAFE;
      break;
    case 'scroll':
      result.riskLevel = ActionRiskLevel.LOW;
      break;
    case 'click':
    case 'double_click':
      result.riskLevel = ActionRiskLevel.MEDIUM;
      break;
    case 'right_click':
      result.riskLevel = ActionRiskLevel.MEDIUM;
      result.warnings.push('Right-click may open context menu with destructive options');
      break;
    case 'type':
      result.riskLevel = ActionRiskLevel.MEDIUM;
      // Check what's being typed
      if (action.text && action.text.length > 100) {
        result.warnings.push('Typing large amount of text');
      }
      break;
    case 'key': {
      // Analyze key combinations
      const key = (action.key || '').toLowerCase();
      if (key.includes('delete') || key.includes('backspace')) {
        result.riskLevel = ActionRiskLevel.HIGH;
        result.warnings.push('Delete/Backspace key may remove content');
      } else if (key.includes('enter') || key.includes('return')) {
        result.riskLevel = ActionRiskLevel.MEDIUM;
        result.warnings.push('Enter key may submit form or confirm action');
      } else if (key.includes('ctrl') || key.includes('cmd') || key.includes('alt')) {
        result.riskLevel = ActionRiskLevel.MEDIUM;
        result.warnings.push('Keyboard shortcut detected');
      }
      break;
    }
    case 'drag':
      result.riskLevel = ActionRiskLevel.MEDIUM;
      break;
  }

  // Check target info for dangerous patterns
  const textToCheck = [
    targetInfo.text || '',
    targetInfo.buttonText || '',
    targetInfo.label || '',
    action.reason || '',
    ...(targetInfo.nearbyText || [])
  ].join(' ');

  // Check for danger patterns (one match call per pattern)
  for (const pattern of DANGER_PATTERNS) {
    const match = textToCheck.match(pattern);
    if (match) {
      result.riskLevel = ActionRiskLevel.HIGH;
      result.warnings.push(`Detected risky keyword: ${match[0]}`);
      result.requiresConfirmation = true;
    }
  }

  // Elevate to CRITICAL if multiple danger flags
  if (result.warnings.length >= 2 && result.riskLevel === ActionRiskLevel.HIGH) {
    result.riskLevel = ActionRiskLevel.CRITICAL;
  }

  // Always require confirmation for HIGH or CRITICAL
  if (result.riskLevel === ActionRiskLevel.HIGH || result.riskLevel === ActionRiskLevel.CRITICAL) {
    result.requiresConfirmation = true;
  }

  // Check for low confidence inspect region targets
  if (targetInfo.confidence !== undefined && targetInfo.confidence < 0.7) {
    result.warnings.push(`Low confidence target (${Math.round(targetInfo.confidence * 100)}%)`);
    result.requiresConfirmation = true;
    if (result.riskLevel === ActionRiskLevel.SAFE || result.riskLevel === ActionRiskLevel.LOW) {
      result.riskLevel = ActionRiskLevel.MEDIUM;
    }
  }

  // Very low confidence raises the risk to at least HIGH. An action already
  // rated CRITICAL must keep that rating rather than be downgraded.
  if (targetInfo.confidence !== undefined && targetInfo.confidence < 0.5) {
    if (result.riskLevel !== ActionRiskLevel.CRITICAL) {
      result.riskLevel = ActionRiskLevel.HIGH;
    }
    result.warnings.push('Very low confidence - verify target manually');
  }

  // Generate human-readable description
  result.description = describeAction(action, targetInfo);

  return result;
}
1333
+
1334
/**
 * Generate a human-readable, one-line description of an action.
 *
 * @param {Object} action - The action; `type` selects the wording and the
 *   type-specific fields (`x`/`y`, `text`, `key`, `direction`, `amount`,
 *   `fromX`/`fromY`/`toX`/`toY`, `ms`) fill it in.
 * @param {Object} [targetInfo={}] - Optional target details; the first
 *   non-empty of `text`, `buttonText`, `label` is quoted for pointer actions.
 * @returns {string} The description, e.g. `Click "OK" at (10, 20)`.
 */
function describeAction(action, targetInfo = {}) {
  const target = targetInfo.text || targetInfo.buttonText || targetInfo.label || '';
  const location = action.x !== undefined ? `at (${action.x}, ${action.y})` : '';

  // Join only the parts that are present so a missing target label does not
  // leave a double space between the verb and the location.
  const describePointer = (verb) =>
    [verb, target ? `"${target}"` : '', location].filter(Boolean).join(' ');

  switch (action.type) {
    case 'click':
      return describePointer('Click');
    case 'double_click':
      return describePointer('Double-click');
    case 'right_click':
      return describePointer('Right-click');
    case 'type': {
      // Treat missing text as empty instead of rendering "undefined".
      const text = action.text ?? '';
      const preview = text.length > 30 ? text.substring(0, 30) + '...' : text;
      return `Type "${preview}"`;
    }
    case 'key':
      return `Press ${action.key}`;
    case 'scroll':
      return `Scroll ${action.direction} ${action.amount || 3} times`;
    case 'drag':
      return `Drag from (${action.fromX}, ${action.fromY}) to (${action.toX}, ${action.toY})`;
    case 'wait':
      return `Wait ${action.ms}ms`;
    case 'screenshot':
      return 'Take screenshot';
    default:
      return `${action.type} action`;
  }
}
1365
+
1366
/**
 * Record an action as the one awaiting user confirmation, replacing any
 * action previously stored.
 *
 * @param {Object} actionData - Pending action record; must carry `actionId`.
 * @returns {*} The `actionId` of the stored record.
 */
function setPendingAction(actionData) {
  const { actionId } = (pendingAction = actionData);
  return actionId;
}
1373
+
1374
/**
 * Read the action currently awaiting user confirmation.
 *
 * @returns {Object|null} The stored pending action, or `null` if none.
 */
function getPendingAction() {
  const current = pendingAction;
  return current;
}
1380
+
1381
/**
 * Discard whatever action is awaiting confirmation; a no-op when none is.
 */
function clearPendingAction() {
  pendingAction = null;
}
1387
+
1388
/**
 * Confirm the pending action identified by `actionId` and remove it from the
 * pending slot.
 *
 * @param {*} actionId - Identifier the caller believes is pending.
 * @returns {Object|null} The confirmed action record, or `null` when nothing
 *   is pending or the id does not match (the pending action is then kept).
 */
function confirmPendingAction(actionId) {
  const candidate = pendingAction;
  if (!candidate || candidate.actionId !== actionId) {
    return null;
  }

  pendingAction = null;
  return candidate;
}
1399
+
1400
/**
 * Reject (discard) the pending action identified by `actionId`.
 *
 * @param {*} actionId - Identifier the caller believes is pending.
 * @returns {boolean} `true` when the matching action was discarded, `false`
 *   when nothing is pending or the id does not match.
 */
function rejectPendingAction(actionId) {
  const isMatch = Boolean(pendingAction) && pendingAction.actionId === actionId;
  if (!isMatch) {
    return false;
  }

  pendingAction = null;
  return true;
}
1410
+
1411
+ // ===== AGENTIC ACTION HANDLING =====
1412
+
1413
/**
 * Extract actions from an AI response by delegating to
 * `systemAutomation.parseAIActions`.
 *
 * @param {string} aiResponse - The AI's response text.
 * @returns {Object|null} Whatever the automation parser returns for the text.
 */
function parseActions(aiResponse) {
  const parsed = systemAutomation.parseAIActions(aiResponse);
  return parsed;
}
1421
+
1422
/**
 * Check if an AI response contains at least one action.
 *
 * Always returns a real boolean; a response that does not parse yields
 * `false` rather than `null` or `undefined`.
 *
 * @param {string} aiResponse - The AI's response text.
 * @returns {boolean} `true` when the parsed response has a non-empty
 *   `actions` array.
 */
function hasActions(aiResponse) {
  const parsed = parseActions(aiResponse);
  return Array.isArray(parsed?.actions) && parsed.actions.length > 0;
}
1431
+
1432
/**
 * Run the actions parsed from an AI response in order, pausing for user
 * confirmation when the safety analysis asks for it.
 *
 * Screenshot actions are delegated to `onScreenshot` and never safety-checked.
 * When an action requires confirmation, it and every action after it are
 * stored as the pending action and execution stops. Execution also stops at
 * the first failed action unless that action sets `continue_on_error`.
 *
 * @param {Object} actionData - Parsed action data with an `actions` array and
 *   optional `thought` / `verification`.
 * @param {Function} [onAction] - Called as `(result, index, total)` after each executed action.
 * @param {Function} [onScreenshot] - Awaited when a screenshot action is reached.
 * @param {Object} [options] - Additional options.
 * @param {Function} [options.onRequireConfirmation] - Called with the safety analysis when an action needs confirmation.
 * @param {Object} [options.targetAnalysis] - Target info keyed by `"x,y"`.
 * @param {Function} [options.actionExecutor] - Replaces `systemAutomation.executeAction` when given.
 * @returns {Promise<Object>} Execution summary, or `{ success: false, error }` for invalid input.
 */
async function executeActions(actionData, onAction = null, onScreenshot = null, options = {}) {
  const steps = actionData ? actionData.actions : undefined;
  if (!Array.isArray(steps)) {
    return { success: false, error: 'No valid actions provided' };
  }

  const { onRequireConfirmation, targetAnalysis = {}, actionExecutor } = options;
  const runStep = actionExecutor || ((step) => systemAutomation.executeAction(step));

  console.log('[AI-SERVICE] Executing actions:', actionData.thought || 'No thought provided');
  console.log('[AI-SERVICE] Actions:', JSON.stringify(steps, null, 2));

  const results = [];
  let screenshotRequested = false;
  let pendingConfirmation = false;

  for (const [index, step] of steps.entries()) {
    // Screenshots bypass the safety check and the executor.
    if (step.type === 'screenshot') {
      screenshotRequested = true;
      if (onScreenshot) {
        await onScreenshot();
      }
      results.push({ success: true, action: 'screenshot', message: 'Screenshot captured' });
      continue;
    }

    // Prefer visual-analysis info for this coordinate; otherwise fall back to
    // what the action says about itself.
    const targetInfo = targetAnalysis[`${step.x},${step.y}`] || {
      text: step.reason || '',
      buttonText: step.targetText || '',
      nearbyText: []
    };

    const safety = analyzeActionSafety(step, targetInfo);
    console.log(`[AI-SERVICE] Action ${index} safety: ${safety.riskLevel}`, safety.warnings);

    if (safety.requiresConfirmation) {
      console.log(`[AI-SERVICE] Action ${index} requires user confirmation`);

      // Park this action and everything after it until the user decides.
      setPendingAction({
        ...safety,
        actionIndex: index,
        remainingActions: steps.slice(index),
        completedResults: [...results],
        thought: actionData.thought,
        verification: actionData.verification
      });

      if (onRequireConfirmation) {
        onRequireConfirmation(safety);
      }

      pendingConfirmation = true;
      break;
    }

    const outcome = await runStep(step);
    outcome.reason = step.reason || '';
    outcome.safety = safety;
    results.push(outcome);

    if (onAction) {
      onAction(outcome, index, steps.length);
    }

    const mayContinue = outcome.success || step.continue_on_error;
    if (!mayContinue) {
      console.log(`[AI-SERVICE] Sequence stopped at action ${index} due to error`);
      break;
    }
  }

  const allSucceeded = !pendingConfirmation && results.every((r) => r.success);

  return {
    success: allSucceeded,
    thought: actionData.thought,
    verification: actionData.verification,
    results,
    screenshotRequested,
    pendingConfirmation,
    pendingActionId: pendingConfirmation ? getPendingAction()?.actionId : null
  };
}
1532
+
1533
/**
 * Resume execution after the user confirms the pending action.
 *
 * Only the first remaining action was confirmed by the user, so it alone runs
 * without a safety check. Every later action is analyzed again; if one of
 * them requires confirmation, it becomes the new pending action and
 * execution pauses, mirroring `executeActions`.
 *
 * The pending slot is cleared before anything runs, so a second call cannot
 * re-execute the same actions.
 *
 * @param {Function} [onAction] - Called as `(result, index, total)` after each executed action.
 * @param {Function} [onScreenshot] - Awaited when a screenshot action is reached.
 * @param {Object} [options]
 * @param {Function} [options.actionExecutor] - Replaces `systemAutomation.executeAction` when given.
 * @param {Function} [options.onRequireConfirmation] - Called with the safety analysis when a later action needs confirmation.
 * @param {Object} [options.targetAnalysis] - Target info keyed by `"x,y"`.
 * @returns {Promise<Object>} Execution summary; `pendingConfirmation` and
 *   `pendingActionId` report whether a later action is now awaiting
 *   confirmation. `{ success: false, error }` when nothing was pending.
 */
async function resumeAfterConfirmation(onAction = null, onScreenshot = null, options = {}) {
  const pending = getPendingAction();
  if (!pending) {
    return { success: false, error: 'No pending action to resume' };
  }

  const { actionExecutor, onRequireConfirmation, targetAnalysis = {} } = options;

  console.log('[AI-SERVICE] Resuming after user confirmation');

  // Take ownership of the pending action before running anything.
  clearPendingAction();

  const results = [...pending.completedResults];
  const total = pending.actionIndex + pending.remainingActions.length;
  let screenshotRequested = false;
  let pendingConfirmation = false;

  // Execute the confirmed action and remaining actions
  for (let i = 0; i < pending.remainingActions.length; i++) {
    const action = pending.remainingActions[i];

    if (action.type === 'screenshot') {
      screenshotRequested = true;
      if (onScreenshot) {
        await onScreenshot();
      }
      results.push({ success: true, action: 'screenshot', message: 'Screenshot captured' });
      continue;
    }

    // The user's confirmation covers only the first action; later actions go
    // through the same safety gate as in executeActions.
    let safety = null;
    if (i > 0) {
      const targetInfo = targetAnalysis[`${action.x},${action.y}`] || {
        text: action.reason || '',
        buttonText: action.targetText || '',
        nearbyText: []
      };
      safety = analyzeActionSafety(action, targetInfo);

      if (safety.requiresConfirmation) {
        console.log(`[AI-SERVICE] Action ${pending.actionIndex + i} requires user confirmation`);
        setPendingAction({
          ...safety,
          actionIndex: pending.actionIndex + i,
          remainingActions: pending.remainingActions.slice(i),
          completedResults: [...results],
          thought: pending.thought,
          verification: pending.verification
        });
        if (onRequireConfirmation) {
          onRequireConfirmation(safety);
        }
        pendingConfirmation = true;
        break;
      }
    }

    const result = await (actionExecutor ? actionExecutor(action) : systemAutomation.executeAction(action));
    result.reason = action.reason || '';
    result.userConfirmed = i === 0; // First one was confirmed
    if (safety) {
      result.safety = safety;
    }
    results.push(result);

    if (onAction) {
      onAction(result, pending.actionIndex + i, total);
    }

    if (!result.success && !action.continue_on_error) {
      break;
    }
  }

  return {
    success: !pendingConfirmation && results.every(r => r.success),
    thought: pending.thought,
    verification: pending.verification,
    results,
    screenshotRequested,
    userConfirmed: true,
    pendingConfirmation,
    pendingActionId: pendingConfirmation ? getPendingAction()?.actionId : null
  };
}
1591
+
1592
/**
 * Convert a grid coordinate to a pixel position by delegating to
 * `systemAutomation.gridToPixels`.
 *
 * @param {*} coord - Grid coordinate in whatever form the automation module accepts.
 * @returns {*} The automation module's result for that coordinate.
 */
function gridToPixels(coord) {
  const pixels = systemAutomation.gridToPixels(coord);
  return pixels;
}
1598
+
1599
+ module.exports = {
1600
+ setProvider,
1601
+ setApiKey,
1602
+ setCopilotModel,
1603
+ getCopilotModels,
1604
+ getCurrentCopilotModel,
1605
+ getModelMetadata,
1606
+ addVisualContext,
1607
+ getLatestVisualContext,
1608
+ clearVisualContext,
1609
+ sendMessage,
1610
+ handleCommand,
1611
+ getStatus,
1612
+ startCopilotOAuth,
1613
+ setOAuthCallback,
1614
+ loadCopilotToken,
1615
+ AI_PROVIDERS,
1616
+ COPILOT_MODELS,
1617
+ // Agentic capabilities
1618
+ parseActions,
1619
+ hasActions,
1620
+ executeActions,
1621
+ gridToPixels,
1622
+ systemAutomation,
1623
+ // Safety guardrails
1624
+ ActionRiskLevel,
1625
+ analyzeActionSafety,
1626
+ describeAction,
1627
+ setPendingAction,
1628
+ getPendingAction,
1629
+ clearPendingAction,
1630
+ confirmPendingAction,
1631
+ rejectPendingAction,
1632
+ resumeAfterConfirmation
1633
+ };