copilot-liku-cli 0.0.3 → 0.0.8

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/QUICKSTART.md +24 -0
  2. package/README.md +85 -33
  3. package/package.json +23 -14
  4. package/scripts/postinstall.js +63 -0
  5. package/src/cli/commands/window.js +66 -0
  6. package/src/main/agents/base-agent.js +15 -7
  7. package/src/main/agents/builder.js +211 -0
  8. package/src/main/agents/index.js +7 -4
  9. package/src/main/agents/orchestrator.js +13 -0
  10. package/src/main/agents/producer.js +891 -0
  11. package/src/main/agents/researcher.js +78 -0
  12. package/src/main/agents/state-manager.js +134 -2
  13. package/src/main/agents/verifier.js +201 -0
  14. package/src/main/ai-service.js +349 -35
  15. package/src/main/index.js +702 -113
  16. package/src/main/inspect-service.js +24 -1
  17. package/src/main/python-bridge.js +395 -0
  18. package/src/main/system-automation.js +876 -131
  19. package/src/main/ui-automation/core/ui-provider.js +99 -0
  20. package/src/main/ui-automation/core/uia-host.js +214 -0
  21. package/src/main/ui-automation/index.js +30 -0
  22. package/src/main/ui-automation/interactions/element-click.js +6 -6
  23. package/src/main/ui-automation/interactions/high-level.js +28 -6
  24. package/src/main/ui-automation/interactions/index.js +21 -0
  25. package/src/main/ui-automation/interactions/pattern-actions.js +236 -0
  26. package/src/main/ui-automation/window/index.js +6 -0
  27. package/src/main/ui-automation/window/manager.js +173 -26
  28. package/src/main/ui-watcher.js +401 -58
  29. package/src/main/visual-awareness.js +18 -1
  30. package/src/native/windows-uia/Program.cs +89 -0
  31. package/src/native/windows-uia/build.ps1 +24 -0
  32. package/src/native/windows-uia-dotnet/Program.cs +920 -0
  33. package/src/native/windows-uia-dotnet/WindowsUIA.csproj +11 -0
  34. package/src/native/windows-uia-dotnet/build.ps1 +24 -0
  35. package/src/renderer/chat/chat.js +915 -671
  36. package/src/renderer/chat/index.html +2 -4
  37. package/src/renderer/chat/preload.js +8 -1
  38. package/src/renderer/overlay/overlay.js +157 -8
  39. package/src/renderer/overlay/preload.js +4 -0
  40. package/src/shared/inspect-types.js +82 -6
  41. package/ARCHITECTURE.md +0 -411
  42. package/CONFIGURATION.md +0 -302
  43. package/CONTRIBUTING.md +0 -225
  44. package/ELECTRON_README.md +0 -121
  45. package/PROJECT_STATUS.md +0 -229
  46. package/TESTING.md +0 -274
@@ -29,16 +29,116 @@ function getInspectService() {
29
29
  return inspectService;
30
30
  }
31
31
 
32
- // Lazy-load UI watcher for live UI context
32
+ // Shared UI watcher for live UI context (set by index.js after starting)
33
33
  let uiWatcher = null;
34
+ let semanticDomSnapshot = null;
35
+ let semanticDomUpdatedAt = 0;
36
+ const SEMANTIC_DOM_MAX_DEPTH = 4;
37
+ const SEMANTIC_DOM_MAX_NODES = 120;
38
+ const SEMANTIC_DOM_MAX_CHARS = 3500;
39
+ const SEMANTIC_DOM_MAX_AGE_MS = 5000;
40
+
41
+ /**
42
+ * Set the shared UI watcher instance (called from index.js)
43
+ */
44
+ function setUIWatcher(watcher) {
45
+ uiWatcher = watcher;
46
+ console.log('[AI-SERVICE] UI Watcher connected');
47
+ }
48
+
34
49
  function getUIWatcher() {
35
- if (!uiWatcher) {
36
- const { UIWatcher } = require('./ui-watcher');
37
- uiWatcher = new UIWatcher();
38
- }
39
50
  return uiWatcher;
40
51
  }
41
52
 
53
+ function setSemanticDOMSnapshot(tree) {
54
+ semanticDomSnapshot = tree || null;
55
+ semanticDomUpdatedAt = Date.now();
56
+ }
57
+
58
+ function clearSemanticDOMSnapshot() {
59
+ semanticDomSnapshot = null;
60
+ semanticDomUpdatedAt = 0;
61
+ }
62
+
63
+ function pruneSemanticTree(root) {
64
+ const results = [];
65
+
66
+ function walk(node, depth = 0) {
67
+ if (!node || depth > SEMANTIC_DOM_MAX_DEPTH || results.length >= SEMANTIC_DOM_MAX_NODES) {
68
+ return;
69
+ }
70
+
71
+ const bounds = node.bounds || {};
72
+ const isInteractive = !!node.isClickable || !!node.isFocusable;
73
+ const hasName = typeof node.name === 'string' && node.name.trim().length > 0;
74
+ const hasValidBounds = [bounds.x, bounds.y, bounds.width, bounds.height].every(Number.isFinite)
75
+ && bounds.width > 0
76
+ && bounds.height > 0;
77
+
78
+ if ((isInteractive || hasName) && hasValidBounds) {
79
+ results.push({
80
+ id: node.id || '',
81
+ name: hasName ? node.name.trim().slice(0, 64) : '',
82
+ role: node.role || 'Unknown',
83
+ bounds: {
84
+ x: Math.round(bounds.x),
85
+ y: Math.round(bounds.y),
86
+ width: Math.round(bounds.width),
87
+ height: Math.round(bounds.height)
88
+ },
89
+ isClickable: !!node.isClickable,
90
+ isFocusable: !!node.isFocusable
91
+ });
92
+ }
93
+
94
+ if (Array.isArray(node.children)) {
95
+ for (const child of node.children) {
96
+ if (results.length >= SEMANTIC_DOM_MAX_NODES) break;
97
+ walk(child, depth + 1);
98
+ }
99
+ }
100
+ }
101
+
102
+ walk(root, 0);
103
+ return results;
104
+ }
105
+
106
+ function getSemanticDOMContextText() {
107
+ if (!semanticDomSnapshot || !semanticDomUpdatedAt) {
108
+ return '';
109
+ }
110
+
111
+ if ((Date.now() - semanticDomUpdatedAt) > SEMANTIC_DOM_MAX_AGE_MS) {
112
+ return '';
113
+ }
114
+
115
+ const nodes = pruneSemanticTree(semanticDomSnapshot);
116
+ if (!nodes.length) {
117
+ return '';
118
+ }
119
+
120
+ const lines = [];
121
+ for (let i = 0; i < nodes.length; i++) {
122
+ const node = nodes[i];
123
+ const namePart = node.name ? ` \"${node.name}\"` : '';
124
+ const idPart = node.id ? ` id=${node.id}` : '';
125
+ const flags = [node.isClickable ? 'clickable' : null, node.isFocusable ? 'focusable' : null]
126
+ .filter(Boolean)
127
+ .join(',');
128
+ const flagPart = flags ? ` [${flags}]` : '';
129
+ lines.push(
130
+ `- [${i + 1}] ${node.role}${namePart}${idPart} at (${node.bounds.x}, ${node.bounds.y}, ${node.bounds.width}, ${node.bounds.height})${flagPart}`
131
+ );
132
+ }
133
+
134
+ let text = `\n\n## Semantic DOM (grounded accessibility tree)\n${lines.join('\n')}`;
135
+ if (text.length > SEMANTIC_DOM_MAX_CHARS) {
136
+ text = `${text.slice(0, SEMANTIC_DOM_MAX_CHARS)}\n... (truncated)`;
137
+ }
138
+
139
+ return text;
140
+ }
141
+
42
142
  // ===== CONFIGURATION =====
43
143
 
44
144
  // Available models for GitHub Copilot (based on Copilot CLI changelog)
@@ -107,8 +207,9 @@ let currentModelMetadata = {
107
207
  lastUpdated: new Date().toISOString()
108
208
  };
109
209
 
110
- // Token persistence path
111
- const TOKEN_FILE = path.join(process.env.APPDATA || process.env.HOME || '.', 'copilot-agent', 'copilot-token.json');
210
+ // Token persistence path — lives inside ~/.liku-cli/ alongside Electron userData
211
+ const LIKU_HOME = path.join(os.homedir(), '.liku-cli');
212
+ const TOKEN_FILE = path.join(LIKU_HOME, 'copilot-token.json');
112
213
 
113
214
  // OAuth state
114
215
  let oauthInProgress = false;
@@ -180,13 +281,29 @@ const SYSTEM_PROMPT = `You are Liku, an intelligent AGENTIC AI assistant integra
180
281
 
181
282
  ${getPlatformContext()}
182
283
 
284
+ ## LIVE UI AWARENESS (CRITICAL - READ THIS!)
285
+
286
+ The user will provide a **Live UI State** section in their messages. This section lists visible UI elements detected on the screen.
287
+ Format: \`- [Index] Type: "Name" at (x, y)\`
288
+
289
+ ⚠️ **HOW TO USE LIVE UI STATE:**
290
+ 1. **Identify Elements**: Use the numeric [Index] or Name to identify elements.
291
+ 2. **Clicking**: To click an element from the list, PREFER using its coordinates provided in the entry:
292
+ - Example Entry: \`- [42] Button: "Submit" at (500, 300)\`
293
+ - Action: \`{"type": "click", "x": 500, "y": 300, "reason": "Click Submit button [42]"}\`
294
+ - Alternatively: \`{"type": "click_element", "text": "Submit"}\` works if the name is unique.
295
+ 3. **Context**: Group elements by their Window header to understand which application they belong to.
296
+
297
+ ⚠️ **DO NOT REQUEST SCREENSHOTS** to find standard UI elements - check the Live UI State first.
298
+
299
+ **TO LIST ELEMENTS**: Read the Live UI State section and list what's there (e.g., "I see a 'Save' button at index [15]").
300
+
183
301
  ## Your Core Capabilities
184
302
 
185
- 1. **Screen Vision**: When the user captures their screen, you receive it as an image. ALWAYS analyze visible content immediately.
303
+ 1. **Screen Vision**: When the user captures their screen, you receive it as an image. Use this for spatial/visual tasks. For element-based tasks, the Live UI State is sufficient.
186
304
 
187
- 2. **SEMANTIC ELEMENT ACTIONS (PREFERRED!)**: You can interact with UI elements by their text/name - MORE RELIABLE than coordinates:
305
+ 2. **SEMANTIC ELEMENT ACTIONS**: You can interact with UI elements by their text/name:
188
306
  - \`{"type": "click_element", "text": "Submit", "reason": "Click Submit button"}\` - Finds and clicks element by text
189
- - \`{"type": "find_element", "text": "Save", "reason": "Locate Save button"}\` - Finds element info
190
307
 
191
308
  3. **Grid Coordinate System**: The screen has a dot grid overlay:
192
309
  - **Columns**: Letters A, B, C, D... (left to right), spacing 100px
@@ -229,6 +346,12 @@ When the user asks you to DO something, respond with a JSON action block:
229
346
  - \`{"type": "drag", "fromX": <n>, "fromY": <n>, "toX": <n>, "toY": <n>}\` - Drag
230
347
  - \`{"type": "wait", "ms": <number>}\` - Wait milliseconds (IMPORTANT: add waits between multi-step actions!)
231
348
  - \`{"type": "screenshot"}\` - Take screenshot to verify result
349
+ - \`{"type": "focus_window", "windowHandle": <number>}\` - Bring a window to the foreground (use if target is in background)
350
+ - \`{"type": "bring_window_to_front", "title": "<partial title>"}\` - Bring matching background app to foreground
351
+ - \`{"type": "send_window_to_back", "title": "<partial title>"}\` - Push matching window behind others without activating
352
+ - \`{"type": "minimize_window", "title": "<partial title>"}\` - Minimize a specific window
353
+ - \`{"type": "restore_window", "title": "<partial title>"}\` - Restore a minimized window
354
+ - \`{"type": "run_command", "command": "<shell command>", "cwd": "<optional path>", "shell": "powershell|cmd|bash"}\` - **PREFERRED FOR SHELL TASKS**: Execute shell command directly and return output (timeout: 30s)
232
355
 
233
356
  ### Grid to Pixel Conversion:
234
357
  - A0 → (50, 50), B0 → (150, 50), C0 → (250, 50)
@@ -251,15 +374,19 @@ When the user asks you to DO something, respond with a JSON action block:
251
374
 
252
375
  **Common Task Patterns**:
253
376
  ${PLATFORM === 'win32' ? `
254
- - **Open new terminal**: Use \`win+x\` then \`i\` (or \`win+r\` → type "wt" \`enter\`)
377
+ - **Run shell commands**: Use \`run_command\` action - e.g., \`{"type": "run_command", "command": "Get-Process | Select-Object -First 5"}\`
378
+ - **List files**: \`{"type": "run_command", "command": "dir", "cwd": "C:\\\\Users"}\` or \`{"type": "run_command", "command": "Get-ChildItem"}\`
379
+ - **Open terminal GUI**: Use \`win+x\` then \`i\` (or \`win+r\` → type "wt" → \`enter\`) - only if user wants visible terminal
255
380
  - **Open application**: Use \`win\` key, type app name, press \`enter\`
256
381
  - **Save file**: \`ctrl+s\`
257
382
  - **Copy/Paste**: \`ctrl+c\` / \`ctrl+v\`` : PLATFORM === 'darwin' ? `
258
- - **Open terminal**: \`cmd+space\`, type "Terminal", \`enter\`
383
+ - **Run shell commands**: Use \`run_command\` action - e.g., \`{"type": "run_command", "command": "ls -la", "shell": "bash"}\`
384
+ - **Open terminal GUI**: \`cmd+space\`, type "Terminal", \`enter\` - only if user wants visible terminal
259
385
  - **Open application**: \`cmd+space\`, type app name, \`enter\`
260
386
  - **Save file**: \`cmd+s\`
261
387
  - **Copy/Paste**: \`cmd+c\` / \`cmd+v\`` : `
262
- - **Open terminal**: \`ctrl+alt+t\`
388
+ - **Run shell commands**: Use \`run_command\` action - e.g., \`{"type": "run_command", "command": "ls -la", "shell": "bash"}\`
389
+ - **Open terminal GUI**: \`ctrl+alt+t\` - only if user wants visible terminal
263
390
  - **Open application**: \`super\` key, type name, \`enter\`
264
391
  - **Save file**: \`ctrl+s\`
265
392
  - **Copy/Paste**: \`ctrl+c\` / \`ctrl+v\``}
@@ -308,6 +435,16 @@ function setCopilotModel(model) {
308
435
  return false;
309
436
  }
310
437
 
438
+ /**
439
+ * Resolve a requested Copilot model key to a valid configured key.
440
+ */
441
+ function resolveCopilotModelKey(requestedModel) {
442
+ if (requestedModel && COPILOT_MODELS[requestedModel]) {
443
+ return requestedModel;
444
+ }
445
+ return currentCopilotModel;
446
+ }
447
+
311
448
  /**
312
449
  * Get available Copilot models
313
450
  */
@@ -338,13 +475,14 @@ function getCurrentCopilotModel() {
338
475
  }
339
476
 
340
477
  /**
341
- * Add visual context (screenshot data)
478
+ * Add visual context (screenshot data) as a typed VisualFrame
479
+ * @param {Object} imageData - Raw image data with dataURL, width, height, etc.
342
480
  */
343
481
  function addVisualContext(imageData) {
344
- visualContextBuffer.push({
345
- ...imageData,
346
- addedAt: Date.now()
347
- });
482
+ const { createVisualFrame } = require('../shared/inspect-types');
483
+ const frame = createVisualFrame(imageData);
484
+ frame.addedAt = Date.now();
485
+ visualContextBuffer.push(frame);
348
486
 
349
487
  // Keep only recent visual context
350
488
  while (visualContextBuffer.length > MAX_VISUAL_CONTEXT) {
@@ -415,19 +553,24 @@ ${inspectContext.regions.slice(0, 20).map((r, i) =>
415
553
  let liveUIContextText = '';
416
554
  try {
417
555
  const watcher = getUIWatcher();
418
- if (watcher && watcher.isRunning) {
556
+ if (watcher && watcher.isPolling) {
419
557
  const uiContext = watcher.getContextForAI();
420
558
  if (uiContext && uiContext.trim()) {
421
- liveUIContextText = `\n\n${uiContext}`;
422
- console.log('[AI] Including live UI context from watcher');
559
+ // Frame the context as trustworthy real-time data
560
+ liveUIContextText = `\n\n---\n🔴 **LIVE UI STATE** (auto-refreshed every 400ms - TRUST THIS DATA!)\n${uiContext}\n---`;
561
+ console.log('[AI] Including live UI context from watcher (', uiContext.split('\n').length, 'lines)');
423
562
  }
563
+ } else {
564
+ console.log('[AI] UI Watcher not available or not running (watcher:', !!watcher, ', polling:', watcher?.isPolling, ')');
424
565
  }
425
566
  } catch (e) {
426
567
  console.warn('[AI] Could not get live UI context:', e.message);
427
568
  }
569
+
570
+ const semanticDOMContextText = getSemanticDOMContextText();
428
571
 
429
- const enhancedMessage = inspectContextText || liveUIContextText
430
- ? `${userMessage}${inspectContextText}${liveUIContextText}`
572
+ const enhancedMessage = inspectContextText || liveUIContextText || semanticDOMContextText
573
+ ? `${userMessage}${inspectContextText}${liveUIContextText}${semanticDOMContextText}`
431
574
  : userMessage;
432
575
 
433
576
  if (latestVisual && (currentProvider === 'copilot' || currentProvider === 'openai')) {
@@ -484,10 +627,26 @@ ${inspectContext.regions.slice(0, 20).map((r, i) =>
484
627
  // ===== GITHUB COPILOT OAUTH =====
485
628
 
486
629
  /**
487
- * Load saved Copilot token from disk
630
+ * Load saved Copilot token from disk.
631
+ * On first run after the path migration, copies the token from the
632
+ * legacy location (%APPDATA%/copilot-agent/) to ~/.liku-cli/.
488
633
  */
489
634
  function loadCopilotToken() {
490
635
  try {
636
+ // Migrate from legacy path if new location is empty
637
+ if (!fs.existsSync(TOKEN_FILE)) {
638
+ const legacyPath = path.join(
639
+ process.env.APPDATA || process.env.HOME || '.',
640
+ 'copilot-agent', 'copilot-token.json'
641
+ );
642
+ if (fs.existsSync(legacyPath)) {
643
+ const dir = path.dirname(TOKEN_FILE);
644
+ if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
645
+ fs.copyFileSync(legacyPath, TOKEN_FILE);
646
+ console.log('[COPILOT] Migrated token from legacy path');
647
+ }
648
+ }
649
+
491
650
  if (fs.existsSync(TOKEN_FILE)) {
492
651
  const data = JSON.parse(fs.readFileSync(TOKEN_FILE, 'utf8'));
493
652
  if (data.access_token) {
@@ -731,7 +890,7 @@ function exchangeForCopilotSession() {
731
890
  * Call GitHub Copilot API
732
891
  * Uses session token (not OAuth token) - exchanges if needed
733
892
  */
734
- async function callCopilot(messages) {
893
+ async function callCopilot(messages, modelOverride = null) {
735
894
  // Ensure we have OAuth token
736
895
  if (!apiKeys.copilot) {
737
896
  if (!loadCopilotToken()) {
@@ -750,10 +909,11 @@ async function callCopilot(messages) {
750
909
 
751
910
  return new Promise((resolve, reject) => {
752
911
  const hasVision = messages.some(m => Array.isArray(m.content));
753
- const modelInfo = COPILOT_MODELS[currentCopilotModel] || COPILOT_MODELS['gpt-4o'];
912
+ const modelKey = resolveCopilotModelKey(modelOverride);
913
+ const modelInfo = COPILOT_MODELS[modelKey] || COPILOT_MODELS['gpt-4o'];
754
914
  const modelId = hasVision && !modelInfo.vision ? 'gpt-4o' : modelInfo.id;
755
915
 
756
- console.log(`[Copilot] Vision request: ${hasVision}, Model: ${modelId}`);
916
+ console.log(`[Copilot] Vision request: ${hasVision}, Model: ${modelId} (key=${modelKey})`);
757
917
 
758
918
  const data = JSON.stringify({
759
919
  model: modelId,
@@ -1049,10 +1209,36 @@ function callOllama(messages) {
1049
1209
  }
1050
1210
 
1051
1211
  /**
1052
- * Send a message and get AI response
1212
+ * Detect if AI response was truncated mid-stream
1213
+ * Uses heuristics to identify incomplete responses
1214
+ */
1215
+ function detectTruncation(response) {
1216
+ if (!response || response.length < 100) return false;
1217
+
1218
+ const truncationSignals = [
1219
+ // Ends mid-JSON block
1220
+ /```json\s*\{[^}]*$/s.test(response),
1221
+ // Ends with unclosed code block
1222
+ (response.match(/```/g) || []).length % 2 !== 0,
1223
+ // Ends mid-sentence (lowercase letter or comma, no terminal punctuation)
1224
+ /[a-z,]\s*$/i.test(response) && !/[.!?:]\s*$/i.test(response),
1225
+ // Ends with numbered list item starting
1226
+ /\d+\.\s*$/m.test(response),
1227
+ // Ends with "- " suggesting incomplete list item
1228
+ /-\s*$/m.test(response),
1229
+ // Has unclosed parentheses/brackets
1230
+ (response.match(/\(/g) || []).length > (response.match(/\)/g) || []).length,
1231
+ (response.match(/\[/g) || []).length > (response.match(/\]/g) || []).length
1232
+ ];
1233
+
1234
+ return truncationSignals.some(Boolean);
1235
+ }
1236
+
1237
+ /**
1238
+ * Send a message and get AI response with auto-continuation
1053
1239
  */
1054
1240
  async function sendMessage(userMessage, options = {}) {
1055
- const { includeVisualContext = false, coordinates = null } = options;
1241
+ const { includeVisualContext = false, coordinates = null, maxContinuations = 2, model = null } = options;
1056
1242
 
1057
1243
  // Enhance message with coordinate context if provided
1058
1244
  let enhancedMessage = userMessage;
@@ -1065,6 +1251,7 @@ async function sendMessage(userMessage, options = {}) {
1065
1251
 
1066
1252
  try {
1067
1253
  let response;
1254
+ let effectiveModel = currentCopilotModel;
1068
1255
 
1069
1256
  switch (currentProvider) {
1070
1257
  case 'copilot':
@@ -1075,7 +1262,14 @@ async function sendMessage(userMessage, options = {}) {
1075
1262
  throw new Error('Not authenticated with GitHub Copilot.\n\nTo authenticate:\n1. Type /login and authorize in browser\n2. Or set GH_TOKEN or GITHUB_TOKEN environment variable');
1076
1263
  }
1077
1264
  }
1078
- response = await callCopilot(messages);
1265
+ effectiveModel = resolveCopilotModelKey(model);
1266
+ // Enforce vision-capable model when visual context is included
1267
+ if (includeVisualContext && COPILOT_MODELS[effectiveModel] && !COPILOT_MODELS[effectiveModel].vision) {
1268
+ const visionFallback = AI_PROVIDERS.copilot.visionModel || 'gpt-4o';
1269
+ console.log(`[AI] Model ${effectiveModel} lacks vision, upgrading to ${visionFallback} for visual context`);
1270
+ effectiveModel = visionFallback;
1271
+ }
1272
+ response = await callCopilot(messages, effectiveModel);
1079
1273
  break;
1080
1274
 
1081
1275
  case 'openai':
@@ -1098,6 +1292,50 @@ async function sendMessage(userMessage, options = {}) {
1098
1292
  break;
1099
1293
  }
1100
1294
 
1295
+ // Auto-continuation for truncated responses
1296
+ let fullResponse = response;
1297
+ let continuationCount = 0;
1298
+
1299
+ while (detectTruncation(fullResponse) && continuationCount < maxContinuations) {
1300
+ continuationCount++;
1301
+ console.log(`[AI] Response appears truncated, continuing (${continuationCount}/${maxContinuations})...`);
1302
+
1303
+ // Add partial response to history temporarily
1304
+ conversationHistory.push({ role: 'assistant', content: fullResponse });
1305
+
1306
+ // Build continuation request
1307
+ const continueMessages = buildMessages('Continue from where you left off. Do not repeat what you already said.', false);
1308
+
1309
+ try {
1310
+ let continuation;
1311
+ switch (currentProvider) {
1312
+ case 'copilot':
1313
+ continuation = await callCopilot(continueMessages, effectiveModel);
1314
+ break;
1315
+ case 'openai':
1316
+ continuation = await callOpenAI(continueMessages);
1317
+ break;
1318
+ case 'anthropic':
1319
+ continuation = await callAnthropic(continueMessages);
1320
+ break;
1321
+ case 'ollama':
1322
+ default:
1323
+ continuation = await callOllama(continueMessages);
1324
+ }
1325
+
1326
+ // Append continuation
1327
+ fullResponse += '\n' + continuation;
1328
+
1329
+ // Update history with combined response
1330
+ conversationHistory.pop(); // Remove partial
1331
+ } catch (contErr) {
1332
+ console.warn('[AI] Continuation failed:', contErr.message);
1333
+ break;
1334
+ }
1335
+ }
1336
+
1337
+ response = fullResponse;
1338
+
1101
1339
  // Add to conversation history
1102
1340
  conversationHistory.push({ role: 'user', content: enhancedMessage });
1103
1341
  conversationHistory.push({ role: 'assistant', content: response });
@@ -1111,6 +1349,8 @@ async function sendMessage(userMessage, options = {}) {
1111
1349
  success: true,
1112
1350
  message: response,
1113
1351
  provider: currentProvider,
1352
+ model: effectiveModel,
1353
+ modelVersion: COPILOT_MODELS[effectiveModel]?.id || null,
1114
1354
  hasVisualContext: includeVisualContext && visualContextBuffer.length > 0
1115
1355
  };
1116
1356
 
@@ -1118,7 +1358,8 @@ async function sendMessage(userMessage, options = {}) {
1118
1358
  return {
1119
1359
  success: false,
1120
1360
  error: error.message,
1121
- provider: currentProvider
1361
+ provider: currentProvider,
1362
+ model: resolveCopilotModelKey(model)
1122
1363
  };
1123
1364
  }
1124
1365
  }
@@ -1375,6 +1616,44 @@ function analyzeActionSafety(action, targetInfo = {}) {
1375
1616
  case 'drag':
1376
1617
  result.riskLevel = ActionRiskLevel.MEDIUM;
1377
1618
  break;
1619
+ case 'focus_window':
1620
+ case 'bring_window_to_front':
1621
+ result.riskLevel = ActionRiskLevel.LOW;
1622
+ break;
1623
+ case 'send_window_to_back':
1624
+ case 'minimize_window':
1625
+ case 'restore_window':
1626
+ result.riskLevel = ActionRiskLevel.LOW;
1627
+ break;
1628
+ case 'run_command':
1629
+ // Analyze command safety
1630
+ const cmd = (action.command || '').toLowerCase();
1631
+ const dangerousPatterns = [
1632
+ /\b(rm|del|erase|rmdir|rd)\s+(-[rf]+|\/[sq]+|\*)/i,
1633
+ /Remove-Item.*-Recurse.*-Force/i,
1634
+ /\bformat\s+[a-z]:/i, // Match "format C:" but not "Format-Table"
1635
+ /\b(shutdown|restart|reboot)\b/i,
1636
+ /\breg\s+(delete|add)\b/i,
1637
+ /\bnet\s+(user|localgroup)\b/i,
1638
+ /\b(sudo|runas)\b/i,
1639
+ /Start-Process.*-Verb\s+RunAs/i,
1640
+ /Set-ExecutionPolicy/i,
1641
+ /Stop-Process.*-Force/i,
1642
+ ];
1643
+
1644
+ const isDangerous = dangerousPatterns.some(p => p.test(action.command || ''));
1645
+ if (isDangerous) {
1646
+ result.riskLevel = ActionRiskLevel.CRITICAL;
1647
+ result.warnings.push('Potentially destructive command');
1648
+ result.requiresConfirmation = true;
1649
+ } else if (cmd.includes('rm ') || cmd.includes('del ') || cmd.includes('remove')) {
1650
+ result.riskLevel = ActionRiskLevel.HIGH;
1651
+ result.warnings.push('Command may delete files');
1652
+ result.requiresConfirmation = true;
1653
+ } else {
1654
+ result.riskLevel = ActionRiskLevel.MEDIUM;
1655
+ }
1656
+ break;
1378
1657
  }
1379
1658
 
1380
1659
  // Check target info for dangerous patterns
@@ -1449,6 +1728,16 @@ function describeAction(action, targetInfo = {}) {
1449
1728
  return `Scroll ${action.direction} ${action.amount || 3} times`;
1450
1729
  case 'drag':
1451
1730
  return `Drag from (${action.fromX}, ${action.fromY}) to (${action.toX}, ${action.toY})`;
1731
+ case 'focus_window':
1732
+ return `Focus window ${action.windowHandle || action.hwnd || action.title || action.processName || ''}`.trim();
1733
+ case 'bring_window_to_front':
1734
+ return `Bring window to front ${action.windowHandle || action.hwnd || action.title || action.processName || ''}`.trim();
1735
+ case 'send_window_to_back':
1736
+ return `Send window to back ${action.windowHandle || action.hwnd || action.title || action.processName || ''}`.trim();
1737
+ case 'minimize_window':
1738
+ return `Minimize window ${action.windowHandle || action.hwnd || action.title || action.processName || ''}`.trim();
1739
+ case 'restore_window':
1740
+ return `Restore window ${action.windowHandle || action.hwnd || action.title || action.processName || ''}`.trim();
1452
1741
  case 'wait':
1453
1742
  return `Wait ${action.ms}ms`;
1454
1743
  case 'screenshot':
@@ -1539,7 +1828,7 @@ async function executeActions(actionData, onAction = null, onScreenshot = null,
1539
1828
  return { success: false, error: 'No valid actions provided' };
1540
1829
  }
1541
1830
 
1542
- const { onRequireConfirmation, targetAnalysis = {}, actionExecutor } = options;
1831
+ const { onRequireConfirmation, targetAnalysis = {}, actionExecutor, skipSafetyConfirmation = false } = options;
1543
1832
 
1544
1833
  console.log('[AI-SERVICE] Executing actions:', actionData.thought || 'No thought provided');
1545
1834
  console.log('[AI-SERVICE] Actions:', JSON.stringify(actionData.actions, null, 2));
@@ -1573,8 +1862,8 @@ async function executeActions(actionData, onAction = null, onScreenshot = null,
1573
1862
  const safety = analyzeActionSafety(action, targetInfo);
1574
1863
  console.log(`[AI-SERVICE] Action ${i} safety: ${safety.riskLevel}`, safety.warnings);
1575
1864
 
1576
- // If HIGH or CRITICAL risk, require confirmation
1577
- if (safety.requiresConfirmation) {
1865
+ // If HIGH or CRITICAL risk, require confirmation (unless user already confirmed via Execute button)
1866
+ if (safety.requiresConfirmation && !skipSafetyConfirmation) {
1578
1867
  console.log(`[AI-SERVICE] Action ${i} requires user confirmation`);
1579
1868
 
1580
1869
  // Store as pending action
@@ -1595,8 +1884,28 @@ async function executeActions(actionData, onAction = null, onScreenshot = null,
1595
1884
  pendingConfirmation = true;
1596
1885
  break; // Stop execution, wait for confirmation
1597
1886
  }
1887
+
1888
+ if (skipSafetyConfirmation && safety.requiresConfirmation) {
1889
+ console.log(`[AI-SERVICE] Action ${i} safety bypassed (user pre-confirmed via Execute button)`);
1890
+ }
1598
1891
 
1599
1892
  // Execute the action (SAFE/LOW/MEDIUM risk)
1893
+ // AUTO-FOCUS: Check if this is an interaction that requires window focus (click/type)
1894
+ // and if the target window is in the background.
1895
+ if ((action.type === 'click' || action.type === 'double_click' || action.type === 'right_click') && action.x !== undefined) {
1896
+ if (uiWatcher && uiWatcher.isPolling) {
1897
+ const elementAtPoint = uiWatcher.getElementAtPoint(action.x, action.y);
1898
+ if (elementAtPoint && elementAtPoint.windowHandle) {
1899
+ // Found an element with a known window handle
1900
+ // Focus it first to ensure click goes to the right window (not trapped by overlay or obscuring window)
1901
+ // We can call systemAutomation.focusWindow directly
1902
+ console.log(`[AI-SERVICE] Auto-focusing window handle ${elementAtPoint.windowHandle} for click at (${action.x}, ${action.y})`);
1903
+ await systemAutomation.focusWindow(elementAtPoint.windowHandle);
1904
+ await new Promise(r => setTimeout(r, 450)); // Wait for window animation/focus settling
1905
+ }
1906
+ }
1907
+ }
1908
+
1600
1909
  const result = await (actionExecutor ? actionExecutor(action) : systemAutomation.executeAction(action));
1601
1910
  result.reason = action.reason || '';
1602
1911
  result.safety = safety;
@@ -1724,5 +2033,10 @@ module.exports = {
1724
2033
  clearPendingAction,
1725
2034
  confirmPendingAction,
1726
2035
  rejectPendingAction,
1727
- resumeAfterConfirmation
2036
+ resumeAfterConfirmation,
2037
+ // UI awareness
2038
+ setUIWatcher,
2039
+ getUIWatcher,
2040
+ setSemanticDOMSnapshot,
2041
+ clearSemanticDOMSnapshot
1728
2042
  };