npm - copilot-liku-cli - Versions diffs - 0.0.3 → 0.0.8 - Mend

copilot-liku-cli 0.0.3 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

package/QUICKSTART.md +24 -0
package/README.md +85 -33
package/package.json +23 -14
package/scripts/postinstall.js +63 -0
package/src/cli/commands/window.js +66 -0
package/src/main/agents/base-agent.js +15 -7
package/src/main/agents/builder.js +211 -0
package/src/main/agents/index.js +7 -4
package/src/main/agents/orchestrator.js +13 -0
package/src/main/agents/producer.js +891 -0
package/src/main/agents/researcher.js +78 -0
package/src/main/agents/state-manager.js +134 -2
package/src/main/agents/verifier.js +201 -0
package/src/main/ai-service.js +349 -35
package/src/main/index.js +702 -113
package/src/main/inspect-service.js +24 -1
package/src/main/python-bridge.js +395 -0
package/src/main/system-automation.js +876 -131
package/src/main/ui-automation/core/ui-provider.js +99 -0
package/src/main/ui-automation/core/uia-host.js +214 -0
package/src/main/ui-automation/index.js +30 -0
package/src/main/ui-automation/interactions/element-click.js +6 -6
package/src/main/ui-automation/interactions/high-level.js +28 -6
package/src/main/ui-automation/interactions/index.js +21 -0
package/src/main/ui-automation/interactions/pattern-actions.js +236 -0
package/src/main/ui-automation/window/index.js +6 -0
package/src/main/ui-automation/window/manager.js +173 -26
package/src/main/ui-watcher.js +401 -58
package/src/main/visual-awareness.js +18 -1
package/src/native/windows-uia/Program.cs +89 -0
package/src/native/windows-uia/build.ps1 +24 -0
package/src/native/windows-uia-dotnet/Program.cs +920 -0
package/src/native/windows-uia-dotnet/WindowsUIA.csproj +11 -0
package/src/native/windows-uia-dotnet/build.ps1 +24 -0
package/src/renderer/chat/chat.js +915 -671
package/src/renderer/chat/index.html +2 -4
package/src/renderer/chat/preload.js +8 -1
package/src/renderer/overlay/overlay.js +157 -8
package/src/renderer/overlay/preload.js +4 -0
package/src/shared/inspect-types.js +82 -6
package/ARCHITECTURE.md +0 -411
package/CONFIGURATION.md +0 -302
package/CONTRIBUTING.md +0 -225
package/ELECTRON_README.md +0 -121
package/PROJECT_STATUS.md +0 -229
package/TESTING.md +0 -274

package/src/main/ai-service.js CHANGED Viewed

@@ -29,16 +29,116 @@ function getInspectService() {
   return inspectService;
 }
-// Lazy-load UI watcher for live UI context
+// Shared UI watcher for live UI context (set by index.js after starting)
 let uiWatcher = null;
+let semanticDomSnapshot = null;
+let semanticDomUpdatedAt = 0;
+const SEMANTIC_DOM_MAX_DEPTH = 4;
+const SEMANTIC_DOM_MAX_NODES = 120;
+const SEMANTIC_DOM_MAX_CHARS = 3500;
+const SEMANTIC_DOM_MAX_AGE_MS = 5000;
+/**
+ * Register the shared UI watcher used by this module for live UI context.
+ * Intended to be called from index.js; replaces any previously stored watcher.
+ * @param {Object} watcher - Watcher instance to store.
+ */
+function setUIWatcher(watcher) {
+  uiWatcher = watcher;
+  console.log('[AI-SERVICE] UI Watcher connected');
+}
 function getUIWatcher() {
-  if (!uiWatcher) {
-    const { UIWatcher } = require('./ui-watcher');
-    uiWatcher = new UIWatcher();
-  }
   return uiWatcher;
 }
+/**
+ * Store the latest semantic DOM tree and stamp the time it was received.
+ * A falsy tree is stored as null; the timestamp is refreshed either way.
+ * @param {Object|null} tree - Root node of the semantic tree.
+ */
+function setSemanticDOMSnapshot(tree) {
+  semanticDomUpdatedAt = Date.now();
+  semanticDomSnapshot = tree ? tree : null;
+}
+/**
+ * Discard the stored semantic DOM tree and reset its timestamp to 0.
+ */
+function clearSemanticDOMSnapshot() {
+  semanticDomUpdatedAt = 0;
+  semanticDomSnapshot = null;
+}
+/**
+ * Flatten a semantic DOM tree into a bounded, pre-order list of nodes.
+ *
+ * A node is kept when it is clickable, focusable, or has a non-blank name, and
+ * its bounds are finite numbers with positive width and height. Nodes deeper
+ * than SEMANTIC_DOM_MAX_DEPTH are ignored, and collection stops as soon as
+ * SEMANTIC_DOM_MAX_NODES entries have been gathered.
+ *
+ * @param {Object} root - Root node; a falsy root yields an empty list.
+ * @returns {Array<Object>} Entries shaped { id, name, role, bounds, isClickable, isFocusable }.
+ */
+function pruneSemanticTree(root) {
+  const kept = [];
+  // Explicit stack instead of recursion. Children are pushed in reverse so the
+  // nodes are still visited in the same pre-order as a recursive walk.
+  const pending = [{ node: root, depth: 0 }];
+  while (pending.length > 0 && kept.length < SEMANTIC_DOM_MAX_NODES) {
+    const { node, depth } = pending.pop();
+    if (!node || depth > SEMANTIC_DOM_MAX_DEPTH) {
+      continue;
+    }
+    const { x, y, width, height } = node.bounds || {};
+    const boundsUsable = Number.isFinite(x)
+      && Number.isFinite(y)
+      && Number.isFinite(width)
+      && Number.isFinite(height)
+      && width > 0
+      && height > 0;
+    const clickable = !!node.isClickable;
+    const focusable = !!node.isFocusable;
+    // Trimmed name; empty string when the node has no usable string name.
+    const label = typeof node.name === 'string' ? node.name.trim() : '';
+    if (boundsUsable && (clickable || focusable || label.length > 0)) {
+      kept.push({
+        id: node.id || '',
+        name: label.slice(0, 64),
+        role: node.role || 'Unknown',
+        bounds: {
+          x: Math.round(x),
+          y: Math.round(y),
+          width: Math.round(width),
+          height: Math.round(height)
+        },
+        isClickable: clickable,
+        isFocusable: focusable
+      });
+    }
+    if (Array.isArray(node.children)) {
+      for (let i = node.children.length - 1; i >= 0; i--) {
+        pending.push({ node: node.children[i], depth: depth + 1 });
+      }
+    }
+  }
+  return kept;
+}
+/**
+ * Render the stored semantic DOM snapshot as a markdown section for the prompt.
+ *
+ * Returns an empty string when there is no snapshot, when the snapshot is older
+ * than SEMANTIC_DOM_MAX_AGE_MS, or when pruning leaves no nodes. Output longer
+ * than SEMANTIC_DOM_MAX_CHARS is cut at that length and a truncation marker is
+ * appended.
+ *
+ * @returns {string} Section text beginning with two newlines, or ''.
+ */
+function getSemanticDOMContextText() {
+  if (
+    !semanticDomSnapshot
+    || !semanticDomUpdatedAt
+    || (Date.now() - semanticDomUpdatedAt) > SEMANTIC_DOM_MAX_AGE_MS
+  ) {
+    return '';
+  }
+  const entries = pruneSemanticTree(semanticDomSnapshot);
+  if (entries.length === 0) {
+    return '';
+  }
+  const lines = entries.map((entry, index) => {
+    const flags = [];
+    if (entry.isClickable) flags.push('clickable');
+    if (entry.isFocusable) flags.push('focusable');
+    const nameSuffix = entry.name ? ` \"${entry.name}\"` : '';
+    const idSuffix = entry.id ? ` id=${entry.id}` : '';
+    const flagSuffix = flags.length > 0 ? ` [${flags.join(',')}]` : '';
+    const { x, y, width, height } = entry.bounds;
+    return `- [${index + 1}] ${entry.role}${nameSuffix}${idSuffix} at (${x}, ${y}, ${width}, ${height})${flagSuffix}`;
+  });
+  const text = `\n\n## Semantic DOM (grounded accessibility tree)\n${lines.join('\n')}`;
+  return text.length > SEMANTIC_DOM_MAX_CHARS
+    ? `${text.slice(0, SEMANTIC_DOM_MAX_CHARS)}\n... (truncated)`
+    : text;
+}
 // ===== CONFIGURATION =====
 // Available models for GitHub Copilot (based on Copilot CLI changelog)
@@ -107,8 +207,9 @@ let currentModelMetadata = {
   lastUpdated: new Date().toISOString()
 };
-// Token persistence path
-const TOKEN_FILE = path.join(process.env.APPDATA || process.env.HOME || '.', 'copilot-agent', 'copilot-token.json');
+// Token persistence path — lives inside ~/.liku-cli/ alongside Electron userData
+const LIKU_HOME = path.join(os.homedir(), '.liku-cli');
+const TOKEN_FILE = path.join(LIKU_HOME, 'copilot-token.json');
 // OAuth state
 let oauthInProgress = false;
@@ -180,13 +281,29 @@ const SYSTEM_PROMPT = `You are Liku, an intelligent AGENTIC AI assistant integra
 ${getPlatformContext()}
+## LIVE UI AWARENESS (CRITICAL - READ THIS!)
+The user will provide a **Live UI State** section in their messages. This section lists visible UI elements detected on the screen.
+Format: \`- [Index] Type: "Name" at (x, y)\`
+⚠️ **HOW TO USE LIVE UI STATE:**
+1. **Identify Elements**: Use the numeric [Index] or Name to identify elements.
+2. **Clicking**: To click an element from the list, PREFER using its coordinates provided in the entry:
+   - Example Entry: \`- [42] Button: "Submit" at (500, 300)\`
+   - Action: \`{"type": "click", "x": 500, "y": 300, "reason": "Click Submit button [42]"}\`
+   - Alternatively: \`{"type": "click_element", "text": "Submit"}\` works if the name is unique.
+3. **Context**: Group elements by their Window header to understand which application they belong to.
+⚠️ **DO NOT REQUEST SCREENSHOTS** to find standard UI elements - check the Live UI State first.
+**TO LIST ELEMENTS**: Read the Live UI State section and list what's there (e.g., "I see a 'Save' button at index [15]").
 ## Your Core Capabilities
-1. **Screen Vision**: When the user captures their screen, you receive it as an image. ALWAYS analyze visible content immediately.
+1. **Screen Vision**: When the user captures their screen, you receive it as an image. Use this for spatial/visual tasks. For element-based tasks, the Live UI State is sufficient.
-2. **SEMANTIC ELEMENT ACTIONS (PREFERRED!)**: You can interact with UI elements by their text/name - MORE RELIABLE than coordinates:
+2. **SEMANTIC ELEMENT ACTIONS**: You can interact with UI elements by their text/name:
    - \`{"type": "click_element", "text": "Submit", "reason": "Click Submit button"}\` - Finds and clicks element by text
-   - \`{"type": "find_element", "text": "Save", "reason": "Locate Save button"}\` - Finds element info
 3. **Grid Coordinate System**: The screen has a dot grid overlay:
    - **Columns**: Letters A, B, C, D... (left to right), spacing 100px
@@ -229,6 +346,12 @@ When the user asks you to DO something, respond with a JSON action block:
 - \`{"type": "drag", "fromX": <n>, "fromY": <n>, "toX": <n>, "toY": <n>}\` - Drag
 - \`{"type": "wait", "ms": <number>}\` - Wait milliseconds (IMPORTANT: add waits between multi-step actions!)
 - \`{"type": "screenshot"}\` - Take screenshot to verify result
+- \`{"type": "focus_window", "windowHandle": <number>}\` - Bring a window to the foreground (use if target is in background)
+- \`{"type": "bring_window_to_front", "title": "<partial title>"}\` - Bring matching background app to foreground
+- \`{"type": "send_window_to_back", "title": "<partial title>"}\` - Push matching window behind others without activating
+- \`{"type": "minimize_window", "title": "<partial title>"}\` - Minimize a specific window
+- \`{"type": "restore_window", "title": "<partial title>"}\` - Restore a minimized window
+- \`{"type": "run_command", "command": "<shell command>", "cwd": "<optional path>", "shell": "powershell|cmd|bash"}\` - **PREFERRED FOR SHELL TASKS**: Execute shell command directly and return output (timeout: 30s)
 ### Grid to Pixel Conversion:
 - A0 → (50, 50), B0 → (150, 50), C0 → (250, 50)
@@ -251,15 +374,19 @@ When the user asks you to DO something, respond with a JSON action block:
 **Common Task Patterns**:
 ${PLATFORM === 'win32' ? `
-- **Open new terminal**: Use \`win+x\` then \`i\` (or \`win+r\` → type "wt" → \`enter\`)
+- **Run shell commands**: Use \`run_command\` action - e.g., \`{"type": "run_command", "command": "Get-Process | Select-Object -First 5"}\`
+- **List files**: \`{"type": "run_command", "command": "dir", "cwd": "C:\\\\Users"}\` or \`{"type": "run_command", "command": "Get-ChildItem"}\`
+- **Open terminal GUI**: Use \`win+x\` then \`i\` (or \`win+r\` → type "wt" → \`enter\`) - only if user wants visible terminal
 - **Open application**: Use \`win\` key, type app name, press \`enter\`
 - **Save file**: \`ctrl+s\`
 - **Copy/Paste**: \`ctrl+c\` / \`ctrl+v\`` : PLATFORM === 'darwin' ? `
-- **Open terminal**: \`cmd+space\`, type "Terminal", \`enter\`
+- **Run shell commands**: Use \`run_command\` action - e.g., \`{"type": "run_command", "command": "ls -la", "shell": "bash"}\`
+- **Open terminal GUI**: \`cmd+space\`, type "Terminal", \`enter\` - only if user wants visible terminal
 - **Open application**: \`cmd+space\`, type app name, \`enter\`
 - **Save file**: \`cmd+s\`
 - **Copy/Paste**: \`cmd+c\` / \`cmd+v\`` : `
-- **Open terminal**: \`ctrl+alt+t\`
+- **Run shell commands**: Use \`run_command\` action - e.g., \`{"type": "run_command", "command": "ls -la", "shell": "bash"}\`
+- **Open terminal GUI**: \`ctrl+alt+t\` - only if user wants visible terminal
 - **Open application**: \`super\` key, type name, \`enter\`
 - **Save file**: \`ctrl+s\`
 - **Copy/Paste**: \`ctrl+c\` / \`ctrl+v\``}
@@ -308,6 +435,16 @@ function setCopilotModel(model) {
   return false;
 }
+/**
+ * Resolve a requested Copilot model key to a valid configured key.
+ *
+ * @param {string|null|undefined} requestedModel - Key requested by the caller.
+ * @returns {string} `requestedModel` when it is an own, truthy entry of
+ *   COPILOT_MODELS; otherwise the currently selected model key.
+ */
+function resolveCopilotModelKey(requestedModel) {
+  // Own-property check: a bare `COPILOT_MODELS[key]` lookup is also truthy for
+  // inherited names such as "constructor" or "toString", which are not models.
+  const isConfigured = Boolean(requestedModel)
+    && Object.prototype.hasOwnProperty.call(COPILOT_MODELS, requestedModel)
+    && Boolean(COPILOT_MODELS[requestedModel]);
+  return isConfigured ? requestedModel : currentCopilotModel;
+}
 /**
  * Get available Copilot models
  */
@@ -338,13 +475,14 @@ function getCurrentCopilotModel() {
 }
 /**
- * Add visual context (screenshot data)
+ * Add visual context (screenshot data) as a typed VisualFrame
+ * @param {Object} imageData - Raw image data with dataURL, width, height, etc.
  */
 function addVisualContext(imageData) {
-  visualContextBuffer.push({
-    ...imageData,
-    addedAt: Date.now()
-  });
+  const { createVisualFrame } = require('../shared/inspect-types');
+  const frame = createVisualFrame(imageData);
+  frame.addedAt = Date.now();
+  visualContextBuffer.push(frame);
   // Keep only recent visual context
   while (visualContextBuffer.length > MAX_VISUAL_CONTEXT) {
@@ -415,19 +553,24 @@ ${inspectContext.regions.slice(0, 20).map((r, i) =>
   let liveUIContextText = '';
   try {
     const watcher = getUIWatcher();
-    if (watcher && watcher.isRunning) {
+    if (watcher && watcher.isPolling) {
       const uiContext = watcher.getContextForAI();
       if (uiContext && uiContext.trim()) {
-        liveUIContextText = `\n\n${uiContext}`;
-        console.log('[AI] Including live UI context from watcher');
+        // Frame the context as trustworthy real-time data
+        liveUIContextText = `\n\n---\n🔴 **LIVE UI STATE** (auto-refreshed every 400ms - TRUST THIS DATA!)\n${uiContext}\n---`;
+        console.log('[AI] Including live UI context from watcher (', uiContext.split('\n').length, 'lines)');
       }
+    } else {
+      console.log('[AI] UI Watcher not available or not running (watcher:', !!watcher, ', polling:', watcher?.isPolling, ')');
     }
   } catch (e) {
     console.warn('[AI] Could not get live UI context:', e.message);
   }
+  const semanticDOMContextText = getSemanticDOMContextText();
-  const enhancedMessage = inspectContextText || liveUIContextText
-    ? `${userMessage}${inspectContextText}${liveUIContextText}`
+  const enhancedMessage = inspectContextText || liveUIContextText || semanticDOMContextText
+    ? `${userMessage}${inspectContextText}${liveUIContextText}${semanticDOMContextText}`
     : userMessage;
   if (latestVisual && (currentProvider === 'copilot' || currentProvider === 'openai')) {
@@ -484,10 +627,26 @@ ${inspectContext.regions.slice(0, 20).map((r, i) =>
 // ===== GITHUB COPILOT OAUTH =====
 /**
- * Load saved Copilot token from disk
+ * Load saved Copilot token from disk.
+ * On first run after the path migration, copies the token from the
+ * legacy location (%APPDATA%/copilot-agent/) to ~/.liku-cli/.
  */
 function loadCopilotToken() {
   try {
+    // Migrate from legacy path if new location is empty
+    if (!fs.existsSync(TOKEN_FILE)) {
+      const legacyPath = path.join(
+        process.env.APPDATA || process.env.HOME || '.',
+        'copilot-agent', 'copilot-token.json'
+      );
+      if (fs.existsSync(legacyPath)) {
+        const dir = path.dirname(TOKEN_FILE);
+        if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
+        fs.copyFileSync(legacyPath, TOKEN_FILE);
+        console.log('[COPILOT] Migrated token from legacy path');
+      }
+    }
     if (fs.existsSync(TOKEN_FILE)) {
       const data = JSON.parse(fs.readFileSync(TOKEN_FILE, 'utf8'));
       if (data.access_token) {
@@ -731,7 +890,7 @@ function exchangeForCopilotSession() {
  * Call GitHub Copilot API
  * Uses session token (not OAuth token) - exchanges if needed
  */
-async function callCopilot(messages) {
+async function callCopilot(messages, modelOverride = null) {
   // Ensure we have OAuth token
   if (!apiKeys.copilot) {
     if (!loadCopilotToken()) {
@@ -750,10 +909,11 @@ async function callCopilot(messages) {
   return new Promise((resolve, reject) => {
     const hasVision = messages.some(m => Array.isArray(m.content));
-    const modelInfo = COPILOT_MODELS[currentCopilotModel] || COPILOT_MODELS['gpt-4o'];
+    const modelKey = resolveCopilotModelKey(modelOverride);
+    const modelInfo = COPILOT_MODELS[modelKey] || COPILOT_MODELS['gpt-4o'];
     const modelId = hasVision && !modelInfo.vision ? 'gpt-4o' : modelInfo.id;
-    console.log(`[Copilot] Vision request: ${hasVision}, Model: ${modelId}`);
+    console.log(`[Copilot] Vision request: ${hasVision}, Model: ${modelId} (key=${modelKey})`);
     const data = JSON.stringify({
       model: modelId,
@@ -1049,10 +1209,36 @@ function callOllama(messages) {
 }
 /**
- * Send a message and get AI response
+ * Detect if AI response was truncated mid-stream
+ * Uses heuristics to identify incomplete responses
+ */
+function detectTruncation(response) {
+  // Heuristic check for a response that was cut off mid-stream.
+  // Returns false for non-strings and for text shorter than 100 characters,
+  // true when any of the signals below fires.
+  if (typeof response !== 'string' || response.length < 100) return false;
+  const countOf = (pattern) => (response.match(pattern) || []).length;
+  const truncationSignals = [
+    // Ends mid-JSON block
+    /```json\s*\{[^}]*$/s.test(response),
+    // Ends with unclosed code block
+    countOf(/```/g) % 2 !== 0,
+    // Ends mid-sentence: last non-space character is a letter or a comma
+    /[a-z,]\s*$/i.test(response),
+    // Last line is only a numbered-list marker ("3. "). No `m` flag, so `$` is
+    // the end of the response; a line ending in "2024." elsewhere does not match.
+    /(?:^|\n)[ \t]*\d+\.\s*$/.test(response),
+    // Last line is only a "- " bullet marker (again anchored to the very end)
+    /(?:^|\n)[ \t]*-\s*$/.test(response),
+    // Has unclosed parentheses/brackets
+    countOf(/\(/g) > countOf(/\)/g),
+    countOf(/\[/g) > countOf(/\]/g)
+  ];
+  return truncationSignals.some(Boolean);
+}
+/**
+ * Send a message and get AI response with auto-continuation
  */
 async function sendMessage(userMessage, options = {}) {
-  const { includeVisualContext = false, coordinates = null } = options;
+  const { includeVisualContext = false, coordinates = null, maxContinuations = 2, model = null } = options;
   // Enhance message with coordinate context if provided
   let enhancedMessage = userMessage;
@@ -1065,6 +1251,7 @@ async function sendMessage(userMessage, options = {}) {
   try {
     let response;
+    let effectiveModel = currentCopilotModel;
     switch (currentProvider) {
       case 'copilot':
@@ -1075,7 +1262,14 @@ async function sendMessage(userMessage, options = {}) {
             throw new Error('Not authenticated with GitHub Copilot.\n\nTo authenticate:\n1. Type /login and authorize in browser\n2. Or set GH_TOKEN or GITHUB_TOKEN environment variable');
           }
         }
-        response = await callCopilot(messages);
+        effectiveModel = resolveCopilotModelKey(model);
+        // Enforce vision-capable model when visual context is included
+        if (includeVisualContext && COPILOT_MODELS[effectiveModel] && !COPILOT_MODELS[effectiveModel].vision) {
+          const visionFallback = AI_PROVIDERS.copilot.visionModel || 'gpt-4o';
+          console.log(`[AI] Model ${effectiveModel} lacks vision, upgrading to ${visionFallback} for visual context`);
+          effectiveModel = visionFallback;
+        }
+        response = await callCopilot(messages, effectiveModel);
         break;
       case 'openai':
@@ -1098,6 +1292,50 @@ async function sendMessage(userMessage, options = {}) {
         break;
     }
+    // Auto-continuation for truncated responses
+    let fullResponse = response;
+    let continuationCount = 0;
+    while (detectTruncation(fullResponse) && continuationCount < maxContinuations) {
+      continuationCount++;
+      console.log(`[AI] Response appears truncated, continuing (${continuationCount}/${maxContinuations})...`);
+      // Add partial response to history temporarily
+      conversationHistory.push({ role: 'assistant', content: fullResponse });
+      // Build continuation request
+      const continueMessages = buildMessages('Continue from where you left off. Do not repeat what you already said.', false);
+      try {
+        let continuation;
+        switch (currentProvider) {
+          case 'copilot':
+            continuation = await callCopilot(continueMessages, effectiveModel);
+            break;
+          case 'openai':
+            continuation = await callOpenAI(continueMessages);
+            break;
+          case 'anthropic':
+            continuation = await callAnthropic(continueMessages);
+            break;
+          case 'ollama':
+          default:
+            continuation = await callOllama(continueMessages);
+        }
+        // Append continuation
+        fullResponse += '\n' + continuation;
+        // Update history with combined response
+        conversationHistory.pop(); // Remove partial
+      } catch (contErr) {
+        console.warn('[AI] Continuation failed:', contErr.message);
+        break;
+      }
+    }
+    response = fullResponse;
     // Add to conversation history
     conversationHistory.push({ role: 'user', content: enhancedMessage });
     conversationHistory.push({ role: 'assistant', content: response });
@@ -1111,6 +1349,8 @@ async function sendMessage(userMessage, options = {}) {
       success: true,
       message: response,
       provider: currentProvider,
+      model: effectiveModel,
+      modelVersion: COPILOT_MODELS[effectiveModel]?.id || null,
       hasVisualContext: includeVisualContext && visualContextBuffer.length > 0
     };
@@ -1118,7 +1358,8 @@ async function sendMessage(userMessage, options = {}) {
     return {
       success: false,
       error: error.message,
-      provider: currentProvider
+      provider: currentProvider,
+      model: resolveCopilotModelKey(model)
     };
   }
 }
@@ -1375,6 +1616,44 @@ function analyzeActionSafety(action, targetInfo = {}) {
     case 'drag':
       result.riskLevel = ActionRiskLevel.MEDIUM;
       break;
+    case 'focus_window':
+    case 'bring_window_to_front':
+      result.riskLevel = ActionRiskLevel.LOW;
+      break;
+    case 'send_window_to_back':
+    case 'minimize_window':
+    case 'restore_window':
+      result.riskLevel = ActionRiskLevel.LOW;
+      break;
+    case 'run_command':
+      // Analyze command safety
+      const cmd = (action.command || '').toLowerCase();
+      const dangerousPatterns = [
+        /\b(rm|del|erase|rmdir|rd)\s+(-[rf]+|\/[sq]+|\*)/i,
+        /Remove-Item.*-Recurse.*-Force/i,
+        /\bformat\s+[a-z]:/i,  // Match "format C:" but not "Format-Table"
+        /\b(shutdown|restart|reboot)\b/i,
+        /\breg\s+(delete|add)\b/i,
+        /\bnet\s+(user|localgroup)\b/i,
+        /\b(sudo|runas)\b/i,
+        /Start-Process.*-Verb\s+RunAs/i,
+        /Set-ExecutionPolicy/i,
+        /Stop-Process.*-Force/i,
+      ];
+      const isDangerous = dangerousPatterns.some(p => p.test(action.command || ''));
+      if (isDangerous) {
+        result.riskLevel = ActionRiskLevel.CRITICAL;
+        result.warnings.push('Potentially destructive command');
+        result.requiresConfirmation = true;
+      } else if (cmd.includes('rm ') || cmd.includes('del ') || cmd.includes('remove')) {
+        result.riskLevel = ActionRiskLevel.HIGH;
+        result.warnings.push('Command may delete files');
+        result.requiresConfirmation = true;
+      } else {
+        result.riskLevel = ActionRiskLevel.MEDIUM;
+      }
+      break;
   }
   // Check target info for dangerous patterns
@@ -1449,6 +1728,16 @@ function describeAction(action, targetInfo = {}) {
       return `Scroll ${action.direction} ${action.amount || 3} times`;
     case 'drag':
       return `Drag from (${action.fromX}, ${action.fromY}) to (${action.toX}, ${action.toY})`;
+    case 'focus_window':
+      return `Focus window ${action.windowHandle || action.hwnd || action.title || action.processName || ''}`.trim();
+    case 'bring_window_to_front':
+      return `Bring window to front ${action.windowHandle || action.hwnd || action.title || action.processName || ''}`.trim();
+    case 'send_window_to_back':
+      return `Send window to back ${action.windowHandle || action.hwnd || action.title || action.processName || ''}`.trim();
+    case 'minimize_window':
+      return `Minimize window ${action.windowHandle || action.hwnd || action.title || action.processName || ''}`.trim();
+    case 'restore_window':
+      return `Restore window ${action.windowHandle || action.hwnd || action.title || action.processName || ''}`.trim();
     case 'wait':
       return `Wait ${action.ms}ms`;
     case 'screenshot':
@@ -1539,7 +1828,7 @@ async function executeActions(actionData, onAction = null, onScreenshot = null,
     return { success: false, error: 'No valid actions provided' };
   }
-  const { onRequireConfirmation, targetAnalysis = {}, actionExecutor } = options;
+  const { onRequireConfirmation, targetAnalysis = {}, actionExecutor, skipSafetyConfirmation = false } = options;
   console.log('[AI-SERVICE] Executing actions:', actionData.thought || 'No thought provided');
   console.log('[AI-SERVICE] Actions:', JSON.stringify(actionData.actions, null, 2));
@@ -1573,8 +1862,8 @@ async function executeActions(actionData, onAction = null, onScreenshot = null,
     const safety = analyzeActionSafety(action, targetInfo);
     console.log(`[AI-SERVICE] Action ${i} safety: ${safety.riskLevel}`, safety.warnings);
-    // If HIGH or CRITICAL risk, require confirmation
-    if (safety.requiresConfirmation) {
+    // If HIGH or CRITICAL risk, require confirmation (unless user already confirmed via Execute button)
+    if (safety.requiresConfirmation && !skipSafetyConfirmation) {
       console.log(`[AI-SERVICE] Action ${i} requires user confirmation`);
       // Store as pending action
@@ -1595,8 +1884,28 @@ async function executeActions(actionData, onAction = null, onScreenshot = null,
       pendingConfirmation = true;
       break; // Stop execution, wait for confirmation
     }
+    if (skipSafetyConfirmation && safety.requiresConfirmation) {
+      console.log(`[AI-SERVICE] Action ${i} safety bypassed (user pre-confirmed via Execute button)`);
+    }
     // Execute the action (SAFE/LOW/MEDIUM risk)
+    // AUTO-FOCUS: For coordinate-based click, double-click and right-click actions, look up
+    // the element under the target point and focus its window before the action is executed.
+    if ((action.type === 'click' || action.type === 'double_click' || action.type === 'right_click') && action.x !== undefined) {
+      if (uiWatcher && uiWatcher.isPolling) {
+        const elementAtPoint = uiWatcher.getElementAtPoint(action.x, action.y);
+        if (elementAtPoint && elementAtPoint.windowHandle) {
+          // Found an element with a known window handle
+          // Focus it first to ensure click goes to the right window (not trapped by overlay or obscuring window)
+          // We can call systemAutomation.focusWindow directly
+          console.log(`[AI-SERVICE] Auto-focusing window handle ${elementAtPoint.windowHandle} for click at (${action.x}, ${action.y})`);
+          await systemAutomation.focusWindow(elementAtPoint.windowHandle);
+          await new Promise(r => setTimeout(r, 450)); // Wait for window animation/focus settling
+        }
+      }
+    }
     const result = await (actionExecutor ? actionExecutor(action) : systemAutomation.executeAction(action));
     result.reason = action.reason || '';
     result.safety = safety;
@@ -1724,5 +2033,10 @@ module.exports = {
   clearPendingAction,
   confirmPendingAction,
   rejectPendingAction,
-  resumeAfterConfirmation
+  resumeAfterConfirmation,
+  // UI awareness
+  setUIWatcher,
+  getUIWatcher,
+  setSemanticDOMSnapshot,
+  clearSemanticDOMSnapshot
 };