copilot-liku-cli 0.0.3 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/QUICKSTART.md +24 -0
- package/README.md +85 -33
- package/package.json +23 -14
- package/scripts/postinstall.js +63 -0
- package/src/cli/commands/window.js +66 -0
- package/src/main/agents/base-agent.js +15 -7
- package/src/main/agents/builder.js +211 -0
- package/src/main/agents/index.js +7 -4
- package/src/main/agents/orchestrator.js +13 -0
- package/src/main/agents/producer.js +891 -0
- package/src/main/agents/researcher.js +78 -0
- package/src/main/agents/state-manager.js +134 -2
- package/src/main/agents/verifier.js +201 -0
- package/src/main/ai-service.js +349 -35
- package/src/main/index.js +702 -113
- package/src/main/inspect-service.js +24 -1
- package/src/main/python-bridge.js +395 -0
- package/src/main/system-automation.js +876 -131
- package/src/main/ui-automation/core/ui-provider.js +99 -0
- package/src/main/ui-automation/core/uia-host.js +214 -0
- package/src/main/ui-automation/index.js +30 -0
- package/src/main/ui-automation/interactions/element-click.js +6 -6
- package/src/main/ui-automation/interactions/high-level.js +28 -6
- package/src/main/ui-automation/interactions/index.js +21 -0
- package/src/main/ui-automation/interactions/pattern-actions.js +236 -0
- package/src/main/ui-automation/window/index.js +6 -0
- package/src/main/ui-automation/window/manager.js +173 -26
- package/src/main/ui-watcher.js +401 -58
- package/src/main/visual-awareness.js +18 -1
- package/src/native/windows-uia/Program.cs +89 -0
- package/src/native/windows-uia/build.ps1 +24 -0
- package/src/native/windows-uia-dotnet/Program.cs +920 -0
- package/src/native/windows-uia-dotnet/WindowsUIA.csproj +11 -0
- package/src/native/windows-uia-dotnet/build.ps1 +24 -0
- package/src/renderer/chat/chat.js +915 -671
- package/src/renderer/chat/index.html +2 -4
- package/src/renderer/chat/preload.js +8 -1
- package/src/renderer/overlay/overlay.js +157 -8
- package/src/renderer/overlay/preload.js +4 -0
- package/src/shared/inspect-types.js +82 -6
- package/ARCHITECTURE.md +0 -411
- package/CONFIGURATION.md +0 -302
- package/CONTRIBUTING.md +0 -225
- package/ELECTRON_README.md +0 -121
- package/PROJECT_STATUS.md +0 -229
- package/TESTING.md +0 -274
package/src/main/ai-service.js
CHANGED
|
@@ -29,16 +29,116 @@ function getInspectService() {
|
|
|
29
29
|
return inspectService;
|
|
30
30
|
}
|
|
31
31
|
|
|
32
|
-
//
|
|
32
|
+
// Shared UI watcher for live UI context (set by index.js after starting)
|
|
33
33
|
let uiWatcher = null;
|
|
34
|
+
let semanticDomSnapshot = null;
|
|
35
|
+
let semanticDomUpdatedAt = 0;
|
|
36
|
+
const SEMANTIC_DOM_MAX_DEPTH = 4;
|
|
37
|
+
const SEMANTIC_DOM_MAX_NODES = 120;
|
|
38
|
+
const SEMANTIC_DOM_MAX_CHARS = 3500;
|
|
39
|
+
const SEMANTIC_DOM_MAX_AGE_MS = 5000;
|
|
40
|
+
|
|
41
|
+
/**
 * Register the UI watcher instance shared with this module.
 * Called from index.js; the stored instance is what getUIWatcher() returns.
 *
 * @param {Object} watcher - Watcher instance to store in module state.
 */
function setUIWatcher(watcher) {
  uiWatcher = watcher;
  console.log('[AI-SERVICE] UI Watcher connected');
}
|
|
48
|
+
|
|
34
49
|
function getUIWatcher() {
|
|
35
|
-
if (!uiWatcher) {
|
|
36
|
-
const { UIWatcher } = require('./ui-watcher');
|
|
37
|
-
uiWatcher = new UIWatcher();
|
|
38
|
-
}
|
|
39
50
|
return uiWatcher;
|
|
40
51
|
}
|
|
41
52
|
|
|
53
|
+
/**
 * Store the latest semantic DOM tree and stamp it with the current time.
 * A falsy tree is normalized to null; the timestamp is refreshed either way.
 *
 * @param {Object|null} tree - Root node of the tree, or a falsy value.
 */
function setSemanticDOMSnapshot(tree) {
  semanticDomUpdatedAt = Date.now();
  semanticDomSnapshot = tree ? tree : null;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Forget the stored semantic DOM tree and reset its timestamp to 0.
 */
function clearSemanticDOMSnapshot() {
  semanticDomUpdatedAt = 0;
  semanticDomSnapshot = null;
}
|
|
62
|
+
|
|
63
|
+
/**
 * Flatten a semantic tree into a bounded list of nodes worth showing the model.
 *
 * Nodes are visited depth-first in their original order. A node is kept when
 * it is clickable/focusable or has a non-blank string name, AND its bounds are
 * all finite with positive width and height. Nodes deeper than
 * SEMANTIC_DOM_MAX_DEPTH are ignored, and the walk stops once
 * SEMANTIC_DOM_MAX_NODES nodes have been collected.
 *
 * @param {Object} root - Root node; nodes may carry id, name, role, bounds,
 *   isClickable, isFocusable and children.
 * @returns {Array<Object>} Kept nodes as
 *   { id, name, role, bounds: { x, y, width, height }, isClickable, isFocusable },
 *   with names trimmed and capped at 64 characters and bounds rounded.
 */
function pruneSemanticTree(root) {
  const kept = [];
  // Explicit stack instead of recursion. Children are pushed in reverse so
  // they pop in their original order (pre-order traversal).
  const pending = [{ node: root, depth: 0 }];

  while (pending.length > 0 && kept.length < SEMANTIC_DOM_MAX_NODES) {
    const { node, depth } = pending.pop();
    if (!node || depth > SEMANTIC_DOM_MAX_DEPTH) {
      continue;
    }

    const rect = node.bounds || {};
    const named = typeof node.name === 'string' && node.name.trim().length > 0;
    const interactive = Boolean(node.isClickable) || Boolean(node.isFocusable);
    const boundsUsable =
      Number.isFinite(rect.x) &&
      Number.isFinite(rect.y) &&
      Number.isFinite(rect.width) &&
      Number.isFinite(rect.height) &&
      rect.width > 0 &&
      rect.height > 0;

    if (boundsUsable && (interactive || named)) {
      kept.push({
        id: node.id || '',
        name: named ? node.name.trim().slice(0, 64) : '',
        role: node.role || 'Unknown',
        bounds: {
          x: Math.round(rect.x),
          y: Math.round(rect.y),
          width: Math.round(rect.width),
          height: Math.round(rect.height)
        },
        isClickable: Boolean(node.isClickable),
        isFocusable: Boolean(node.isFocusable)
      });
    }

    if (Array.isArray(node.children)) {
      for (let i = node.children.length - 1; i >= 0; i--) {
        pending.push({ node: node.children[i], depth: depth + 1 });
      }
    }
  }

  return kept;
}
|
|
105
|
+
|
|
106
|
+
/**
 * Render the stored semantic DOM snapshot as a text section for the prompt.
 *
 * Returns '' when no snapshot is stored, when the snapshot is older than
 * SEMANTIC_DOM_MAX_AGE_MS, or when pruning leaves no nodes. Otherwise returns
 * a "## Semantic DOM" heading followed by one line per node:
 *   - [n] Role "Name" id=ID at (x, y, width, height) [clickable,focusable]
 *
 * The entry list is kept within SEMANTIC_DOM_MAX_CHARS by dropping whole
 * trailing entries and appending "... (truncated)". Cutting at an arbitrary
 * character could leave a half-written coordinate tuple in the prompt.
 *
 * @returns {string} Prompt section text, or '' when there is nothing to add.
 */
function getSemanticDOMContextText() {
  if (!semanticDomSnapshot || !semanticDomUpdatedAt) {
    return '';
  }

  // Stale snapshots are worse than none: the UI may have changed since.
  if (Date.now() - semanticDomUpdatedAt > SEMANTIC_DOM_MAX_AGE_MS) {
    return '';
  }

  const nodes = pruneSemanticTree(semanticDomSnapshot);
  if (!nodes.length) {
    return '';
  }

  // Names and ids originate from other applications; flatten line breaks so
  // a single node can never span (or fake) several entries.
  const oneLine = (value) => String(value).replace(/[\r\n]+/g, ' ');

  let text = '\n\n## Semantic DOM (grounded accessibility tree)';
  let truncated = false;

  for (let i = 0; i < nodes.length; i++) {
    const node = nodes[i];
    const namePart = node.name ? ` "${oneLine(node.name)}"` : '';
    const idPart = node.id ? ` id=${oneLine(node.id)}` : '';
    const flags = [node.isClickable ? 'clickable' : null, node.isFocusable ? 'focusable' : null]
      .filter(Boolean)
      .join(',');
    const flagPart = flags ? ` [${flags}]` : '';
    const entry = `\n- [${i + 1}] ${node.role}${namePart}${idPart} at (${node.bounds.x}, ${node.bounds.y}, ${node.bounds.width}, ${node.bounds.height})${flagPart}`;

    // Stop before the entry that would exceed the budget; never emit a partial one.
    if (text.length + entry.length > SEMANTIC_DOM_MAX_CHARS) {
      truncated = true;
      break;
    }
    text += entry;
  }

  return truncated ? `${text}\n... (truncated)` : text;
}
|
|
141
|
+
|
|
42
142
|
// ===== CONFIGURATION =====
|
|
43
143
|
|
|
44
144
|
// Available models for GitHub Copilot (based on Copilot CLI changelog)
|
|
@@ -107,8 +207,9 @@ let currentModelMetadata = {
|
|
|
107
207
|
lastUpdated: new Date().toISOString()
|
|
108
208
|
};
|
|
109
209
|
|
|
110
|
-
// Token persistence path
|
|
111
|
-
const
|
|
210
|
+
// Token persistence path — lives inside ~/.liku-cli/ alongside Electron userData
|
|
211
|
+
const LIKU_HOME = path.join(os.homedir(), '.liku-cli');
|
|
212
|
+
const TOKEN_FILE = path.join(LIKU_HOME, 'copilot-token.json');
|
|
112
213
|
|
|
113
214
|
// OAuth state
|
|
114
215
|
let oauthInProgress = false;
|
|
@@ -180,13 +281,29 @@ const SYSTEM_PROMPT = `You are Liku, an intelligent AGENTIC AI assistant integra
|
|
|
180
281
|
|
|
181
282
|
${getPlatformContext()}
|
|
182
283
|
|
|
284
|
+
## LIVE UI AWARENESS (CRITICAL - READ THIS!)
|
|
285
|
+
|
|
286
|
+
The user will provide a **Live UI State** section in their messages. This section lists visible UI elements detected on the screen.
|
|
287
|
+
Format: \`- [Index] Type: "Name" at (x, y)\`
|
|
288
|
+
|
|
289
|
+
⚠️ **HOW TO USE LIVE UI STATE:**
|
|
290
|
+
1. **Identify Elements**: Use the numeric [Index] or Name to identify elements.
|
|
291
|
+
2. **Clicking**: To click an element from the list, PREFER using its coordinates provided in the entry:
|
|
292
|
+
- Example Entry: \`- [42] Button: "Submit" at (500, 300)\`
|
|
293
|
+
- Action: \`{"type": "click", "x": 500, "y": 300, "reason": "Click Submit button [42]"}\`
|
|
294
|
+
- Alternatively: \`{"type": "click_element", "text": "Submit"}\` works if the name is unique.
|
|
295
|
+
3. **Context**: Group elements by their Window header to understand which application they belong to.
|
|
296
|
+
|
|
297
|
+
⚠️ **DO NOT REQUEST SCREENSHOTS** to find standard UI elements - check the Live UI State first.
|
|
298
|
+
|
|
299
|
+
**TO LIST ELEMENTS**: Read the Live UI State section and list what's there (e.g., "I see a 'Save' button at index [15]").
|
|
300
|
+
|
|
183
301
|
## Your Core Capabilities
|
|
184
302
|
|
|
185
|
-
1. **Screen Vision**: When the user captures their screen, you receive it as an image.
|
|
303
|
+
1. **Screen Vision**: When the user captures their screen, you receive it as an image. Use this for spatial/visual tasks. For element-based tasks, the Live UI State is sufficient.
|
|
186
304
|
|
|
187
|
-
2. **SEMANTIC ELEMENT ACTIONS
|
|
305
|
+
2. **SEMANTIC ELEMENT ACTIONS**: You can interact with UI elements by their text/name:
|
|
188
306
|
- \`{"type": "click_element", "text": "Submit", "reason": "Click Submit button"}\` - Finds and clicks element by text
|
|
189
|
-
- \`{"type": "find_element", "text": "Save", "reason": "Locate Save button"}\` - Finds element info
|
|
190
307
|
|
|
191
308
|
3. **Grid Coordinate System**: The screen has a dot grid overlay:
|
|
192
309
|
- **Columns**: Letters A, B, C, D... (left to right), spacing 100px
|
|
@@ -229,6 +346,12 @@ When the user asks you to DO something, respond with a JSON action block:
|
|
|
229
346
|
- \`{"type": "drag", "fromX": <n>, "fromY": <n>, "toX": <n>, "toY": <n>}\` - Drag
|
|
230
347
|
- \`{"type": "wait", "ms": <number>}\` - Wait milliseconds (IMPORTANT: add waits between multi-step actions!)
|
|
231
348
|
- \`{"type": "screenshot"}\` - Take screenshot to verify result
|
|
349
|
+
- \`{"type": "focus_window", "windowHandle": <number>}\` - Bring a window to the foreground (use if target is in background)
|
|
350
|
+
- \`{"type": "bring_window_to_front", "title": "<partial title>"}\` - Bring matching background app to foreground
|
|
351
|
+
- \`{"type": "send_window_to_back", "title": "<partial title>"}\` - Push matching window behind others without activating
|
|
352
|
+
- \`{"type": "minimize_window", "title": "<partial title>"}\` - Minimize a specific window
|
|
353
|
+
- \`{"type": "restore_window", "title": "<partial title>"}\` - Restore a minimized window
|
|
354
|
+
- \`{"type": "run_command", "command": "<shell command>", "cwd": "<optional path>", "shell": "powershell|cmd|bash"}\` - **PREFERRED FOR SHELL TASKS**: Execute shell command directly and return output (timeout: 30s)
|
|
232
355
|
|
|
233
356
|
### Grid to Pixel Conversion:
|
|
234
357
|
- A0 → (50, 50), B0 → (150, 50), C0 → (250, 50)
|
|
@@ -251,15 +374,19 @@ When the user asks you to DO something, respond with a JSON action block:
|
|
|
251
374
|
|
|
252
375
|
**Common Task Patterns**:
|
|
253
376
|
${PLATFORM === 'win32' ? `
|
|
254
|
-
- **
|
|
377
|
+
- **Run shell commands**: Use \`run_command\` action - e.g., \`{"type": "run_command", "command": "Get-Process | Select-Object -First 5"}\`
|
|
378
|
+
- **List files**: \`{"type": "run_command", "command": "dir", "cwd": "C:\\\\Users"}\` or \`{"type": "run_command", "command": "Get-ChildItem"}\`
|
|
379
|
+
- **Open terminal GUI**: Use \`win+x\` then \`i\` (or \`win+r\` → type "wt" → \`enter\`) - only if user wants visible terminal
|
|
255
380
|
- **Open application**: Use \`win\` key, type app name, press \`enter\`
|
|
256
381
|
- **Save file**: \`ctrl+s\`
|
|
257
382
|
- **Copy/Paste**: \`ctrl+c\` / \`ctrl+v\`` : PLATFORM === 'darwin' ? `
|
|
258
|
-
- **
|
|
383
|
+
- **Run shell commands**: Use \`run_command\` action - e.g., \`{"type": "run_command", "command": "ls -la", "shell": "bash"}\`
|
|
384
|
+
- **Open terminal GUI**: \`cmd+space\`, type "Terminal", \`enter\` - only if user wants visible terminal
|
|
259
385
|
- **Open application**: \`cmd+space\`, type app name, \`enter\`
|
|
260
386
|
- **Save file**: \`cmd+s\`
|
|
261
387
|
- **Copy/Paste**: \`cmd+c\` / \`cmd+v\`` : `
|
|
262
|
-
- **
|
|
388
|
+
- **Run shell commands**: Use \`run_command\` action - e.g., \`{"type": "run_command", "command": "ls -la", "shell": "bash"}\`
|
|
389
|
+
- **Open terminal GUI**: \`ctrl+alt+t\` - only if user wants visible terminal
|
|
263
390
|
- **Open application**: \`super\` key, type name, \`enter\`
|
|
264
391
|
- **Save file**: \`ctrl+s\`
|
|
265
392
|
- **Copy/Paste**: \`ctrl+c\` / \`ctrl+v\``}
|
|
@@ -308,6 +435,16 @@ function setCopilotModel(model) {
|
|
|
308
435
|
return false;
|
|
309
436
|
}
|
|
310
437
|
|
|
438
|
+
/**
 * Resolve a requested Copilot model key to a valid configured key.
 *
 * A key is accepted only when COPILOT_MODELS defines it as an own property
 * with a truthy entry. Inherited names such as "constructor" or "toString"
 * are rejected, since a plain bracket lookup would treat them as configured.
 *
 * @param {string|null|undefined} requestedModel - Model key asked for by the caller.
 * @returns {string} requestedModel when it is configured; otherwise the
 *   currently selected model key (currentCopilotModel).
 */
function resolveCopilotModelKey(requestedModel) {
  const isConfigured =
    Boolean(requestedModel) &&
    Object.prototype.hasOwnProperty.call(COPILOT_MODELS, requestedModel) &&
    Boolean(COPILOT_MODELS[requestedModel]);

  return isConfigured ? requestedModel : currentCopilotModel;
}
|
|
447
|
+
|
|
311
448
|
/**
|
|
312
449
|
* Get available Copilot models
|
|
313
450
|
*/
|
|
@@ -338,13 +475,14 @@ function getCurrentCopilotModel() {
|
|
|
338
475
|
}
|
|
339
476
|
|
|
340
477
|
/**
|
|
341
|
-
* Add visual context (screenshot data)
|
|
478
|
+
* Add visual context (screenshot data) as a typed VisualFrame
|
|
479
|
+
* @param {Object} imageData - Raw image data with dataURL, width, height, etc.
|
|
342
480
|
*/
|
|
343
481
|
function addVisualContext(imageData) {
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
482
|
+
const { createVisualFrame } = require('../shared/inspect-types');
|
|
483
|
+
const frame = createVisualFrame(imageData);
|
|
484
|
+
frame.addedAt = Date.now();
|
|
485
|
+
visualContextBuffer.push(frame);
|
|
348
486
|
|
|
349
487
|
// Keep only recent visual context
|
|
350
488
|
while (visualContextBuffer.length > MAX_VISUAL_CONTEXT) {
|
|
@@ -415,19 +553,24 @@ ${inspectContext.regions.slice(0, 20).map((r, i) =>
|
|
|
415
553
|
let liveUIContextText = '';
|
|
416
554
|
try {
|
|
417
555
|
const watcher = getUIWatcher();
|
|
418
|
-
if (watcher && watcher.
|
|
556
|
+
if (watcher && watcher.isPolling) {
|
|
419
557
|
const uiContext = watcher.getContextForAI();
|
|
420
558
|
if (uiContext && uiContext.trim()) {
|
|
421
|
-
|
|
422
|
-
|
|
559
|
+
// Frame the context as trustworthy real-time data
|
|
560
|
+
liveUIContextText = `\n\n---\n🔴 **LIVE UI STATE** (auto-refreshed every 400ms - TRUST THIS DATA!)\n${uiContext}\n---`;
|
|
561
|
+
console.log('[AI] Including live UI context from watcher (', uiContext.split('\n').length, 'lines)');
|
|
423
562
|
}
|
|
563
|
+
} else {
|
|
564
|
+
console.log('[AI] UI Watcher not available or not running (watcher:', !!watcher, ', polling:', watcher?.isPolling, ')');
|
|
424
565
|
}
|
|
425
566
|
} catch (e) {
|
|
426
567
|
console.warn('[AI] Could not get live UI context:', e.message);
|
|
427
568
|
}
|
|
569
|
+
|
|
570
|
+
const semanticDOMContextText = getSemanticDOMContextText();
|
|
428
571
|
|
|
429
|
-
const enhancedMessage = inspectContextText || liveUIContextText
|
|
430
|
-
? `${userMessage}${inspectContextText}${liveUIContextText}`
|
|
572
|
+
const enhancedMessage = inspectContextText || liveUIContextText || semanticDOMContextText
|
|
573
|
+
? `${userMessage}${inspectContextText}${liveUIContextText}${semanticDOMContextText}`
|
|
431
574
|
: userMessage;
|
|
432
575
|
|
|
433
576
|
if (latestVisual && (currentProvider === 'copilot' || currentProvider === 'openai')) {
|
|
@@ -484,10 +627,26 @@ ${inspectContext.regions.slice(0, 20).map((r, i) =>
|
|
|
484
627
|
// ===== GITHUB COPILOT OAUTH =====
|
|
485
628
|
|
|
486
629
|
/**
|
|
487
|
-
* Load saved Copilot token from disk
|
|
630
|
+
* Load saved Copilot token from disk.
|
|
631
|
+
* On first run after the path migration, copies the token from the
|
|
632
|
+
* legacy location (%APPDATA%/copilot-agent/) to ~/.liku-cli/.
|
|
488
633
|
*/
|
|
489
634
|
function loadCopilotToken() {
|
|
490
635
|
try {
|
|
636
|
+
// Migrate from legacy path if new location is empty
|
|
637
|
+
if (!fs.existsSync(TOKEN_FILE)) {
|
|
638
|
+
const legacyPath = path.join(
|
|
639
|
+
process.env.APPDATA || process.env.HOME || '.',
|
|
640
|
+
'copilot-agent', 'copilot-token.json'
|
|
641
|
+
);
|
|
642
|
+
if (fs.existsSync(legacyPath)) {
|
|
643
|
+
const dir = path.dirname(TOKEN_FILE);
|
|
644
|
+
if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
|
|
645
|
+
fs.copyFileSync(legacyPath, TOKEN_FILE);
|
|
646
|
+
console.log('[COPILOT] Migrated token from legacy path');
|
|
647
|
+
}
|
|
648
|
+
}
|
|
649
|
+
|
|
491
650
|
if (fs.existsSync(TOKEN_FILE)) {
|
|
492
651
|
const data = JSON.parse(fs.readFileSync(TOKEN_FILE, 'utf8'));
|
|
493
652
|
if (data.access_token) {
|
|
@@ -731,7 +890,7 @@ function exchangeForCopilotSession() {
|
|
|
731
890
|
* Call GitHub Copilot API
|
|
732
891
|
* Uses session token (not OAuth token) - exchanges if needed
|
|
733
892
|
*/
|
|
734
|
-
async function callCopilot(messages) {
|
|
893
|
+
async function callCopilot(messages, modelOverride = null) {
|
|
735
894
|
// Ensure we have OAuth token
|
|
736
895
|
if (!apiKeys.copilot) {
|
|
737
896
|
if (!loadCopilotToken()) {
|
|
@@ -750,10 +909,11 @@ async function callCopilot(messages) {
|
|
|
750
909
|
|
|
751
910
|
return new Promise((resolve, reject) => {
|
|
752
911
|
const hasVision = messages.some(m => Array.isArray(m.content));
|
|
753
|
-
const
|
|
912
|
+
const modelKey = resolveCopilotModelKey(modelOverride);
|
|
913
|
+
const modelInfo = COPILOT_MODELS[modelKey] || COPILOT_MODELS['gpt-4o'];
|
|
754
914
|
const modelId = hasVision && !modelInfo.vision ? 'gpt-4o' : modelInfo.id;
|
|
755
915
|
|
|
756
|
-
console.log(`[Copilot] Vision request: ${hasVision}, Model: ${modelId}`);
|
|
916
|
+
console.log(`[Copilot] Vision request: ${hasVision}, Model: ${modelId} (key=${modelKey})`);
|
|
757
917
|
|
|
758
918
|
const data = JSON.stringify({
|
|
759
919
|
model: modelId,
|
|
@@ -1049,10 +1209,36 @@ function callOllama(messages) {
|
|
|
1049
1209
|
}
|
|
1050
1210
|
|
|
1051
1211
|
/**
|
|
1052
|
-
*
|
|
1212
|
+
* Detect if AI response was truncated mid-stream
|
|
1213
|
+
* Uses heuristics to identify incomplete responses
|
|
1214
|
+
*/
|
|
1215
|
+
function detectTruncation(response) {
  // Heuristically decide whether `response` (the model's reply text) looks
  // cut off. Returns true when at least one signal below fires, else false.
  // Non-string values and replies shorter than 100 characters are never
  // reported as truncated.
  //
  // All "ends with" checks are anchored to the END OF THE WHOLE REPLY. They
  // deliberately avoid the multiline flag: with it, `$` matches at every line
  // end, so an interior line such as "Step 1." or a "---" rule would mark a
  // complete reply as truncated and trigger needless continuation requests.
  if (typeof response !== 'string' || response.length < 100) return false;

  const count = (pattern) => (response.match(pattern) || []).length;

  const truncationSignals = [
    // Ends inside a ```json block whose opening brace is never closed
    /```json\s*\{[^}]*$/.test(response),
    // Odd number of code fences: a block was opened but not closed
    count(/```/g) % 2 !== 0,
    // Ends on a letter or comma, i.e. without terminal punctuation
    /[a-z,]\s*$/i.test(response),
    // Final line is only a list number ("3."), so the item text is missing
    /(?:^|\n)[ \t]*\d+\.\s*$/.test(response),
    // Ends with a dangling "-" bullet/dash (a "---" rule does not count)
    /(?:^|\s)-\s*$/.test(response),
    // More opening than closing parentheses/brackets
    count(/\(/g) > count(/\)/g),
    count(/\[/g) > count(/\]/g)
  ];

  return truncationSignals.some(Boolean);
}
|
|
1236
|
+
|
|
1237
|
+
/**
|
|
1238
|
+
* Send a message and get AI response with auto-continuation
|
|
1053
1239
|
*/
|
|
1054
1240
|
async function sendMessage(userMessage, options = {}) {
|
|
1055
|
-
const { includeVisualContext = false, coordinates = null } = options;
|
|
1241
|
+
const { includeVisualContext = false, coordinates = null, maxContinuations = 2, model = null } = options;
|
|
1056
1242
|
|
|
1057
1243
|
// Enhance message with coordinate context if provided
|
|
1058
1244
|
let enhancedMessage = userMessage;
|
|
@@ -1065,6 +1251,7 @@ async function sendMessage(userMessage, options = {}) {
|
|
|
1065
1251
|
|
|
1066
1252
|
try {
|
|
1067
1253
|
let response;
|
|
1254
|
+
let effectiveModel = currentCopilotModel;
|
|
1068
1255
|
|
|
1069
1256
|
switch (currentProvider) {
|
|
1070
1257
|
case 'copilot':
|
|
@@ -1075,7 +1262,14 @@ async function sendMessage(userMessage, options = {}) {
|
|
|
1075
1262
|
throw new Error('Not authenticated with GitHub Copilot.\n\nTo authenticate:\n1. Type /login and authorize in browser\n2. Or set GH_TOKEN or GITHUB_TOKEN environment variable');
|
|
1076
1263
|
}
|
|
1077
1264
|
}
|
|
1078
|
-
|
|
1265
|
+
effectiveModel = resolveCopilotModelKey(model);
|
|
1266
|
+
// Enforce vision-capable model when visual context is included
|
|
1267
|
+
if (includeVisualContext && COPILOT_MODELS[effectiveModel] && !COPILOT_MODELS[effectiveModel].vision) {
|
|
1268
|
+
const visionFallback = AI_PROVIDERS.copilot.visionModel || 'gpt-4o';
|
|
1269
|
+
console.log(`[AI] Model ${effectiveModel} lacks vision, upgrading to ${visionFallback} for visual context`);
|
|
1270
|
+
effectiveModel = visionFallback;
|
|
1271
|
+
}
|
|
1272
|
+
response = await callCopilot(messages, effectiveModel);
|
|
1079
1273
|
break;
|
|
1080
1274
|
|
|
1081
1275
|
case 'openai':
|
|
@@ -1098,6 +1292,50 @@ async function sendMessage(userMessage, options = {}) {
|
|
|
1098
1292
|
break;
|
|
1099
1293
|
}
|
|
1100
1294
|
|
|
1295
|
+
// Auto-continuation for truncated responses
|
|
1296
|
+
let fullResponse = response;
|
|
1297
|
+
let continuationCount = 0;
|
|
1298
|
+
|
|
1299
|
+
while (detectTruncation(fullResponse) && continuationCount < maxContinuations) {
|
|
1300
|
+
continuationCount++;
|
|
1301
|
+
console.log(`[AI] Response appears truncated, continuing (${continuationCount}/${maxContinuations})...`);
|
|
1302
|
+
|
|
1303
|
+
// Add partial response to history temporarily
|
|
1304
|
+
conversationHistory.push({ role: 'assistant', content: fullResponse });
|
|
1305
|
+
|
|
1306
|
+
// Build continuation request
|
|
1307
|
+
const continueMessages = buildMessages('Continue from where you left off. Do not repeat what you already said.', false);
|
|
1308
|
+
|
|
1309
|
+
try {
|
|
1310
|
+
let continuation;
|
|
1311
|
+
switch (currentProvider) {
|
|
1312
|
+
case 'copilot':
|
|
1313
|
+
continuation = await callCopilot(continueMessages, effectiveModel);
|
|
1314
|
+
break;
|
|
1315
|
+
case 'openai':
|
|
1316
|
+
continuation = await callOpenAI(continueMessages);
|
|
1317
|
+
break;
|
|
1318
|
+
case 'anthropic':
|
|
1319
|
+
continuation = await callAnthropic(continueMessages);
|
|
1320
|
+
break;
|
|
1321
|
+
case 'ollama':
|
|
1322
|
+
default:
|
|
1323
|
+
continuation = await callOllama(continueMessages);
|
|
1324
|
+
}
|
|
1325
|
+
|
|
1326
|
+
// Append continuation
|
|
1327
|
+
fullResponse += '\n' + continuation;
|
|
1328
|
+
|
|
1329
|
+
// Update history with combined response
|
|
1330
|
+
conversationHistory.pop(); // Remove partial
|
|
1331
|
+
} catch (contErr) {
|
|
1332
|
+
console.warn('[AI] Continuation failed:', contErr.message);
|
|
1333
|
+
break;
|
|
1334
|
+
}
|
|
1335
|
+
}
|
|
1336
|
+
|
|
1337
|
+
response = fullResponse;
|
|
1338
|
+
|
|
1101
1339
|
// Add to conversation history
|
|
1102
1340
|
conversationHistory.push({ role: 'user', content: enhancedMessage });
|
|
1103
1341
|
conversationHistory.push({ role: 'assistant', content: response });
|
|
@@ -1111,6 +1349,8 @@ async function sendMessage(userMessage, options = {}) {
|
|
|
1111
1349
|
success: true,
|
|
1112
1350
|
message: response,
|
|
1113
1351
|
provider: currentProvider,
|
|
1352
|
+
model: effectiveModel,
|
|
1353
|
+
modelVersion: COPILOT_MODELS[effectiveModel]?.id || null,
|
|
1114
1354
|
hasVisualContext: includeVisualContext && visualContextBuffer.length > 0
|
|
1115
1355
|
};
|
|
1116
1356
|
|
|
@@ -1118,7 +1358,8 @@ async function sendMessage(userMessage, options = {}) {
|
|
|
1118
1358
|
return {
|
|
1119
1359
|
success: false,
|
|
1120
1360
|
error: error.message,
|
|
1121
|
-
provider: currentProvider
|
|
1361
|
+
provider: currentProvider,
|
|
1362
|
+
model: resolveCopilotModelKey(model)
|
|
1122
1363
|
};
|
|
1123
1364
|
}
|
|
1124
1365
|
}
|
|
@@ -1375,6 +1616,44 @@ function analyzeActionSafety(action, targetInfo = {}) {
|
|
|
1375
1616
|
case 'drag':
|
|
1376
1617
|
result.riskLevel = ActionRiskLevel.MEDIUM;
|
|
1377
1618
|
break;
|
|
1619
|
+
case 'focus_window':
|
|
1620
|
+
case 'bring_window_to_front':
|
|
1621
|
+
result.riskLevel = ActionRiskLevel.LOW;
|
|
1622
|
+
break;
|
|
1623
|
+
case 'send_window_to_back':
|
|
1624
|
+
case 'minimize_window':
|
|
1625
|
+
case 'restore_window':
|
|
1626
|
+
result.riskLevel = ActionRiskLevel.LOW;
|
|
1627
|
+
break;
|
|
1628
|
+
case 'run_command':
|
|
1629
|
+
// Analyze command safety
|
|
1630
|
+
const cmd = (action.command || '').toLowerCase();
|
|
1631
|
+
const dangerousPatterns = [
|
|
1632
|
+
/\b(rm|del|erase|rmdir|rd)\s+(-[rf]+|\/[sq]+|\*)/i,
|
|
1633
|
+
/Remove-Item.*-Recurse.*-Force/i,
|
|
1634
|
+
/\bformat\s+[a-z]:/i, // Match "format C:" but not "Format-Table"
|
|
1635
|
+
/\b(shutdown|restart|reboot)\b/i,
|
|
1636
|
+
/\breg\s+(delete|add)\b/i,
|
|
1637
|
+
/\bnet\s+(user|localgroup)\b/i,
|
|
1638
|
+
/\b(sudo|runas)\b/i,
|
|
1639
|
+
/Start-Process.*-Verb\s+RunAs/i,
|
|
1640
|
+
/Set-ExecutionPolicy/i,
|
|
1641
|
+
/Stop-Process.*-Force/i,
|
|
1642
|
+
];
|
|
1643
|
+
|
|
1644
|
+
const isDangerous = dangerousPatterns.some(p => p.test(action.command || ''));
|
|
1645
|
+
if (isDangerous) {
|
|
1646
|
+
result.riskLevel = ActionRiskLevel.CRITICAL;
|
|
1647
|
+
result.warnings.push('Potentially destructive command');
|
|
1648
|
+
result.requiresConfirmation = true;
|
|
1649
|
+
} else if (cmd.includes('rm ') || cmd.includes('del ') || cmd.includes('remove')) {
|
|
1650
|
+
result.riskLevel = ActionRiskLevel.HIGH;
|
|
1651
|
+
result.warnings.push('Command may delete files');
|
|
1652
|
+
result.requiresConfirmation = true;
|
|
1653
|
+
} else {
|
|
1654
|
+
result.riskLevel = ActionRiskLevel.MEDIUM;
|
|
1655
|
+
}
|
|
1656
|
+
break;
|
|
1378
1657
|
}
|
|
1379
1658
|
|
|
1380
1659
|
// Check target info for dangerous patterns
|
|
@@ -1449,6 +1728,16 @@ function describeAction(action, targetInfo = {}) {
|
|
|
1449
1728
|
return `Scroll ${action.direction} ${action.amount || 3} times`;
|
|
1450
1729
|
case 'drag':
|
|
1451
1730
|
return `Drag from (${action.fromX}, ${action.fromY}) to (${action.toX}, ${action.toY})`;
|
|
1731
|
+
case 'focus_window':
|
|
1732
|
+
return `Focus window ${action.windowHandle || action.hwnd || action.title || action.processName || ''}`.trim();
|
|
1733
|
+
case 'bring_window_to_front':
|
|
1734
|
+
return `Bring window to front ${action.windowHandle || action.hwnd || action.title || action.processName || ''}`.trim();
|
|
1735
|
+
case 'send_window_to_back':
|
|
1736
|
+
return `Send window to back ${action.windowHandle || action.hwnd || action.title || action.processName || ''}`.trim();
|
|
1737
|
+
case 'minimize_window':
|
|
1738
|
+
return `Minimize window ${action.windowHandle || action.hwnd || action.title || action.processName || ''}`.trim();
|
|
1739
|
+
case 'restore_window':
|
|
1740
|
+
return `Restore window ${action.windowHandle || action.hwnd || action.title || action.processName || ''}`.trim();
|
|
1452
1741
|
case 'wait':
|
|
1453
1742
|
return `Wait ${action.ms}ms`;
|
|
1454
1743
|
case 'screenshot':
|
|
@@ -1539,7 +1828,7 @@ async function executeActions(actionData, onAction = null, onScreenshot = null,
|
|
|
1539
1828
|
return { success: false, error: 'No valid actions provided' };
|
|
1540
1829
|
}
|
|
1541
1830
|
|
|
1542
|
-
const { onRequireConfirmation, targetAnalysis = {}, actionExecutor } = options;
|
|
1831
|
+
const { onRequireConfirmation, targetAnalysis = {}, actionExecutor, skipSafetyConfirmation = false } = options;
|
|
1543
1832
|
|
|
1544
1833
|
console.log('[AI-SERVICE] Executing actions:', actionData.thought || 'No thought provided');
|
|
1545
1834
|
console.log('[AI-SERVICE] Actions:', JSON.stringify(actionData.actions, null, 2));
|
|
@@ -1573,8 +1862,8 @@ async function executeActions(actionData, onAction = null, onScreenshot = null,
|
|
|
1573
1862
|
const safety = analyzeActionSafety(action, targetInfo);
|
|
1574
1863
|
console.log(`[AI-SERVICE] Action ${i} safety: ${safety.riskLevel}`, safety.warnings);
|
|
1575
1864
|
|
|
1576
|
-
// If HIGH or CRITICAL risk, require confirmation
|
|
1577
|
-
if (safety.requiresConfirmation) {
|
|
1865
|
+
// If HIGH or CRITICAL risk, require confirmation (unless user already confirmed via Execute button)
|
|
1866
|
+
if (safety.requiresConfirmation && !skipSafetyConfirmation) {
|
|
1578
1867
|
console.log(`[AI-SERVICE] Action ${i} requires user confirmation`);
|
|
1579
1868
|
|
|
1580
1869
|
// Store as pending action
|
|
@@ -1595,8 +1884,28 @@ async function executeActions(actionData, onAction = null, onScreenshot = null,
|
|
|
1595
1884
|
pendingConfirmation = true;
|
|
1596
1885
|
break; // Stop execution, wait for confirmation
|
|
1597
1886
|
}
|
|
1887
|
+
|
|
1888
|
+
if (skipSafetyConfirmation && safety.requiresConfirmation) {
|
|
1889
|
+
console.log(`[AI-SERVICE] Action ${i} safety bypassed (user pre-confirmed via Execute button)`);
|
|
1890
|
+
}
|
|
1598
1891
|
|
|
1599
1892
|
// Execute the action (SAFE/LOW/MEDIUM risk)
|
|
1893
|
+
// AUTO-FOCUS: Check if this is an interaction that requires window focus (click/type)
|
|
1894
|
+
// and if the target window is in the background.
|
|
1895
|
+
if ((action.type === 'click' || action.type === 'double_click' || action.type === 'right_click') && action.x !== undefined) {
|
|
1896
|
+
if (uiWatcher && uiWatcher.isPolling) {
|
|
1897
|
+
const elementAtPoint = uiWatcher.getElementAtPoint(action.x, action.y);
|
|
1898
|
+
if (elementAtPoint && elementAtPoint.windowHandle) {
|
|
1899
|
+
// Found an element with a known window handle
|
|
1900
|
+
// Focus it first to ensure click goes to the right window (not trapped by overlay or obscuring window)
|
|
1901
|
+
// We can call systemAutomation.focusWindow directly
|
|
1902
|
+
console.log(`[AI-SERVICE] Auto-focusing window handle ${elementAtPoint.windowHandle} for click at (${action.x}, ${action.y})`);
|
|
1903
|
+
await systemAutomation.focusWindow(elementAtPoint.windowHandle);
|
|
1904
|
+
await new Promise(r => setTimeout(r, 450)); // Wait for window animation/focus settling
|
|
1905
|
+
}
|
|
1906
|
+
}
|
|
1907
|
+
}
|
|
1908
|
+
|
|
1600
1909
|
const result = await (actionExecutor ? actionExecutor(action) : systemAutomation.executeAction(action));
|
|
1601
1910
|
result.reason = action.reason || '';
|
|
1602
1911
|
result.safety = safety;
|
|
@@ -1724,5 +2033,10 @@ module.exports = {
|
|
|
1724
2033
|
clearPendingAction,
|
|
1725
2034
|
confirmPendingAction,
|
|
1726
2035
|
rejectPendingAction,
|
|
1727
|
-
resumeAfterConfirmation
|
|
2036
|
+
resumeAfterConfirmation,
|
|
2037
|
+
// UI awareness
|
|
2038
|
+
setUIWatcher,
|
|
2039
|
+
getUIWatcher,
|
|
2040
|
+
setSemanticDOMSnapshot,
|
|
2041
|
+
clearSemanticDOMSnapshot
|
|
1728
2042
|
};
|