copilot-liku-cli 0.0.8 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "copilot-liku-cli",
3
- "version": "0.0.8",
3
+ "version": "0.0.9",
4
4
  "description": "GitHub Copilot CLI with headless agent + ultra-thin overlay architecture",
5
5
  "main": "src/main/index.js",
6
6
  "bin": {
@@ -18,6 +18,7 @@ const { VerifierAgent } = require('./verifier');
18
18
  const { ProducerAgent } = require('./producer');
19
19
  const { ResearcherAgent } = require('./researcher');
20
20
  const { AgentStateManager } = require('./state-manager');
21
+ const { TraceWriter } = require('./trace-writer');
21
22
 
22
23
  module.exports = {
23
24
  AgentOrchestrator,
@@ -27,6 +28,7 @@ module.exports = {
27
28
  ProducerAgent,
28
29
  ResearcherAgent,
29
30
  AgentStateManager,
31
+ TraceWriter,
30
32
 
31
33
  // Factory function for creating configured orchestrator
32
34
  createAgentSystem: (aiService, options = {}) => {
@@ -47,8 +49,11 @@ module.exports = {
47
49
  modelMetadata
48
50
  });
49
51
 
52
+ // Attach persistent flight recorder
53
+ const traceWriter = new TraceWriter(orchestrator);
54
+
50
55
  // Return object with both orchestrator and stateManager
51
- return { orchestrator, stateManager };
56
+ return { orchestrator, stateManager, traceWriter };
52
57
  },
53
58
 
54
59
  // Recovery function for checkpoint restoration
@@ -181,6 +181,33 @@ class AgentOrchestrator extends EventEmitter {
181
181
 
182
182
  // ===== Handoff Management =====
183
183
 
184
+ /**
185
+ * Execute multiple agents in parallel (e.g., Builder + Researcher)
186
+ * Returns array of results in the same order as the roles array.
187
+ */
188
+ async executeParallel(roles, context, message) {
189
+ const agents = roles.map(role => {
190
+ const agent = this.agents.get(role);
191
+ if (!agent) throw new Error(`Agent not found for parallel execution: ${role}`);
192
+ return { role, agent };
193
+ });
194
+
195
+ this.emit('parallel:start', { roles, message });
196
+
197
+ const task = { description: message, context };
198
+ const results = await Promise.all(
199
+ agents.map(({ role, agent }) => {
200
+ this.stateManager.updateAgentActivity(agent.id);
201
+ return agent.process(task, context).catch(err => ({
202
+ success: false, error: err.message, role
203
+ }));
204
+ })
205
+ );
206
+
207
+ this.emit('parallel:complete', { roles, results: results.map((r, i) => ({ role: roles[i], success: r.success })) });
208
+ return results;
209
+ }
210
+
184
211
  async executeHandoff(fromAgent, targetRole, context, message) {
185
212
  const targetAgent = this.agents.get(targetRole);
186
213
 
@@ -0,0 +1,83 @@
1
+ /**
2
+ * Agent Trace Writer — persistent JSONL flight recorder
3
+ *
4
+ * Subscribes to orchestrator events and writes a structured trace log
5
+ * to ~/.liku-cli/traces/<sessionId>.jsonl for post-hoc debugging.
6
+ */
7
+
8
+ const fs = require('fs');
9
+ const path = require('path');
10
+ const os = require('os');
11
+
12
const TRACE_DIR = path.join(os.homedir(), '.liku-cli', 'traces');

/**
 * Persistent JSONL flight recorder.
 *
 * Subscribes to orchestrator events and appends one JSON object per line to
 * ~/.liku-cli/traces/<sessionId>.jsonl. Recording is best-effort: a failure
 * to open, serialize or write a trace is logged and never propagated into
 * the orchestrator's emit() call.
 */
class TraceWriter {
  /**
   * @param {import('events').EventEmitter} orchestrator - Emitter whose events are recorded.
   */
  constructor(orchestrator) {
    this.orchestrator = orchestrator;
    this.stream = null;
    this.sessionId = null;
    // [event, handler] pairs registered by this instance, so destroy() can
    // detach exactly these and nothing else.
    this._handlers = [];

    this._bindEvents();
  }

  /** Create the trace directory (owner-only) if it does not exist yet. */
  _ensureDir() {
    // recursive:true makes this a no-op when the directory already exists.
    fs.mkdirSync(TRACE_DIR, { recursive: true, mode: 0o700 });
  }

  /**
   * Append one trace record. Does nothing when no session stream is open.
   * @param {string} event - Event name stored in the record.
   * @param {object} [data] - Extra fields merged into the record.
   */
  _write(event, data) {
    if (!this.stream) return;
    const header = {
      ts: new Date().toISOString(),
      session: this.sessionId,
      event
    };
    let line;
    try {
      line = JSON.stringify({ ...header, ...data });
    } catch (err) {
      // Circular references or BigInt values make JSON.stringify throw; keep
      // a marker record instead of throwing inside the emitter's listener.
      line = JSON.stringify({ ...header, traceError: `unserializable payload: ${err.message}` });
    }
    this.stream.write(line + '\n');
  }

  /** Register a listener and remember it for later removal. */
  _on(event, handler) {
    this.orchestrator.on(event, handler);
    this._handlers.push([event, handler]);
  }

  /** Open the trace file for a new session, closing any previous one. */
  _openStream(session) {
    // A second session:start without session:end would otherwise leak the old stream.
    this._close();
    try {
      this._ensureDir();
      this.sessionId = session.id;
      // Keep the file inside TRACE_DIR even if the id contains path
      // separators or characters that are invalid in file names.
      const safeName = String(session.id).replace(/[^\w.-]/g, '_');
      const filePath = path.join(TRACE_DIR, `${safeName}.jsonl`);
      const stream = fs.createWriteStream(filePath, { flags: 'a', mode: 0o600 });
      // Without an 'error' listener a failed write would crash the process.
      stream.on('error', (err) => {
        console.warn('[TRACE] Trace stream error:', err.message);
        if (this.stream === stream) this.stream = null;
      });
      this.stream = stream;
      this._write('session:start', { metadata: session.metadata });
    } catch (err) {
      console.warn('[TRACE] Could not start trace:', err.message);
      this._close();
    }
  }

  /** Wire every recorded orchestrator event to a trace record. */
  _bindEvents() {
    this._on('session:start', (session) => this._openStream(session));

    this._on('session:end', (session) => {
      this._write('session:end', { summary: session?.summary });
      this._close();
    });

    this._on('task:start', (d) => this._write('task:start', { task: d.task, agent: d.agent }));
    this._on('task:complete', (d) => this._write('task:complete', { success: d.result?.success }));
    this._on('task:error', (d) => this._write('task:error', { error: d.error?.message || String(d.error) }));
    this._on('handoff:execute', (h) => this._write('handoff', { from: h.from, to: h.to, message: h.message }));
    this._on('checkpoint', (cp) => this._write('checkpoint', { label: cp.label }));

    // Agent-level events
    this._on('agent:log', (entry) => this._write('agent:log', entry));
    this._on('agent:proof', (proof) => this._write('agent:proof', proof));
    this._on('agent:handoff', (h) => this._write('agent:handoff', h));
  }

  /** Flush and close the current session stream, if any. */
  _close() {
    if (this.stream) {
      this.stream.end();
      this.stream = null;
    }
    this.sessionId = null;
  }

  /** Close the trace and detach only the listeners this writer registered. */
  destroy() {
    this._close();
    for (const [event, handler] of this._handlers) {
      this.orchestrator.removeListener(event, handler);
    }
    this._handlers = [];
  }
}
82
+
83
+ module.exports = { TraceWriter };
@@ -189,6 +189,214 @@ const AI_PROVIDERS = {
189
189
  // GitHub Copilot OAuth Configuration
190
190
  const COPILOT_CLIENT_ID = 'Iv1.b507a08c87ecfe98';
191
191
 
192
// ===== TOOL DEFINITIONS FOR NATIVE FUNCTION CALLING =====
// These map directly to the action types the system already executes.

/**
 * Build one function-tool descriptor in the shape sent as `tools` in the
 * chat-completions request body.
 *
 * @param {string} name - Tool name the model will call.
 * @param {string} description - Guidance shown to the model.
 * @param {object} [properties] - JSON-schema properties of the arguments object.
 * @param {string[]} [required] - Names of mandatory arguments; the `required`
 *   key is omitted entirely when this is empty.
 * @returns {{type: 'function', function: object}}
 */
function defineLikuTool(name, description, properties = {}, required = []) {
  const parameters = { type: 'object', properties };
  if (required.length > 0) {
    parameters.required = required;
  }
  return { type: 'function', function: { name, description, parameters } };
}

/** Fresh x/y coordinate schema for each tool, so descriptors never share objects. */
function pixelCoordinateProps() {
  return {
    x: { type: 'number', description: 'X pixel coordinate' },
    y: { type: 'number', description: 'Y pixel coordinate' }
  };
}

const LIKU_TOOLS = [
  defineLikuTool(
    'click_element',
    'Click a UI element by its visible text or name (uses Windows UI Automation). Preferred over coordinate clicks.',
    {
      text: { type: 'string', description: 'The visible text/name of the element to click' },
      reason: { type: 'string', description: 'Why this click is needed' }
    },
    ['text']
  ),
  defineLikuTool(
    'click',
    'Left click at pixel coordinates on screen. Use as fallback when click_element cannot find the target.',
    {
      ...pixelCoordinateProps(),
      reason: { type: 'string', description: 'Why clicking here' }
    },
    ['x', 'y']
  ),
  defineLikuTool(
    'double_click',
    'Double click at pixel coordinates.',
    pixelCoordinateProps(),
    ['x', 'y']
  ),
  defineLikuTool(
    'right_click',
    'Right click at pixel coordinates to open context menu.',
    pixelCoordinateProps(),
    ['x', 'y']
  ),
  defineLikuTool(
    'type_text',
    'Type text into the currently focused input field.',
    {
      text: { type: 'string', description: 'The text to type' }
    },
    ['text']
  ),
  defineLikuTool(
    'press_key',
    'Press a key or keyboard shortcut (e.g., "enter", "ctrl+c", "win+r", "alt+tab").',
    {
      key: { type: 'string', description: 'Key combo string (e.g., "ctrl+s", "enter", "win+d")' },
      reason: { type: 'string', description: 'Why pressing this key' }
    },
    ['key']
  ),
  defineLikuTool(
    'scroll',
    'Scroll up or down.',
    {
      direction: { type: 'string', enum: ['up', 'down'], description: 'Scroll direction' },
      amount: { type: 'number', description: 'Scroll amount (default 3)' }
    },
    ['direction']
  ),
  defineLikuTool(
    'drag',
    'Drag from one point to another.',
    {
      // Described like every other coordinate parameter so the model is told
      // these are screen pixels too.
      fromX: { type: 'number', description: 'X pixel coordinate where the drag starts' },
      fromY: { type: 'number', description: 'Y pixel coordinate where the drag starts' },
      toX: { type: 'number', description: 'X pixel coordinate where the drag ends' },
      toY: { type: 'number', description: 'Y pixel coordinate where the drag ends' }
    },
    ['fromX', 'fromY', 'toX', 'toY']
  ),
  defineLikuTool(
    'wait',
    'Wait for a specified number of milliseconds before the next action.',
    {
      ms: { type: 'number', description: 'Milliseconds to wait' }
    },
    ['ms']
  ),
  defineLikuTool(
    'screenshot',
    'Take a screenshot to see the current screen state. Use for verification or when elements are not in the UI tree.'
  ),
  defineLikuTool(
    'run_command',
    'Execute a shell command and return output. Preferred for any file/system operations.',
    {
      command: { type: 'string', description: 'Shell command to execute' },
      cwd: { type: 'string', description: 'Working directory (optional)' },
      shell: { type: 'string', enum: ['powershell', 'cmd', 'bash'], description: 'Shell to use (default: powershell on Windows)' }
    },
    ['command']
  ),
  // Neither argument is required: the caller may identify the window by title or by handle.
  defineLikuTool(
    'focus_window',
    'Bring a window to the foreground by its handle or title.',
    {
      title: { type: 'string', description: 'Partial window title to match' },
      windowHandle: { type: 'number', description: 'Window handle (hwnd)' }
    }
  )
];
368
+
369
/**
 * Convert tool_calls from an API response into the action block format
 * that the existing executeActions pipeline expects.
 *
 * Model output is treated as untrusted:
 *  - a non-array input yields an empty list;
 *  - entries without a string `function.name` are dropped;
 *  - arguments that are missing, unparseable, or not a JSON object become `{}`;
 *  - a `type` key inside the arguments never overrides the mapped action type.
 *
 * @param {Array<{function: {name: string, arguments: (string|object)}}>} toolCalls
 * @returns {Array<object>} One action per valid tool call, in call order.
 */
function toolCallsToActions(toolCalls) {
  if (!Array.isArray(toolCalls)) return [];

  const isPlainObject = (value) =>
    value !== null && typeof value === 'object' && !Array.isArray(value);

  const parseArguments = (raw, toolName) => {
    // Accept an already-decoded arguments object as well as a JSON string.
    if (isPlainObject(raw)) return raw;
    if (raw == null || raw === '') return {};
    try {
      const parsed = JSON.parse(raw);
      // "null", "42", "[...]" are valid JSON but not an arguments object.
      return isPlainObject(parsed) ? parsed : {};
    } catch (err) {
      console.warn(`[AI] Ignoring malformed arguments for tool "${toolName}": ${err.message}`);
      return {};
    }
  };

  return toolCalls
    .filter((tc) => typeof tc?.function?.name === 'string')
    .map((tc) => {
      const name = tc.function.name;
      // Strip any model-supplied `type` so it cannot replace the action type below.
      const { type: _modelSuppliedType, ...args } = parseArguments(tc.function.arguments, name);

      // Map tool names back to existing action types
      switch (name) {
        case 'type_text':
          return { type: 'type', ...args };
        case 'press_key':
          return { type: 'key', key: args.key, reason: args.reason };
        case 'screenshot':
          return { type: 'screenshot' };
        case 'focus_window':
          if (args.title) return { type: 'bring_window_to_front', title: args.title };
          return { type: 'focus_window', windowHandle: args.windowHandle };
        default:
          // click_element, click, double_click, right_click, scroll, drag,
          // wait, run_command and unknown tools all keep their own name.
          return { type: name, ...args };
      }
    });
}
399
+
192
400
  // Current configuration
193
401
  let currentProvider = 'copilot'; // Default to GitHub Copilot
194
402
  let apiKeys = {
@@ -218,6 +426,41 @@ let oauthCallback = null;
218
426
  // Conversation history for context
219
427
  let conversationHistory = [];
220
428
  const MAX_HISTORY = 20;
429
+ const HISTORY_FILE = path.join(LIKU_HOME, 'conversation-history.json');
430
+
431
/**
 * Load conversation history from disk (survives process restarts).
 *
 * Reads HISTORY_FILE if it exists and replaces the in-memory
 * `conversationHistory` with at most the last MAX_HISTORY * 2 entries.
 * Anything that is not a JSON array is ignored, and array entries that are
 * not plain objects (null, strings, numbers, nested arrays) are dropped so a
 * damaged file cannot inject malformed messages into later requests.
 * Failures are logged and leave the current history untouched.
 */
function loadConversationHistory() {
  try {
    if (!fs.existsSync(HISTORY_FILE)) return;

    const data = JSON.parse(fs.readFileSync(HISTORY_FILE, 'utf-8'));
    if (!Array.isArray(data)) {
      console.warn('[AI] Could not load conversation history:', 'file does not contain a JSON array');
      return;
    }

    // NOTE(review): entries are assumed to be message objects; the exact
    // schema is defined by the code that appends to conversationHistory.
    const entries = data.filter(
      (entry) => entry !== null && typeof entry === 'object' && !Array.isArray(entry)
    );
    conversationHistory = entries.slice(-MAX_HISTORY * 2);
    console.log(`[AI] Restored ${conversationHistory.length} history entries from disk`);
  } catch (e) {
    console.warn('[AI] Could not load conversation history:', e.message);
  }
}
447
+
448
/**
 * Persist conversation history to disk.
 *
 * Writes the last MAX_HISTORY * 2 entries to HISTORY_FILE. The data is first
 * written to a temporary sibling file and then renamed over the target, so:
 *  - a crash mid-write cannot leave a truncated history file behind;
 *  - the file is always freshly created with mode 0o600 (the `mode` option
 *    of writeFileSync is only honoured when the file is created, so writing
 *    in place would keep the permissions of an existing file).
 * Failures are logged and never thrown.
 */
function saveConversationHistory() {
  // The pid suffix keeps concurrent processes from sharing a temp file.
  const tmpFile = `${HISTORY_FILE}.${process.pid}.tmp`;
  try {
    // recursive:true makes this a no-op when LIKU_HOME already exists.
    fs.mkdirSync(LIKU_HOME, { recursive: true, mode: 0o700 });
    const payload = JSON.stringify(conversationHistory.slice(-MAX_HISTORY * 2));
    fs.writeFileSync(tmpFile, payload, { mode: 0o600 });
    fs.renameSync(tmpFile, HISTORY_FILE);
  } catch (e) {
    console.warn('[AI] Could not save conversation history:', e.message);
    try {
      fs.unlinkSync(tmpFile);
    } catch (cleanupErr) {
      // Best-effort cleanup: the temp file usually does not exist if the
      // failure happened before or during its creation.
    }
  }
}
461
+
462
+ // Restore history on module load
463
+ loadConversationHistory();
221
464
 
222
465
  // Visual context for AI awareness
223
466
  let visualContextBuffer = [];
@@ -366,11 +609,15 @@ When the user asks you to DO something, respond with a JSON action block:
366
609
  - Be specific about UI elements, text, buttons
367
610
 
368
611
  **For ACTION requests** (click here, type this, open that):
369
- - ALWAYS respond with the JSON action block
612
+ - **YOU MUST respond with the JSON action block — NEVER respond with only a plan or description**
613
+ - **NEVER say "Let me proceed" or "I will click" without including the actual \`\`\`json action block**
614
+ - **If the user says "proceed" or "do it", output the JSON actions immediately — do not ask again**
370
615
  - Use PLATFORM-SPECIFIC shortcuts (see above!)
371
616
  - Prefer \`click_element\` over coordinate clicks when targeting named UI elements
372
617
  - Add \`wait\` actions between steps that need UI to update
373
618
  - Add verification step to confirm success
619
+ - **If an element is NOT in the Live UI State**: Use \`{"type": "screenshot"}\` first, then use coordinates from the screenshot to click. Do NOT give up or say "I can't find the element."
620
+ - **If you need to interact with web content inside an app** (like VS Code panels, browser tabs): Use keyboard shortcuts or coordinate-based clicks since web UI may not appear in UIA tree
374
621
 
375
622
  **Common Task Patterns**:
376
623
  ${PLATFORM === 'win32' ? `
@@ -391,7 +638,14 @@ ${PLATFORM === 'win32' ? `
391
638
  - **Save file**: \`ctrl+s\`
392
639
  - **Copy/Paste**: \`ctrl+c\` / \`ctrl+v\``}
393
640
 
394
- Be precise, use platform-correct shortcuts, and execute actions confidently!`;
641
+ Be precise, use platform-correct shortcuts, and execute actions confidently!
642
+
643
+ ## CRITICAL RULES
644
+ 1. **NEVER describe actions without executing them.** If the user asks you to click/type/open something, output the JSON action block.
645
+ 2. **NEVER say "Let me proceed" or "I'll do this now" without the JSON block.** Words without actions are useless.
646
+ 3. **If user says "proceed" or "go ahead", output the JSON actions IMMEDIATELY.**
647
+ 4. **When you can't find an element in Live UI State, take a screenshot and use pixel coordinates.** Don't give up.
648
+ 5. **One response = one action block.** Don't split actions across multiple messages unless the user asks you to wait.`;
395
649
 
396
650
  /**
397
651
  * Set the AI provider
@@ -668,12 +922,12 @@ function saveCopilotToken(token) {
668
922
  try {
669
923
  const dir = path.dirname(TOKEN_FILE);
670
924
  if (!fs.existsSync(dir)) {
671
- fs.mkdirSync(dir, { recursive: true });
925
+ fs.mkdirSync(dir, { recursive: true, mode: 0o700 });
672
926
  }
673
927
  fs.writeFileSync(TOKEN_FILE, JSON.stringify({
674
928
  access_token: token,
675
929
  saved_at: new Date().toISOString()
676
- }));
930
+ }), { mode: 0o600 });
677
931
  console.log('[COPILOT] Token saved');
678
932
  } catch (e) {
679
933
  console.error('[COPILOT] Failed to save token:', e.message);
@@ -920,7 +1174,9 @@ async function callCopilot(messages, modelOverride = null) {
920
1174
  messages: messages,
921
1175
  max_tokens: 4096,
922
1176
  temperature: 0.7,
923
- stream: false
1177
+ stream: false,
1178
+ tools: LIKU_TOOLS,
1179
+ tool_choice: 'auto'
924
1180
  });
925
1181
 
926
1182
  // Try multiple endpoint formats
@@ -979,7 +1235,22 @@ async function callCopilot(messages, modelOverride = null) {
979
1235
  try {
980
1236
  const result = JSON.parse(body);
981
1237
  if (result.choices && result.choices[0]) {
982
- resolveReq(result.choices[0].message.content);
1238
+ const choice = result.choices[0];
1239
+ const msg = choice.message;
1240
+
1241
+ // Handle native tool calls — convert to action JSON block
1242
+ if (msg.tool_calls && msg.tool_calls.length > 0) {
1243
+ const actions = toolCallsToActions(msg.tool_calls);
1244
+ const actionBlock = JSON.stringify({
1245
+ thought: msg.content || 'Executing requested actions',
1246
+ actions,
1247
+ verification: 'Verify the actions completed successfully'
1248
+ }, null, 2);
1249
+ console.log(`[Copilot] Received ${msg.tool_calls.length} tool_calls, converted to action block`);
1250
+ resolveReq('```json\n' + actionBlock + '\n```');
1251
+ } else {
1252
+ resolveReq(msg.content);
1253
+ }
983
1254
  } else if (result.error) {
984
1255
  rejectReq(new Error(result.error.message || 'Copilot API error'));
985
1256
  } else {
@@ -1237,6 +1508,9 @@ function detectTruncation(response) {
1237
1508
// Provider fallback priority order. sendMessage tries the currently selected
// provider first, then the remaining entries of this list in order.
const PROVIDER_FALLBACK_ORDER = ['copilot', 'openai', 'anthropic', 'ollama'];

/**
 * Send a message and get AI response with auto-continuation
 */
1513
+
1240
1514
  async function sendMessage(userMessage, options = {}) {
1241
1515
  const { includeVisualContext = false, coordinates = null, maxContinuations = 2, model = null } = options;
1242
1516
 
@@ -1253,43 +1527,55 @@ async function sendMessage(userMessage, options = {}) {
1253
1527
  let response;
1254
1528
  let effectiveModel = currentCopilotModel;
1255
1529
 
1256
- switch (currentProvider) {
1257
- case 'copilot':
1258
- // GitHub Copilot - uses OAuth token or env var
1259
- if (!apiKeys.copilot) {
1260
- // Try loading saved token
1261
- if (!loadCopilotToken()) {
1262
- throw new Error('Not authenticated with GitHub Copilot.\n\nTo authenticate:\n1. Type /login and authorize in browser\n2. Or set GH_TOKEN or GITHUB_TOKEN environment variable');
1263
- }
1264
- }
1265
- effectiveModel = resolveCopilotModelKey(model);
1266
- // Enforce vision-capable model when visual context is included
1267
- if (includeVisualContext && COPILOT_MODELS[effectiveModel] && !COPILOT_MODELS[effectiveModel].vision) {
1268
- const visionFallback = AI_PROVIDERS.copilot.visionModel || 'gpt-4o';
1269
- console.log(`[AI] Model ${effectiveModel} lacks vision, upgrading to ${visionFallback} for visual context`);
1270
- effectiveModel = visionFallback;
1271
- }
1272
- response = await callCopilot(messages, effectiveModel);
1273
- break;
1274
-
1275
- case 'openai':
1276
- if (!apiKeys.openai) {
1277
- throw new Error('OpenAI API key not set. Use /setkey openai <key> or set OPENAI_API_KEY environment variable.');
1530
+ // Build fallback chain: current provider first, then remaining in priority order
1531
+ const fallbackChain = [currentProvider, ...PROVIDER_FALLBACK_ORDER.filter(p => p !== currentProvider)];
1532
+ let lastError = null;
1533
+ let usedProvider = currentProvider;
1534
+
1535
+ for (const provider of fallbackChain) {
1536
+ try {
1537
+ switch (provider) {
1538
+ case 'copilot':
1539
+ if (!apiKeys.copilot) {
1540
+ if (!loadCopilotToken()) {
1541
+ throw new Error('Not authenticated with GitHub Copilot.');
1542
+ }
1543
+ }
1544
+ effectiveModel = resolveCopilotModelKey(model);
1545
+ if (includeVisualContext && COPILOT_MODELS[effectiveModel] && !COPILOT_MODELS[effectiveModel].vision) {
1546
+ const visionFallback = AI_PROVIDERS.copilot.visionModel || 'gpt-4o';
1547
+ console.log(`[AI] Model ${effectiveModel} lacks vision, upgrading to ${visionFallback} for visual context`);
1548
+ effectiveModel = visionFallback;
1549
+ }
1550
+ response = await callCopilot(messages, effectiveModel);
1551
+ break;
1552
+ case 'openai':
1553
+ if (!apiKeys.openai) throw new Error('OpenAI API key not set.');
1554
+ response = await callOpenAI(messages);
1555
+ break;
1556
+ case 'anthropic':
1557
+ if (!apiKeys.anthropic) throw new Error('Anthropic API key not set.');
1558
+ response = await callAnthropic(messages);
1559
+ break;
1560
+ case 'ollama':
1561
+ default:
1562
+ response = await callOllama(messages);
1563
+ break;
1278
1564
  }
1279
- response = await callOpenAI(messages);
1280
- break;
1281
-
1282
- case 'anthropic':
1283
- if (!apiKeys.anthropic) {
1284
- throw new Error('Anthropic API key not set. Use /setkey anthropic <key> or set ANTHROPIC_API_KEY environment variable.');
1565
+ usedProvider = provider;
1566
+ if (usedProvider !== currentProvider) {
1567
+ console.log(`[AI] Fallback: ${currentProvider} failed, succeeded with ${usedProvider}`);
1285
1568
  }
1286
- response = await callAnthropic(messages);
1287
- break;
1288
-
1289
- case 'ollama':
1290
- default:
1291
- response = await callOllama(messages);
1292
- break;
1569
+ break; // success — exit fallback loop
1570
+ } catch (providerErr) {
1571
+ lastError = providerErr;
1572
+ console.warn(`[AI] Provider ${provider} failed: ${providerErr.message}`);
1573
+ continue; // try next provider
1574
+ }
1575
+ }
1576
+
1577
+ if (!response) {
1578
+ throw lastError || new Error('All AI providers failed.');
1293
1579
  }
1294
1580
 
1295
1581
  // Auto-continuation for truncated responses
@@ -1345,10 +1631,13 @@ async function sendMessage(userMessage, options = {}) {
1345
1631
  conversationHistory.shift();
1346
1632
  }
1347
1633
 
1634
+ // Persist to disk for session continuity
1635
+ saveConversationHistory();
1636
+
1348
1637
  return {
1349
1638
  success: true,
1350
1639
  message: response,
1351
- provider: currentProvider,
1640
+ provider: usedProvider,
1352
1641
  model: effectiveModel,
1353
1642
  modelVersion: COPILOT_MODELS[effectiveModel]?.id || null,
1354
1643
  hasVisualContext: includeVisualContext && visualContextBuffer.length > 0
@@ -1393,6 +1682,7 @@ function handleCommand(command) {
1393
1682
  case '/clear':
1394
1683
  conversationHistory = [];
1395
1684
  clearVisualContext();
1685
+ saveConversationHistory();
1396
1686
  return { type: 'system', message: 'Conversation and visual context cleared.' };
1397
1687
 
1398
1688
  case '/vision':
@@ -2038,5 +2328,8 @@ module.exports = {
2038
2328
  setUIWatcher,
2039
2329
  getUIWatcher,
2040
2330
  setSemanticDOMSnapshot,
2041
- clearSemanticDOMSnapshot
2331
+ clearSemanticDOMSnapshot,
2332
+ // Tool-calling
2333
+ LIKU_TOOLS,
2334
+ toolCallsToActions
2042
2335
  };
package/src/main/index.js CHANGED
@@ -344,6 +344,7 @@ function createOverlayWindow() {
344
344
  webPreferences: {
345
345
  nodeIntegration: false,
346
346
  contextIsolation: true,
347
+ sandbox: true,
347
348
  preload: path.join(__dirname, '../renderer/overlay/preload.js')
348
349
  }
349
350
  });
@@ -460,6 +461,7 @@ function createChatWindow() {
460
461
  webPreferences: {
461
462
  nodeIntegration: false,
462
463
  contextIsolation: true,
464
+ sandbox: true,
463
465
  preload: path.join(__dirname, '../renderer/chat/preload.js')
464
466
  }
465
467
  });
@@ -1974,10 +1974,10 @@ function parseAIActions(aiResponse) {
1974
1974
  try {
1975
1975
  return JSON.parse(aiResponse);
1976
1976
  } catch (e) {
1977
- // Not JSON - return null
1977
+ // Not JSON - continue
1978
1978
  }
1979
1979
 
1980
- // Try to find inline JSON object
1980
+ // Try to find inline JSON object with actions array
1981
1981
  const inlineMatch = aiResponse.match(/\{[\s\S]*"actions"[\s\S]*\}/);
1982
1982
  if (inlineMatch) {
1983
1983
  try {
@@ -1987,9 +1987,92 @@ function parseAIActions(aiResponse) {
1987
1987
  }
1988
1988
  }
1989
1989
 
1990
+ // Fallback: extract actions from natural language descriptions
1991
+ // This handles cases where AI says "I'll click X at (500, 300)" without JSON
1992
+ const nlActions = parseNaturalLanguageActions(aiResponse);
1993
+ if (nlActions && nlActions.actions.length > 0) {
1994
+ console.log('[AUTOMATION] Extracted', nlActions.actions.length, 'action(s) from natural language');
1995
+ return nlActions;
1996
+ }
1997
+
1990
1998
  return null;
1991
1999
  }
1992
2000
 
2001
/**
 * Parse actions from natural language AI responses as a fallback.
 * Handles patterns like "click at (500, 300)" or "type 'hello'" in prose.
 *
 * The text is scanned line by line and each line contributes at most one
 * action; the first matching pattern wins, in this order:
 *   double-click (x, y) → right-click (x, y) → click/tap/press (x, y) →
 *   type "text" → press <key combo> → scroll up|down [n] → click "Label".
 * The specific click variants are tested before the generic one because
 * "double-click" and "right-click" also contain the word "click".
 *
 * @param {string} text - Raw AI response.
 * @returns {{thought: string, actions: object[], verification: string}|null}
 *   An action block, or null when nothing was recognised or `text` is not a string.
 */
function parseNaturalLanguageActions(text) {
  if (typeof text !== 'string') return null;

  // Keys that may stand alone or begin a combo such as "ctrl+shift+s".
  // Matched against the whole first token, so words that merely start with a
  // key name ("update", "endpoint", "tabs") are not mistaken for key presses.
  const KNOWN_FIRST_KEY = /^(?:enter|escape|tab|space|backspace|delete|home|end|up|down|left|right|f\d+|ctrl|alt|shift|win|cmd|super)$/;

  const actions = [];

  for (const line of text.split('\n')) {
    const lower = line.toLowerCase();
    const reason = line.trim();

    // Match "double-click at (x, y)"
    const dblClickMatch = lower.match(/\bdouble[- ]?click\b.*?\(\s*(\d+)\s*,\s*(\d+)\s*\)/);
    if (dblClickMatch) {
      actions.push({
        type: 'double_click',
        x: Number.parseInt(dblClickMatch[1], 10),
        y: Number.parseInt(dblClickMatch[2], 10),
        reason
      });
      continue;
    }

    // Match "right-click at (x, y)"
    const rightClickMatch = lower.match(/\bright[- ]?click\b.*?\(\s*(\d+)\s*,\s*(\d+)\s*\)/);
    if (rightClickMatch) {
      actions.push({
        type: 'right_click',
        x: Number.parseInt(rightClickMatch[1], 10),
        y: Number.parseInt(rightClickMatch[2], 10),
        reason
      });
      continue;
    }

    // Match "click at (x, y)" or "click (x, y)" or "click at coordinates (x, y)"
    const clickMatch = lower.match(/\b(?:click|tap|press)\b.*?\(\s*(\d+)\s*,\s*(\d+)\s*\)/);
    if (clickMatch) {
      actions.push({
        type: 'click',
        x: Number.parseInt(clickMatch[1], 10),
        y: Number.parseInt(clickMatch[2], 10),
        reason
      });
      continue;
    }

    // Match 'type "text"' or "type 'text'" (original casing of the text is kept)
    const typeMatch = line.match(/\btype\b.*?["']([^"']+)["']/i);
    if (typeMatch && !lower.includes('action type')) {
      actions.push({ type: 'type', text: typeMatch[1], reason });
      continue;
    }

    // Match "press Enter" or "press Ctrl+C"
    const keyMatch = lower.match(/\bpress\b\s+([\w+]+(?:\+[\w+]+)*)/);
    if (keyMatch) {
      const key = keyMatch[1];
      const firstToken = key.split('+')[0];
      // Only match plausible key combos
      if (KNOWN_FIRST_KEY.test(firstToken)) {
        actions.push({ type: 'key', key, reason });
        continue;
      }
    }

    // Match "scroll down" or "scroll up 5 lines"
    const scrollMatch = lower.match(/\bscroll\s+(up|down)(?:\s+(\d+))?\b/);
    if (scrollMatch) {
      actions.push({
        type: 'scroll',
        direction: scrollMatch[1],
        // A missing or zero amount falls back to the default of 3.
        amount: Number.parseInt(scrollMatch[2], 10) || 3,
        reason
      });
      continue;
    }

    // Match "click_element" / "click on the X button" pattern
    const clickElementMatch =
      line.match(/\bclick\s+(?:on\s+)?(?:the\s+)?["']([^"']+)["']\s*button/i) ||
      line.match(/\bclick\s+(?:on\s+)?(?:the\s+)?["']([^"']+)["']/i);
    if (clickElementMatch) {
      actions.push({ type: 'click_element', text: clickElementMatch[1], reason });
    }
  }

  if (actions.length === 0) return null;

  return {
    thought: 'Actions extracted from AI natural language response',
    actions,
    verification: 'Check that the intended actions completed successfully'
  };
}
2075
+
1993
2076
  /**
1994
2077
  * Convert grid coordinate (like "C3") to screen pixels
1995
2078
  * @param {string} coord - Grid coordinate like "C3", "AB12"
@@ -26,6 +26,15 @@ const MODE = {
26
26
  FALLBACK: 'FALLBACK' // polling after event failure, auto-retry after 30s
27
27
  };
28
28
 
29
// Denylist of sensitive processes. While the active window belongs to one of
// these, element names/text are left out of the AI context so that secrets
// shown on screen do not leak into the prompt.
const REDACTED_PROCESSES = new Set(
  [
    // password managers
    ['keepassxc', 'keepass', '1password', 'bitwarden', 'lastpass', 'dashlane', 'enpass', 'roboform', 'nordpass'],
    // remote/admin tools
    ['mstsc', 'vmconnect', 'putty', 'winscp'],
    // admin consoles
    ['powershell_ise'],
  ].flat()
);
37
+
29
38
  class UIWatcher extends EventEmitter {
30
39
  constructor(options = {}) {
31
40
  super();
@@ -438,14 +447,26 @@ $results | ConvertTo-Json -Depth 4 -Compress
438
447
  const { elements, activeWindow, lastUpdate } = this.cache;
439
448
  const age = Date.now() - lastUpdate;
440
449
 
450
+ // Redaction: if the focused window belongs to a sensitive process,
451
+ // suppress element names to avoid leaking passwords/secrets to the LLM.
452
+ const processLower = (activeWindow?.processName || '').toLowerCase();
453
+ const redacted = REDACTED_PROCESSES.has(processLower);
454
+
441
455
  // Build context string with window hierarchy
442
456
  let context = `\n## Live UI State (${age}ms ago)\n`;
443
457
 
444
458
  if (activeWindow) {
445
- context += `**Focused Window**: ${activeWindow.title || 'Unknown'} (${activeWindow.processName})\n`;
459
+ const title = redacted ? '[REDACTED — sensitive application]' : (activeWindow.title || 'Unknown');
460
+ context += `**Focused Window**: ${title} (${activeWindow.processName})\n`;
446
461
  context += `**Cursor**: (${activeWindow.bounds.x}, ${activeWindow.bounds.y}) ${activeWindow.bounds.width}x${activeWindow.bounds.height}\n\n`;
447
462
  }
448
463
 
464
+ if (redacted) {
465
+ context += `**⚠ Privacy mode active** — element names hidden because the focused application handles sensitive data.\n`;
466
+ context += `You can still take screenshots or wait for the user to switch windows.\n`;
467
+ return context;
468
+ }
469
+
449
470
  context += `**Visible Context** (${elements.length} elements detected):\n`;
450
471
 
451
472
  let listed = 0;
@@ -88,6 +88,7 @@ const contextCount = document.getElementById('context-count');
88
88
  const providerSelect = document.getElementById('provider-select');
89
89
  const modelSelect = document.getElementById('model-select');
90
90
  const authStatus = document.getElementById('auth-status');
91
+ const loginBtn = document.getElementById('login-btn');
91
92
  const tokenCount = document.getElementById('token-count');
92
93
 
93
94
  function applyElectronAppRegions() {
@@ -127,6 +128,11 @@ function updateAuthStatus(status, provider) {
127
128
 
128
129
  authStatus.className = 'status-badge';
129
130
 
131
+ // Show login button when disconnected, hide when connected
132
+ if (loginBtn) {
133
+ loginBtn.classList.toggle('hidden', status === 'connected');
134
+ }
135
+
130
136
  switch (status) {
131
137
  case 'connected':
132
138
  authStatus.classList.add('connected');
@@ -226,7 +232,7 @@ const AGENT_TRIGGERS = {
226
232
  research: /\b(research\s+agent|spawn.*research|investigate\s+this|gather\s+info(?:rmation)?)\b/i,
227
233
  verify: /\b(verify\s+agent|spawn.*verif|validate\s+this|verification\s+agent)\b/i,
228
234
  build: /\b(build\s+agent|spawn.*build|builder\s+agent|code\s+agent)\b/i,
229
- produce: /(^\\s*\\/produce\\b)|\\b(agentic\\s+producer|producer\\s+agent)\\b/i,
235
+ produce: /(^\s*\/produce\b)|\b(agentic\s+producer|producer\s+agent)\b/i,
230
236
  orchestrate: /\b(spawn\s+(?:a\s+)?(?:sub)?agent|orchestrat|multi-?agent|agent\s+system|coordinate\s+agents?)\b/i
231
237
  };
232
238
 
@@ -274,7 +280,7 @@ async function routeToAgent(text, agentType) {
274
280
  let result;
275
281
  switch (agentType) {
276
282
  case 'produce': {
277
- const cleaned = text.replace(/^\\s*\\/produce\\b\\s*/i, '');
283
+ const cleaned = text.replace(/^\s*\/produce\b\s*/i, '');
278
284
  const parsed = parseProduceOptions(cleaned || text);
279
285
  const finalPrompt = parsed.prompt || (cleaned || text);
280
286
  const referenceUrl = extractFirstUrl(finalPrompt);
@@ -434,6 +440,28 @@ if (providerSelect) {
434
440
  });
435
441
  }
436
442
 
443
+ // Login button
444
+ if (loginBtn) {
445
+ loginBtn.addEventListener('click', () => {
446
+ window.electronAPI.sendMessage('/login');
447
+ addMessage('/login', 'user');
448
+ });
449
+ }
450
+
451
+ // Auth status badge click - also triggers login when disconnected
452
+ if (authStatus) {
453
+ authStatus.style.cursor = 'pointer';
454
+ authStatus.addEventListener('click', () => {
455
+ if (authStatus.classList.contains('disconnected')) {
456
+ window.electronAPI.sendMessage('/login');
457
+ addMessage('/login', 'user');
458
+ } else {
459
+ window.electronAPI.sendMessage('/status');
460
+ addMessage('/status', 'user');
461
+ }
462
+ });
463
+ }
464
+
437
465
  // Model selection
438
466
  if (modelSelect) {
439
467
  modelSelect.addEventListener('change', (e) => {
@@ -282,6 +282,26 @@
282
282
  color: var(--text-secondary);
283
283
  }
284
284
 
285
+ .login-button {
286
+ padding: 3px 10px;
287
+ border-radius: 10px;
288
+ font-size: 10px;
289
+ font-weight: 600;
290
+ background: var(--accent-blue);
291
+ color: white;
292
+ border: none;
293
+ cursor: pointer;
294
+ transition: background 0.15s;
295
+ }
296
+
297
+ .login-button:hover {
298
+ background: var(--accent-blue-hover);
299
+ }
300
+
301
+ .login-button.hidden {
302
+ display: none;
303
+ }
304
+
285
305
  /* ===== CHAT HISTORY ===== */
286
306
  #chat-history {
287
307
  flex: 1;
@@ -544,6 +564,21 @@
544
564
  justify-content: center;
545
565
  }
546
566
 
567
+ .auth-hint {
568
+ font-size: 12px;
569
+ margin-top: 10px;
570
+ color: var(--text-secondary);
571
+ line-height: 1.6;
572
+ }
573
+
574
+ .auth-hint kbd {
575
+ background: var(--bg-secondary);
576
+ padding: 2px 5px;
577
+ border-radius: 3px;
578
+ border: 1px solid var(--border-color);
579
+ font-family: inherit;
580
+ }
581
+
547
582
  .empty-state .logo svg {
548
583
  width: 32px;
549
584
  height: 32px;
@@ -671,6 +706,7 @@
671
706
  </div>
672
707
  <div id="provider-status">
673
708
  <span id="auth-status" class="status-badge">Not Connected</span>
709
+ <button id="login-btn" class="login-button" title="Login to AI provider">Login</button>
674
710
  <span id="token-count" class="token-badge" title="Estimated tokens">0 tokens</span>
675
711
  </div>
676
712
  </div>
@@ -683,6 +719,7 @@
683
719
  </div>
684
720
  <h2>Copilot Agent</h2>
685
721
  <p>Click "Selection" to interact with screen elements, or type a command below.</p>
722
+ <p id="empty-auth-hint" class="auth-hint">Click <strong>Login</strong> above or type <kbd>/login</kbd> to connect to GitHub Copilot.<br>You can also use <kbd>/help</kbd> to see all commands.</p>
686
723
  <div class="shortcuts">
687
724
  <div class="shortcut"><kbd>Ctrl+Alt+Space</kbd> Toggle chat</div>
688
725
  <div class="shortcut"><kbd>Ctrl+Shift+O</kbd> Toggle overlay</div>