@projectservan8n/cnapse 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,14 @@
1
+ import {
2
+ AgentLearner,
3
+ getLearner,
4
+ learner_default
5
+ } from "./chunk-WSBJFRQH.js";
6
+ import "./chunk-GP73OJCZ.js";
7
+ import "./chunk-TFHK5CYF.js";
8
+ import "./chunk-OIVTPXE4.js";
9
+ import "./chunk-COKO6V5J.js";
10
+ export {
11
+ AgentLearner,
12
+ learner_default as default,
13
+ getLearner
14
+ };
@@ -0,0 +1,19 @@
1
+ import {
2
+ analyzeScreenRegion,
3
+ captureScreenshot,
4
+ describeScreen,
5
+ findElementCoordinates,
6
+ getCurrentDescription,
7
+ getScreenHash,
8
+ screensChanged
9
+ } from "./chunk-OIVTPXE4.js";
10
+ import "./chunk-COKO6V5J.js";
11
+ export {
12
+ analyzeScreenRegion,
13
+ captureScreenshot,
14
+ describeScreen,
15
+ findElementCoordinates,
16
+ getCurrentDescription,
17
+ getScreenHash,
18
+ screensChanged
19
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@projectservan8n/cnapse",
3
- "version": "0.9.0",
3
+ "version": "0.10.0",
4
4
  "description": "Autonomous PC intelligence - AI assistant for desktop automation",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -0,0 +1,515 @@
1
+ /**
2
+ * Autonomous Agent - The brain that pursues goals relentlessly
3
+ * Continuously observes, thinks, and acts until goal is achieved
4
+ */
5
+
6
+ import { EventEmitter } from 'events';
7
+ import { describeScreen, captureScreenshot } from '../lib/vision.js';
8
+ import { chat } from '../lib/api.js';
9
+ import * as computer from '../tools/computer.js';
10
+ import { AgentLearner, getLearner, Suggestion } from './learner.js';
11
+
12
/**
 * One entry in the agent's action history: what was attempted, when, and how
 * it turned out.
 */
export interface ActionRecord {
  /** Epoch milliseconds (`Date.now()`) taken when the record was created. */
  timestamp: number;
  /** Action type as requested, e.g. `click`, `type`, `navigate`. */
  action: string;
  /** Raw parameter string for the action (coordinates, text, URL, ...). */
  value: string;
  /** Outcome; starts as `pending` and is settled once execution finishes. */
  result: 'success' | 'failure' | 'pending';
  /** Screen hash known to the agent before the action ran. */
  screenBefore: string;
  /** Screen hash after the action, when one was captured. */
  screenAfter?: string;
  /** Why the action was chosen. */
  reasoning: string;
}
21
+
22
/**
 * Mutable runtime state of a single agent run. A fresh state object is
 * created every time a new goal is started.
 */
export interface AgentState {
  /** The goal text the agent is currently pursuing. */
  goal: string;
  /** True while the main loop is running. */
  isActive: boolean;
  /** True while the loop is idling because of `pause()`. */
  isPaused: boolean;
  /** Human-readable `action: value` of the action in flight, or null. */
  currentAction: string | null;
  /** Every action executed so far in this run, oldest first. */
  actionHistory: ActionRecord[];
  /** Number of consecutive iterations that made no visible progress. */
  stuckCount: number;
  /** Number of loop iterations consumed so far. */
  attemptCount: number;
  /** Hash of the last observed screen (empty string when unknown). */
  lastScreenHash: string;
  /** Epoch milliseconds at which the state was created. */
  startTime: number;
  /** Confidence score kept within 0-100. */
  confidence: number;
}
34
+
35
/** Tunable behaviour of the autonomous agent. */
export interface AgentConfig {
  /** Max attempts before giving up (default 25). */
  maxAttempts: number;
  /** Delay between actions, in milliseconds (default 1500). */
  actionDelayMs: number;
  /** Same-screen count before asking for help (default 3). */
  stuckThreshold: number;
  /** Take a screenshot after each action to verify it (default true). */
  verifyActions: boolean;
  /** Use human-like delays (default true). */
  humanLikeTiming: boolean;
  /** Save successful actions to memory (default true). */
  learnFromSuccess: boolean;
  /** Consult other AIs when stuck (default true). */
  askForHelpWhenStuck: boolean;
}
44
+
45
/**
 * Baseline configuration. The agent constructor spreads caller overrides on
 * top of these values, so every field here must be populated.
 */
const DEFAULT_CONFIG: AgentConfig = {
  // Attempt budget and pacing.
  maxAttempts: 25,
  actionDelayMs: 1500,
  stuckThreshold: 3,

  // Feature switches, all enabled out of the box.
  verifyActions: true,
  humanLikeTiming: true,
  learnFromSuccess: true,
  askForHelpWhenStuck: true,
};
54
+
55
/**
 * Goal-driven desktop agent.
 *
 * Each iteration of the main loop observes the screen, checks the learner's
 * memory for a known action, otherwise asks the chat model for the next
 * action, executes it and (optionally) verifies that the screen changed.
 * The loop ends when the model answers `done`, the attempt budget is used
 * up, `stop()` is called, or an unexpected error is thrown.
 *
 * Events emitted: `started`, `attempt`, `observing`, `observed`,
 * `observe_error`, `recalled`, `thinking`, `decided`, `asking_help`,
 * `trying_suggestion`, `executing`, `executed`, `completed`, `error`,
 * `stopped`, `paused`, `resumed`.
 */
export class AutonomousAgent extends EventEmitter {
  private state: AgentState;
  private config: AgentConfig;
  private learner: AgentLearner;
  private abortController: AbortController | null = null;

  /**
   * @param config Overrides applied on top of `DEFAULT_CONFIG`.
   */
  constructor(config: Partial<AgentConfig> = {}) {
    super();
    this.config = { ...DEFAULT_CONFIG, ...config };
    this.learner = getLearner();
    this.state = this.createInitialState('');
  }

  /** Build a pristine, inactive state for the given goal. */
  private createInitialState(goal: string): AgentState {
    return {
      goal,
      isActive: false,
      isPaused: false,
      currentAction: null,
      actionHistory: [],
      stuckCount: 0,
      attemptCount: 0,
      lastScreenHash: '',
      startTime: Date.now(),
      confidence: 100,
    };
  }

  /**
   * Start pursuing a goal autonomously.
   *
   * Never rejects: failures are reported through the returned object (and an
   * `error` event when at least one `error` listener is registered).
   *
   * @param goal Natural-language description of what should be achieved.
   * @returns `success: true` only when the model reported the goal as done.
   */
  async start(goal: string): Promise<{ success: boolean; message: string }> {
    if (this.state.isActive) {
      return { success: false, message: 'Agent is already running' };
    }

    this.state = this.createInitialState(goal);
    this.state.isActive = true;
    const abortController = new AbortController();
    this.abortController = abortController;
    // Keep a run-local handle so this loop reacts to *its own* stop() even if
    // this.abortController is replaced by a later start().
    const signal = abortController.signal;

    this.emit('started', { goal });

    try {
      await this.learner.load();

      // Main agent loop
      while (
        this.state.isActive &&
        !signal.aborted &&
        this.state.attemptCount < this.config.maxAttempts
      ) {
        if (this.state.isPaused) {
          await this.sleep(500);
          continue;
        }

        this.state.attemptCount++;
        this.emit('attempt', { count: this.state.attemptCount, max: this.config.maxAttempts });

        // 1. OBSERVE - Capture and analyze current screen
        const observation = await this.observe();
        if (!observation) continue;

        // 2. CHECK MEMORY - Have we solved something similar before?
        const remembered = await this.learner.recall(goal, observation.description);
        if (remembered) {
          this.emit('recalled', remembered);
          const result = await this.executeAction(
            remembered.actionType,
            remembered.actionValue,
            'Recalled from memory'
          );
          // Reinforce the memory only when learning is enabled.
          if (result.success && this.config.learnFromSuccess) {
            await this.learner.learn(
              observation.description,
              goal,
              remembered.actionType,
              remembered.actionValue,
              'memory'
            );
          }
          // Give the UI time to react before the next observation.
          await this.delayBetweenActions();
          continue;
        }

        // 3. THINK - Ask AI what to do next
        const decision = await this.think(observation.description);

        if (decision.action === 'done') {
          this.state.isActive = false;
          this.emit('completed', { success: true, attempts: this.state.attemptCount });
          return { success: true, message: 'Goal accomplished!' };
        }

        if (decision.action === 'stuck') {
          await this.handleStuck(goal, observation.description);
          continue;
        }

        // 4. ACT - Execute the decided action
        const result = await this.executeAction(decision.action, decision.value, decision.reasoning);

        // 5. VERIFY - Check if action had effect
        if (this.config.verifyActions) {
          const afterScreen = await captureScreenshot();
          const screenChanged = afterScreen !== observation.screenshot;

          if (screenChanged) {
            this.state.stuckCount = 0;
            this.state.confidence = Math.min(100, this.state.confidence + 5);

            if (result.success && this.config.learnFromSuccess) {
              await this.learner.learn(
                observation.description,
                goal,
                decision.action,
                decision.value,
                'self'
              );
            }
          } else {
            this.state.stuckCount++;
            this.state.confidence = Math.max(0, this.state.confidence - 10);
          }
        }

        await this.delayBetweenActions();
      }

      if (signal.aborted) {
        // stop() already cleared isActive and emitted 'stopped'. Do not touch
        // this.state here: it may already belong to a newer run.
        this.emit('completed', { success: false, attempts: this.state.attemptCount });
        return { success: false, message: 'Agent was stopped before the goal was accomplished.' };
      }

      // Reached max attempts
      this.state.isActive = false;
      this.emit('completed', { success: false, attempts: this.state.attemptCount });
      return {
        success: false,
        message: `Reached max attempts (${this.config.maxAttempts}). Goal may be partially complete.`,
      };
    } catch (error) {
      this.state.isActive = false;
      const message = error instanceof Error ? error.message : 'Unknown error';
      // EventEmitter throws when 'error' is emitted without a listener, which
      // would turn this handled failure into a rejected promise.
      if (this.listenerCount('error') > 0) {
        this.emit('error', { error: message });
      }
      return { success: false, message };
    }
  }

  /**
   * Stop the agent. Pending sleeps/waits are cut short and the main loop
   * exits at its next check.
   */
  stop(): void {
    this.state.isActive = false;
    this.abortController?.abort();
    this.emit('stopped', { attempts: this.state.attemptCount });
  }

  /** Pause the agent; the loop idles until `resume()` or `stop()`. */
  pause(): void {
    this.state.isPaused = true;
    this.emit('paused');
  }

  /** Resume a paused agent. */
  resume(): void {
    this.state.isPaused = false;
    this.emit('resumed');
  }

  /**
   * Register a "stuck" iteration and, once the threshold is reached and help
   * is enabled, try the learner's best suggestion.
   */
  private async handleStuck(goal: string, screenDescription: string): Promise<void> {
    this.state.stuckCount++;

    if (this.state.stuckCount < this.config.stuckThreshold || !this.config.askForHelpWhenStuck) {
      return;
    }

    // Ask for help from multiple sources
    this.emit('asking_help', { stuckCount: this.state.stuckCount });
    const suggestions = await this.learner.getHelp(
      goal,
      screenDescription,
      this.state.actionHistory.slice(-5).map(a => `${a.action}: ${a.value}`)
    );

    // Try the best suggestion, if any
    const [suggestion] = suggestions;
    if (!suggestion) return;

    this.emit('trying_suggestion', suggestion);
    const result = await this.executeAction(
      suggestion.action,
      suggestion.value,
      `Suggested by ${suggestion.source}`
    );

    if (result.success) {
      // A working suggestion means we are no longer stuck, whether or not
      // learning is enabled.
      this.state.stuckCount = 0;
      if (this.config.learnFromSuccess) {
        await this.learner.learn(
          screenDescription,
          goal,
          suggestion.action,
          suggestion.value,
          suggestion.source
        );
      }
    }
    await this.delayBetweenActions();
  }

  /** Wait `actionDelayMs`, plus up to 500 ms of jitter when human-like timing is on. */
  private async delayBetweenActions(): Promise<void> {
    const jitter = this.config.humanLikeTiming ? Math.random() * 500 : 0;
    await this.sleep(this.config.actionDelayMs + jitter);
  }

  /**
   * Observe current screen state.
   *
   * @returns The screen description/screenshot, or null when observation
   *          failed (an `observe_error` event is emitted in that case).
   */
  private async observe(): Promise<{ description: string; screenshot: string } | null> {
    try {
      this.emit('observing');
      const result = await describeScreen();
      this.emit('observed', { description: result.description.slice(0, 200) });
      return result;
    } catch (error) {
      this.emit('observe_error', { error });
      return null;
    }
  }

  /**
   * Ask the chat model which action to take next. Any failure is mapped to a
   * `stuck` decision so the main loop can fall back to asking for help.
   */
  private async think(screenDescription: string): Promise<{ action: string; value: string; reasoning: string }> {
    this.emit('thinking');

    const prompt = this.buildThinkingPrompt(screenDescription);

    try {
      const response = await chat([{ role: 'user', content: prompt }]);
      const decision = this.parseDecision(response.content);

      this.emit('decided', decision);
      return decision;
    } catch (error) {
      return { action: 'stuck', value: '', reasoning: 'Failed to get AI decision' };
    }
  }

  /** Build the prompt containing goal, screen, recent actions and the answer format. */
  private buildThinkingPrompt(screenDescription: string): string {
    const recentActions = this.state.actionHistory.slice(-5)
      .map(a => `- ${a.action}: ${a.value} (${a.result})`)
      .join('\n');

    return `GOAL: ${this.state.goal}

CURRENT SCREEN: ${screenDescription}

PREVIOUS ACTIONS:
${recentActions || 'None yet'}

ATTEMPT: ${this.state.attemptCount}/${this.config.maxAttempts}
STUCK COUNT: ${this.state.stuckCount}

Based on what you see, what's the SINGLE next action to take?

Available actions:
- click: Click at current mouse position
- clickAt: Click at specific coordinates (VALUE: x,y)
- type: Type text (VALUE: text to type)
- press: Press a key (VALUE: Enter, Tab, Escape, etc.)
- keyCombo: Press key combination (VALUE: command+s, control+c, etc.)
- scroll: Scroll (VALUE: up or down)
- navigate: Open URL (VALUE: full URL)
- moveTo: Move mouse (VALUE: x,y coordinates)
- wait: Wait for something (VALUE: seconds)
- done: Goal is accomplished
- stuck: Can't figure out what to do

Respond EXACTLY in this format:
ACTION: <action_type>
VALUE: <parameter>
REASONING: <brief why>`;
  }

  /**
   * Extract ACTION / VALUE / REASONING from the model's reply.
   *
   * Each field is read from its own line only. Matching `[ \t]*` instead of
   * `\s*` after the label prevents an empty `VALUE:` line from swallowing the
   * newline and capturing the following `REASONING:` line as the value.
   *
   * @returns Lower-cased action (`stuck` when missing), trimmed value (may be
   *          empty) and trimmed reasoning (with a placeholder when missing).
   */
  private parseDecision(content: string): { action: string; value: string; reasoning: string } {
    const actionMatch = content.match(/ACTION:[ \t]*(\w+)/i);
    const valueMatch = content.match(/VALUE:[ \t]*([^\r\n]*)/i);
    const reasoningMatch = content.match(/REASONING:[ \t]*([^\r\n]*)/i);

    return {
      action: actionMatch?.[1]?.toLowerCase() || 'stuck',
      value: valueMatch?.[1]?.trim() || '',
      reasoning: reasoningMatch?.[1]?.trim() || 'No reasoning provided',
    };
  }

  /** Parse an `x,y` pair; throws when either coordinate is not a number. */
  private parseCoordinates(value: string): [number, number] {
    const [x, y] = value.split(',').map(part => Number.parseInt(part.trim(), 10));
    if (x === undefined || y === undefined || Number.isNaN(x) || Number.isNaN(y)) {
      throw new Error(`Invalid coordinates: ${value}`);
    }
    return [x, y];
  }

  /**
   * Execute an action and append the outcome to the action history.
   *
   * Action names are matched case-insensitively. Invalid parameters and
   * unknown actions are reported as failures rather than silently ignored.
   *
   * NOTE(review): the return values of the `computer.*` helpers are not
   * inspected here; if they report failure via a result object instead of
   * throwing, that failure is not detected — confirm against tools/computer.
   *
   * @param action Action type (see the prompt for the supported set).
   * @param value Raw parameter string for the action.
   * @param reasoning Optional explanation stored on the history record.
   */
  private async executeAction(
    action: string,
    value: string,
    reasoning = ''
  ): Promise<{ success: boolean; error?: string }> {
    const record: ActionRecord = {
      timestamp: Date.now(),
      action,
      value,
      result: 'pending',
      screenBefore: this.state.lastScreenHash,
      reasoning,
    };

    this.state.currentAction = `${action}: ${value}`;
    this.emit('executing', { action, value });

    try {
      switch (action.toLowerCase()) {
        case 'click':
          await computer.clickMouse('left');
          break;

        case 'clickat': {
          const [x, y] = this.parseCoordinates(value);
          await computer.moveMouse(x, y);
          await this.sleep(100);
          await computer.clickMouse('left');
          break;
        }

        case 'type':
          if (this.config.humanLikeTiming) {
            await this.typeHumanLike(value);
          } else {
            await computer.typeText(value);
          }
          break;

        case 'press':
          await computer.pressKey(value || 'Return');
          break;

        case 'keycombo': {
          const keys = value
            .split('+')
            .map(k => k.trim().toLowerCase())
            .filter(k => k.length > 0);
          if (keys.length === 0) {
            throw new Error('keyCombo requires at least one key');
          }
          await computer.keyCombo(keys);
          break;
        }

        case 'scroll': {
          const amount = value.toLowerCase().includes('up') ? 3 : -3;
          await computer.scrollMouse(amount);
          break;
        }

        case 'navigate': {
          const target = value.trim();
          if (!target) {
            throw new Error('navigate requires a URL');
          }
          const browser = await import('../services/browser.js');
          // Only an explicit http(s) scheme counts as a full URL.
          const url = /^https?:\/\//i.test(target) ? target : `https://${target}`;
          await browser.openUrl(url);
          break;
        }

        case 'moveto': {
          const [mx, my] = this.parseCoordinates(value);
          if (this.config.humanLikeTiming) {
            await this.moveMouseSmooth(mx, my);
          } else {
            await computer.moveMouse(mx, my);
          }
          break;
        }

        case 'wait': {
          // Default to 2 s only when the value is missing/invalid; an
          // explicit 0 is honoured.
          const parsed = Number.parseFloat(value);
          const seconds = Number.isFinite(parsed) && parsed >= 0 ? parsed : 2;
          await this.sleep(seconds * 1000);
          break;
        }

        case 'done':
        case 'stuck':
          // These are handled in the main loop
          break;

        default:
          throw new Error(`Unknown action: ${action}`);
      }

      record.result = 'success';
      this.state.actionHistory.push(record);
      this.emit('executed', { action, value, success: true });
      return { success: true };
    } catch (error) {
      const errorMsg = error instanceof Error ? error.message : 'Unknown error';
      record.result = 'failure';
      this.state.actionHistory.push(record);
      this.emit('executed', { action, value, success: false, error: errorMsg });
      return { success: false, error: errorMsg };
    } finally {
      this.state.currentAction = null;
    }
  }

  /**
   * Type text one character at a time with human-like timing. Typing stops
   * early once the current run has been aborted via `stop()`.
   */
  private async typeHumanLike(text: string): Promise<void> {
    const baseDelay = 50;

    for (const char of text) {
      if (this.abortController?.signal.aborted) return;

      await computer.typeText(char);

      // Variable delay
      await this.sleep(baseDelay + Math.random() * 30);

      // Occasional longer pause (simulating thinking)
      if (Math.random() < 0.05) {
        await this.sleep(200 + Math.random() * 300);
      }
    }
  }

  /**
   * Move mouse smoothly (basic linear interpolation in 10 steps). Falls back
   * to a direct move when the current position cannot be parsed.
   */
  private async moveMouseSmooth(targetX: number, targetY: number): Promise<void> {
    const currentPos = await computer.getMousePosition();
    // Allow a leading minus sign: coordinates can be negative on
    // multi-monitor layouts.
    const match = currentPos.output?.match(/(-?\d+),\s*(-?\d+)/);
    if (!match) {
      await computer.moveMouse(targetX, targetY);
      return;
    }

    const startX = Number.parseInt(match[1], 10);
    const startY = Number.parseInt(match[2], 10);

    const steps = 10;
    for (let i = 1; i <= steps; i++) {
      const t = i / steps;
      const x = Math.round(startX + (targetX - startX) * t);
      const y = Math.round(startY + (targetY - startY) * t);
      await computer.moveMouse(x, y);
      await this.sleep(20);
    }
  }

  /**
   * Resolve after `ms` milliseconds, or immediately once the current run's
   * abort signal fires, so `stop()` does not have to wait out long sleeps.
   */
  private sleep(ms: number): Promise<void> {
    const signal = this.abortController?.signal;
    return new Promise<void>(resolve => {
      if (signal?.aborted) {
        resolve();
        return;
      }
      const onAbort = (): void => {
        clearTimeout(timer);
        resolve();
      };
      const timer = setTimeout(() => {
        signal?.removeEventListener('abort', onAbort);
        resolve();
      }, ms);
      signal?.addEventListener('abort', onAbort, { once: true });
    });
  }

  /**
   * Get a snapshot of the current state. The history array is copied so
   * callers cannot push into the agent's live history.
   */
  getState(): AgentState {
    return { ...this.state, actionHistory: [...this.state.actionHistory] };
  }

  /** Get a copy of the action history (oldest first). */
  getHistory(): ActionRecord[] {
    return [...this.state.actionHistory];
  }
}
504
+
505
// Lazily created, process-wide agent shared by every caller.
let agentInstance: AutonomousAgent | null = null;

/**
 * Return the shared agent, creating it on first use.
 *
 * @param config Applied only when the instance is created by this call;
 *               ignored once an instance already exists.
 */
export function getAutonomousAgent(config?: Partial<AgentConfig>): AutonomousAgent {
  agentInstance = agentInstance ?? new AutonomousAgent(config);
  return agentInstance;
}
514
+
515
+ export default AutonomousAgent;