@projectservan8n/cnapse 0.2.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,483 @@
1
+ /**
2
+ * Task Automation - Multi-step task sequencing
3
+ * Parses natural language into actionable steps and executes them
4
+ * Uses chain-of-thought prompting + learning from past tasks
5
+ */
6
+
7
+ import { chat, Message } from './api.js';
8
+ import * as computer from '../tools/computer.js';
9
+ import { describeScreen } from './vision.js';
10
+ import * as fs from 'fs';
11
+ import * as path from 'path';
12
+ import * as os from 'os';
13
+
14
/** Lifecycle state of a single step inside a task. */
export type TaskStepStatus =
  | 'pending'
  | 'running'
  | 'completed'
  | 'failed'
  | 'skipped';
15
+
16
/** Location of the JSON file that stores learned task patterns: `<home>/.cnapse/task-memory.json`. */
const TASK_MEMORY_FILE = path.join(
  os.homedir(),
  '.cnapse',
  'task-memory.json',
);
18
+
19
/** One remembered request together with the steps that were saved for it. */
interface TaskPattern {
  /** Request text exactly as it was supplied when the pattern was first saved. */
  input: string;
  /** Normalized form of `input`; used to look up an existing pattern. */
  normalizedInput: string;
  /** Description/action pairs recorded for this request. */
  steps: { description: string; action: string }[];
  /** How many times this pattern has been saved (a new pattern starts at 1). */
  successCount: number;
  /** ISO timestamp string written each time the pattern is saved. */
  lastUsed: string;
}
26
+
27
/** Shape of the JSON document persisted in the task memory file. */
interface TaskMemory {
  /** All remembered patterns. */
  patterns: Array<TaskPattern>;
  /** Format version number; an empty memory is created with version 1. */
  version: number;
}
31
+
32
/**
 * Load learned task patterns from disk.
 *
 * Reads TASK_MEMORY_FILE and parses it as JSON. The parsed value is checked
 * before it is returned: a file holding valid JSON of the wrong shape (for
 * example `null`, `{}`, or an entry with missing fields) must not leak
 * through, because callers immediately use array methods on `patterns` and
 * string methods on each entry's `normalizedInput`.
 *
 * @returns The stored memory with malformed entries dropped, or an empty
 *   memory (`{ patterns: [], version: 1 }`) when the file is missing,
 *   unreadable, or does not contain task memory.
 */
function loadTaskMemory(): TaskMemory {
  // A fresh object per call so callers can mutate the result freely.
  const emptyMemory = (): TaskMemory => ({ patterns: [], version: 1 });

  // Runtime guard for a single stored entry.
  const isTaskPattern = (value: unknown): value is TaskPattern => {
    if (typeof value !== 'object' || value === null) {
      return false;
    }
    const candidate = value as Record<string, unknown>;
    return (
      typeof candidate.input === 'string' &&
      typeof candidate.normalizedInput === 'string' &&
      Array.isArray(candidate.steps) &&
      typeof candidate.successCount === 'number' &&
      Number.isFinite(candidate.successCount)
    );
  };

  try {
    if (!fs.existsSync(TASK_MEMORY_FILE)) {
      return emptyMemory();
    }

    const parsed: unknown = JSON.parse(fs.readFileSync(TASK_MEMORY_FILE, 'utf-8'));
    if (typeof parsed !== 'object' || parsed === null) {
      return emptyMemory();
    }

    const { patterns, version } = parsed as { patterns?: unknown; version?: unknown };
    if (!Array.isArray(patterns)) {
      return emptyMemory();
    }

    return {
      patterns: patterns.filter(isTaskPattern),
      version: typeof version === 'number' ? version : 1,
    };
  } catch {
    // Best effort: an unreadable or corrupt file is treated as "no memory yet".
    return emptyMemory();
  }
}
46
+
47
/**
 * Record a successfully executed task so it can be reused as an example.
 *
 * When a pattern with the same normalized input already exists, its steps are
 * replaced, its success count is incremented and its timestamp is refreshed;
 * otherwise a new pattern is added with a success count of 1.
 *
 * At most 100 patterns are kept. The pattern saved by this call is always
 * retained; the remaining slots go to the other patterns ranked by success
 * count, with ties broken in favour of the most recently used. (Ranking by
 * success count alone would place every new pattern last and evict it as soon
 * as the memory is full, so nothing new could ever be learned.)
 *
 * Persistence is best effort: any failure while loading, creating the
 * directory, or writing the file is swallowed so that a successful task is
 * never reported as failed because of memory bookkeeping.
 *
 * @param input - Request text as supplied by the user.
 * @param steps - Description/action pairs that completed the request.
 */
function saveTaskPattern(input: string, steps: Array<{ description: string; action: string }>): void {
  const MAX_PATTERNS = 100;

  // Tolerate entries written by older versions that may lack a timestamp.
  const timestampOf = (pattern: TaskPattern): string =>
    typeof pattern.lastUsed === 'string' ? pattern.lastUsed : '';

  // Higher success count first; on a tie, the more recent ISO timestamp first.
  const byRank = (a: TaskPattern, b: TaskPattern): number => {
    const countDiff = b.successCount - a.successCount;
    if (countDiff !== 0) {
      return countDiff;
    }
    const aStamp = timestampOf(a);
    const bStamp = timestampOf(b);
    if (aStamp === bStamp) {
      return 0;
    }
    return bStamp > aStamp ? 1 : -1;
  };

  try {
    const memory = loadTaskMemory();
    const normalized = normalizeInput(input);
    const now = new Date().toISOString();

    let current = memory.patterns.find(p => p.normalizedInput === normalized);
    if (current) {
      current.steps = steps;
      current.successCount++;
      current.lastUsed = now;
    } else {
      current = {
        input,
        normalizedInput: normalized,
        steps,
        successCount: 1,
        lastUsed: now,
      };
      memory.patterns.push(current);
    }

    // Reserve one slot for the pattern just saved, then fill the rest by rank.
    const justSaved = current;
    const retainedOthers = memory.patterns
      .filter(p => p !== justSaved)
      .sort(byRank)
      .slice(0, MAX_PATTERNS - 1);
    memory.patterns = [justSaved, ...retainedOthers].sort(byRank);

    // `recursive: true` makes this a no-op when the directory already exists.
    fs.mkdirSync(path.dirname(TASK_MEMORY_FILE), { recursive: true });

    fs.writeFileSync(TASK_MEMORY_FILE, JSON.stringify(memory, null, 2));
  } catch {
    // Ignore write errors: learning is optional and must not fail the task.
  }
}
87
+
88
/**
 * Normalize input for pattern matching.
 *
 * Lower-cases the text, replaces every character that is not a letter,
 * combining mark, digit, underscore or whitespace with a space, collapses
 * runs of whitespace into a single space and trims the ends.
 *
 * Letters and digits are matched with Unicode property escapes rather than
 * `\w`, which only covers ASCII and would turn "café" into "caf" and erase
 * non-Latin requests entirely. Results for ASCII-only input are unchanged.
 *
 * @param input - Raw request text.
 * @returns The normalized text; an empty string when nothing word-like remains.
 */
function normalizeInput(input: string): string {
  return input
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
    .replace(/\s+/g, ' ')
    .trim();
}
98
+
99
/**
 * Find learned patterns that resemble the given request.
 *
 * The request is normalized and reduced to its distinct words longer than two
 * characters. A stored pattern qualifies when it shares at least
 * `min(2, half of those words)` of them. Qualifying patterns are ordered by
 * success count (highest first) and the top three are returned.
 *
 * When the request contains no word longer than two characters there is
 * nothing to compare on, so no patterns are returned. (Without this guard the
 * required match count would be 0 and every stored pattern would qualify.)
 *
 * @param input - Raw request text.
 * @returns Up to three similar patterns, best first; possibly empty.
 */
function findSimilarPatterns(input: string): TaskPattern[] {
  // Distinct words only, so a repeated word cannot count as several matches.
  const significantWords = [
    ...new Set(
      normalizeInput(input)
        .split(' ')
        .filter(word => word.length > 2),
    ),
  ];
  if (significantWords.length === 0) {
    return [];
  }

  const requiredMatches = Math.min(2, significantWords.length * 0.5);

  return loadTaskMemory()
    .patterns.filter(pattern => {
      const patternWords = new Set(pattern.normalizedInput.split(' '));
      const sharedCount = significantWords.filter(word => patternWords.has(word)).length;
      return sharedCount >= requiredMatches;
    })
    .sort((a, b) => b.successCount - a.successCount)
    .slice(0, 3);
}
117
+
118
/** A single executable step of a task. */
export interface TaskStep {
  /** Step identifier. */
  id: string;
  /** Human-readable explanation of the step. */
  description: string;
  /** The actual action to perform. */
  action: string;
  /** Current lifecycle state of the step. */
  status: TaskStepStatus;
  /** Outcome text, when the step produced one. */
  result?: string;
  /** Error message, when the step failed. */
  error?: string;
}
126
+
127
/** A user request broken down into ordered steps. */
export interface Task {
  /** Task identifier. */
  id: string;
  /** Description of the task. */
  description: string;
  /** Steps belonging to the task, in order. */
  steps: Array<TaskStep>;
  /** Overall state of the task. */
  status:
    | 'pending'
    | 'running'
    | 'completed'
    | 'failed';
  /** When the task object was created. */
  createdAt: Date;
  /** When the task finished, if it has. */
  completedAt?: Date;
}
135
+
136
/** Listener that receives a task together with one of its steps as progress is reported. */
export type TaskProgressCallback = (
  task: Task,
  step: TaskStep,
) => void;
137
+
138
/**
 * Build the chain-of-thought system prompt used to parse a request into steps.
 *
 * The prompt lists the reasoning questions, the available actions and three
 * worked examples. When similar patterns have been learned before, they are
 * inserted as an extra "LEARNED PATTERNS" section ahead of the examples.
 *
 * @param input - The request to be parsed; it is quoted inside the prompt.
 * @returns The complete prompt text.
 */
function buildChainOfThoughtPrompt(input: string): string {
  // Previously saved patterns that resemble this request
  const similarPatterns = findSimilarPatterns(input);

  // Renders one learned pattern as a prompt snippet
  const renderPattern = (p: TaskPattern, i: number): string => `
Pattern ${i + 1} (used ${p.successCount} times):
Input: "${p.input}"
Steps: ${JSON.stringify(p.steps, null, 2)}
`;

  // Empty when nothing similar has been learned
  const learnedSection =
    similarPatterns.length > 0
      ? `
## LEARNED PATTERNS (from successful past tasks)
These patterns worked before - use them as reference:

${similarPatterns.map(renderPattern).join('\n')}
`
      : '';

  const prompt = `You are a task parser for Windows PC automation. Your job is to convert natural language into precise, executable steps.

## THINKING PROCESS
Before outputting steps, THINK through these questions:

1. **WHAT** is the main goal?
- What application needs to open?
- What action needs to happen inside it?
- What is the expected end result?

2. **HOW** to achieve it on Windows?
- Use Win+R (meta+r) to open Run dialog for apps
- Wait 1-3 seconds after opening apps for them to load
- Use keyboard shortcuts when possible (faster, more reliable)
- Common shortcuts: Ctrl+S (save), Ctrl+O (open), Ctrl+N (new), Alt+F4 (close)

3. **SEQUENCE** - what order makes sense?
- Open app FIRST
- WAIT for it to load
- THEN interact with it
- Add waits between actions that need time

4. **EDGE CASES** - what could go wrong?
- App might already be open -> focus_window first
- Dialogs might appear -> handle or dismiss them
- Typing too fast -> add small waits

## AVAILABLE ACTIONS
- open_app: Open app via Run dialog (e.g., "open_app:notepad", "open_app:code", "open_app:chrome")
- type_text: Type text string (e.g., "type_text:Hello World")
- press_key: Single key (e.g., "press_key:enter", "press_key:escape", "press_key:tab")
- key_combo: Key combination (e.g., "key_combo:control+s", "key_combo:alt+f4", "key_combo:meta+r")
- click: Mouse click (e.g., "click:left", "click:right")
- wait: Wait N seconds (e.g., "wait:2" - use 1-3s for app loads)
- focus_window: Focus by title (e.g., "focus_window:Notepad")
- screenshot: Capture and describe screen
${learnedSection}
## EXAMPLES WITH REASONING

### Example 1: "open notepad and type hello"
Thinking:
- Goal: Open Notepad, then type text into it
- How: Win+R -> notepad -> Enter to open, then type
- Sequence: Open -> Wait for load -> Type
- Edge case: Need wait time for Notepad window to be ready

Output:
[
{ "description": "Open Notepad via Run dialog", "action": "open_app:notepad" },
{ "description": "Wait for Notepad to fully load", "action": "wait:2" },
{ "description": "Type the greeting text", "action": "type_text:hello" }
]

### Example 2: "save the current document"
Thinking:
- Goal: Save whatever is in the current app
- How: Ctrl+S is universal save shortcut
- Sequence: Just the key combo, maybe wait for save
- Edge case: If file is new, Save As dialog might appear

Output:
[
{ "description": "Press Ctrl+S to save", "action": "key_combo:control+s" },
{ "description": "Wait for save to complete", "action": "wait:1" }
]

### Example 3: "close this window"
Thinking:
- Goal: Close the current active window
- How: Alt+F4 closes active window on Windows
- Sequence: Single action
- Edge case: Might prompt to save - user handles that

Output:
[
{ "description": "Close active window with Alt+F4", "action": "key_combo:alt+f4" }
]

## YOUR TASK
Now parse this request: "${input}"

First, briefly think through the 4 questions above, then output ONLY a JSON array:
[
{ "description": "Human readable step", "action": "action_type:params" },
...
]`;

  return prompt;
}
247
+
248
/**
 * Parse a natural language request into a task with executable steps.
 *
 * The request is sent to the model together with the chain-of-thought prompt,
 * and the JSON array of steps is extracted from the reply. The plan is
 * accepted only when every element is an object with a non-empty string
 * `action`; a missing description falls back to the action text.
 *
 * If the model call fails, no JSON array can be found, any step is malformed,
 * or the array is empty, the task falls back to a single `chat:` step that
 * carries the original request. This function therefore never rejects
 * because of a bad model reply.
 *
 * @param input - The user's request.
 * @returns A pending task whose steps are all in the `pending` state.
 */
export async function parseTask(input: string): Promise<Task> {
  const systemPrompt = buildChainOfThoughtPrompt(input);
  const messages: Message[] = [{ role: 'user', content: input }];

  let steps: TaskStep[] = [];
  try {
    const response = await chat(messages, systemPrompt);
    steps = toTaskSteps(extractJsonArray(response.content || ''));
  } catch {
    // Model call failed; the fallback below takes over.
    steps = [];
  }

  if (steps.length === 0) {
    // If AI parsing fails, create a simple task that just carries the request.
    steps = [
      {
        id: 'step-1',
        description: input,
        action: `chat:${input}`,
        status: 'pending',
      },
    ];
  }

  return {
    id: `task-${Date.now()}`,
    description: input,
    steps,
    status: 'pending',
    createdAt: new Date(),
  };
}

/**
 * Find the JSON array embedded in free-form model output.
 *
 * The reply may contain reasoning text with its own square brackets before
 * the real array, so each `[` is tried in turn (paired with the final `]`)
 * until one parses as an array. Returns an empty array when none does.
 */
function extractJsonArray(content: string): unknown[] {
  const end = content.lastIndexOf(']');
  let start = content.indexOf('[');

  while (start !== -1 && start < end) {
    try {
      const candidate: unknown = JSON.parse(content.slice(start, end + 1));
      if (Array.isArray(candidate)) {
        return candidate;
      }
    } catch {
      // Not valid JSON from this bracket; try the next one.
    }
    start = content.indexOf('[', start + 1);
  }

  return [];
}

/**
 * Convert raw parsed steps into pending task steps with sequential ids.
 *
 * All-or-nothing: if any element is malformed the result is empty, because
 * running a plan with a step silently removed could act on the wrong window.
 */
function toTaskSteps(rawSteps: unknown[]): TaskStep[] {
  const steps: TaskStep[] = [];

  for (const raw of rawSteps) {
    if (typeof raw !== 'object' || raw === null) {
      return [];
    }
    const { description, action } = raw as { description?: unknown; action?: unknown };
    if (typeof action !== 'string' || action.trim() === '') {
      return [];
    }
    steps.push({
      id: `step-${steps.length + 1}`,
      description:
        typeof description === 'string' && description.trim() !== '' ? description : action,
      action,
      status: 'pending',
    });
  }

  return steps;
}
300
+
301
/**
 * Execute a single task step.
 *
 * The step's `action` has the form `type:params`; everything after the first
 * colon is the parameter, so parameters may themselves contain colons. On
 * success a summary is stored in `step.result`.
 *
 * Parameters are trimmed before use, except for `type_text`, whose text is
 * typed exactly as given.
 *
 * @param step - The step to run; its `result` field is set on success.
 * @throws Error when the action type is unknown, when an action that needs a
 *   parameter has none, or when `click` names an unsupported button. Errors
 *   from the underlying computer/vision helpers propagate unchanged.
 */
async function executeStep(step: TaskStep): Promise<void> {
  const separatorIndex = step.action.indexOf(':');
  const hasParams = separatorIndex !== -1;
  const actionType = (hasParams ? step.action.slice(0, separatorIndex) : step.action).trim();
  const rawParams = hasParams ? step.action.slice(separatorIndex + 1) : '';
  const params = rawParams.trim();

  // Guard for actions that are meaningless (or unsafe) without a parameter.
  const requireParams = (): string => {
    if (params === '') {
      throw new Error(`Action "${actionType}" requires a parameter`);
    }
    return params;
  };

  switch (actionType) {
    case 'open_app': {
      // Without a name, Enter would launch whatever the Run dialog already holds.
      const appName = requireParams();
      // Use Windows Run dialog to open apps
      await computer.keyCombo(['meta', 'r']);
      await sleep(500);
      await computer.typeText(appName);
      await sleep(300);
      await computer.pressKey('Return');
      step.result = `Opened ${appName}`;
      break;
    }

    case 'type_text':
      // Typed verbatim: leading/trailing spaces may be intentional.
      await computer.typeText(rawParams);
      step.result = `Typed: ${rawParams}`;
      break;

    case 'press_key': {
      const key = requireParams();
      await computer.pressKey(key);
      step.result = `Pressed ${key}`;
      break;
    }

    case 'key_combo': {
      const combo = requireParams();
      const keys = combo
        .split('+')
        .map(k => k.trim())
        .filter(k => k !== '');
      if (keys.length === 0) {
        throw new Error(`Action "${actionType}" requires a parameter`);
      }
      await computer.keyCombo(keys);
      step.result = `Pressed ${combo}`;
      break;
    }

    case 'click': {
      const requested = (params || 'left').toLowerCase();
      if (requested !== 'left' && requested !== 'right' && requested !== 'middle') {
        throw new Error(`Unsupported mouse button: ${requested}`);
      }
      await computer.clickMouse(requested);
      step.result = `Clicked ${requested}`;
      break;
    }

    case 'wait': {
      // Accept fractional seconds; anything missing, non-numeric or <= 0 waits 1s.
      const requested = Number.parseFloat(params);
      const seconds = Number.isFinite(requested) && requested > 0 ? requested : 1;
      await sleep(seconds * 1000);
      step.result = `Waited ${seconds}s`;
      break;
    }

    case 'focus_window': {
      const title = requireParams();
      await computer.focusWindow(title);
      step.result = `Focused window: ${title}`;
      break;
    }

    case 'screenshot': {
      const vision = await describeScreen();
      step.result = vision.description;
      break;
    }

    case 'chat':
      // This is a fallback - just describe what user wants
      step.result = `Task noted: ${rawParams}`;
      break;

    default:
      throw new Error(`Unknown action: ${actionType}`);
  }
}
366
+
367
/**
 * Execute a complete task with progress callbacks.
 *
 * Steps run in order. `onProgress` is called when a step starts running and
 * again once it has completed or failed. After the first failure the task is
 * marked `failed` and every remaining step is marked `skipped` without being
 * run (no callback is issued for skipped steps).
 *
 * A task that finishes without failure is marked `completed` and, when it is
 * worth learning from, saved to task memory. Tasks with no steps and tasks
 * containing the `chat` fallback action are not saved: they did not automate
 * anything, and storing them would feed a non-automation "pattern" back into
 * future prompts.
 *
 * The task object passed in is mutated and also returned.
 *
 * @param task - The task to run.
 * @param onProgress - Optional listener for step state changes.
 * @returns The same task, with final statuses and `completedAt` set.
 */
export async function executeTask(
  task: Task,
  onProgress?: TaskProgressCallback
): Promise<Task> {
  task.status = 'running';
  let failed = false;

  for (const step of task.steps) {
    if (failed) {
      step.status = 'skipped';
      continue;
    }

    step.status = 'running';
    onProgress?.(task, step);

    try {
      await executeStep(step);
      step.status = 'completed';
    } catch (error) {
      step.status = 'failed';
      step.error = error instanceof Error ? error.message : 'Unknown error';
      task.status = 'failed';
      failed = true;
    }

    onProgress?.(task, step);
  }

  if (!failed) {
    task.status = 'completed';

    // Learn from successful tasks - but only from real automation plans.
    const isFallbackStep = (action: string): boolean => action.split(':')[0].trim() === 'chat';
    const isLearnable =
      task.steps.length > 0 && task.steps.every(s => !isFallbackStep(s.action));

    if (isLearnable) {
      const steps = task.steps.map(s => ({
        description: s.description,
        action: s.action,
      }));
      saveTaskPattern(task.description, steps);
    }
  }
  task.completedAt = new Date();

  return task;
}
411
+
412
/**
 * Pause for the given duration.
 *
 * @param ms - Delay in milliseconds.
 * @returns A promise that resolves once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
418
+
419
/**
 * Summarize what is currently stored in task memory.
 *
 * @returns The number of stored patterns, the sum of their success counts,
 *   and display strings for the five patterns with the highest success count.
 */
export function getTaskMemoryStats(): { patternCount: number; totalUses: number; topPatterns: string[] } {
  const { patterns } = loadTaskMemory();

  let totalUses = 0;
  for (const pattern of patterns) {
    totalUses += pattern.successCount;
  }

  // Highest success count first
  patterns.sort((a, b) => b.successCount - a.successCount);
  const topPatterns: string[] = [];
  for (const pattern of patterns.slice(0, 5)) {
    topPatterns.push(`"${pattern.input}" (${pattern.successCount}x)`);
  }

  return { patternCount: patterns.length, totalUses, topPatterns };
}
436
+
437
/**
 * Delete the task memory file, if there is one.
 *
 * Failures are ignored.
 */
export function clearTaskMemory(): void {
  try {
    const memoryFileExists = fs.existsSync(TASK_MEMORY_FILE);
    if (!memoryFileExists) {
      return;
    }
    fs.unlinkSync(TASK_MEMORY_FILE);
  } catch {
    // Ignore errors
  }
}
449
+
450
/**
 * Render a task as display text.
 *
 * The first line shows the task's status icon and description, followed by a
 * blank line and then one line per step with the step's status icon,
 * description, result (if any) and error (if any).
 *
 * @param task - The task to render.
 * @returns The formatted text, ending with a newline after each step line.
 */
export function formatTask(task: Task): string {
  const taskIcons = {
    pending: '⏳',
    running: '🔄',
    completed: '✅',
    failed: '❌',
  };

  const stepIcons = {
    pending: '○',
    running: '◐',
    completed: '●',
    failed: '✗',
    skipped: '◌',
  };

  const header = `${taskIcons[task.status]} Task: ${task.description}\n\n`;

  const stepLines = task.steps.map(step => {
    const resultPart = step.result ? ` → ${step.result}` : '';
    const errorPart = step.error ? ` (Error: ${step.error})` : '';
    return ` ${stepIcons[step.status]} ${step.description}${resultPart}${errorPart}\n`;
  });

  return header + stepLines.join('');
}