pikakit 1.0.7 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,610 @@
1
+ /**
2
+ * AutoLearn v6.0 - Reinforcement Loop
3
+ *
4
+ * Implements reinforcement learning for pattern confidence adjustment.
5
+ * Rewards effective patterns, penalizes ineffective ones, prunes failures.
6
+ *
7
+ * Key concepts:
8
+ * - Reward: Increase confidence when pattern helps
9
+ * - Penalty: Decrease confidence when pattern fails
10
+ * - Prune: Remove patterns below MIN_CONFIDENCE
11
+ * - Quarantine: A/B test patterns in danger zone
12
+ *
13
+ * @version 6.0.0
14
+ * @author PikaKit
15
+ */
16
+
17
+ import fs from 'fs';
18
+ import path from 'path';
19
+ import { recordPatternEvent, recordSkillEvent } from './metrics-collector.js';
20
+
21
+ // ============================================================================
22
+ // CONFIGURATION
23
+ // ============================================================================
24
+
25
// Directory (under the current working directory) where reinforcement data is persisted.
const KNOWLEDGE_DIR = path.resolve('.agent', 'knowledge');

// Rolling JSON log of reinforcement events, stored inside KNOWLEDGE_DIR.
const REINFORCEMENT_LOG = path.resolve(KNOWLEDGE_DIR, 'reinforcement-log.json');
27
+
28
+ // ============================================================================
29
+ // REWARD/PENALTY RULES
30
+ // ============================================================================
31
+
32
/**
 * Reinforcement signal table plus the confidence thresholds.
 *
 * Every signal entry has the shape `{ id, delta, reason, category }`, where
 * `delta` is the amount added to a pattern's confidence and `category` is
 * either 'reward' or 'penalty'. `THRESHOLDS` is not a signal: it has no `id`
 * and only carries the confidence band boundaries.
 */
export const REINFORCEMENT_RULES = (() => {
  // Small factories so each signal is declared on a single line.
  const signalOf = (category) => (id, delta, reason) => ({ id, delta, reason, category });
  const reward = signalOf('reward');
  const penalty = signalOf('penalty');

  return {
    // ── Reward signals (increase confidence) ───────────────────────────────
    TASK_SUCCESS_WITH_SKILL: reward('task_success_with_skill', 0.15, 'Skill helped task succeed'),
    ERROR_PREVENTED: reward('error_prevented', 0.20, 'Pattern prevented known error'),
    TIME_SAVED: reward('time_saved', 0.10, 'Task completed faster than average'),
    USER_CONFIRMED_HELPFUL: reward(
      'user_confirmed_helpful',
      0.25,
      'User explicitly confirmed skill was helpful'
    ),
    FIRST_TIME_SUCCESS: reward(
      'first_time_success',
      0.12,
      'Task succeeded on first attempt with skill'
    ),

    // ── Penalty signals (decrease confidence) ──────────────────────────────
    SKILL_IGNORED: penalty('skill_ignored', -0.05, 'Skill loaded but not applied'),
    FALSE_POSITIVE: penalty('false_positive', -0.15, 'Pattern triggered incorrectly'),
    TASK_FAILED_WITH_SKILL: penalty(
      'task_failed_with_skill',
      -0.20,
      'Skill did not prevent failure'
    ),
    USER_REJECTED: penalty('user_rejected', -0.25, 'User explicitly rejected suggestion'),
    PATTERN_OUTDATED: penalty('pattern_outdated', -0.10, 'Pattern has not matched in 30+ days'),

    // ── Threshold actions ──────────────────────────────────────────────────
    THRESHOLDS: {
      MIN_CONFIDENCE: 0.30, // Below this = auto-prune
      QUARANTINE: 0.40,     // Below this = A/B test required
      STABLE: 0.70,         // Above this = considered stable
      PROMOTE: 0.85,        // Above this = promote to core skill
      MAX_CONFIDENCE: 0.99  // Cap confidence
    }
  };
})();
123
+
124
+ // ============================================================================
125
+ // REINFORCEMENT STATE
126
+ // ============================================================================
127
+
128
/**
 * Classify a confidence score into a reinforcement state.
 *
 * Bands are read from REINFORCEMENT_RULES.THRESHOLDS (upper bounds exclusive):
 *   confidence < MIN_CONFIDENCE -> 'prune'
 *   confidence < QUARANTINE     -> 'quarantine'
 *   confidence < STABLE         -> 'learning'
 *   confidence < PROMOTE        -> 'stable'
 *   otherwise                   -> 'promote'
 *
 * A confidence that does not coerce to a number (undefined, NaN, objects, ...)
 * is classified as 'learning'. Without this guard every `<` comparison is
 * false for such values and they would fall through to 'promote'.
 *
 * @param {number} confidence - Current confidence
 * @returns {string} - State: 'prune' | 'quarantine' | 'learning' | 'stable' | 'promote'
 */
export function getReinforcementState(confidence) {
  // Number() mirrors the coercion the relational operators below apply, so
  // only values that would compare as NaN take the fallback.
  if (Number.isNaN(Number(confidence))) return 'learning';

  const T = REINFORCEMENT_RULES.THRESHOLDS;

  if (confidence < T.MIN_CONFIDENCE) return 'prune';
  if (confidence < T.QUARANTINE) return 'quarantine';
  if (confidence < T.STABLE) return 'learning';
  if (confidence < T.PROMOTE) return 'stable';
  return 'promote';
}
142
+
143
/**
 * Map a reinforcement state to its recommended action.
 *
 * A fresh recommendation object is built on every call. States without an
 * entry fall back to the 'learning' recommendation.
 *
 * @param {string} state - Reinforcement state
 * @returns {Object} - Recommendation `{ action, message, urgent }`
 */
export function getStateAction(state) {
  // Only 'prune' is urgent, so urgency defaults to false.
  const recommend = (action, message, urgent = false) => ({ action, message, urgent });

  const byState = {
    prune: recommend('remove', 'Pattern confidence too low, should be removed', true),
    quarantine: recommend('ab_test', 'Pattern needs A/B testing to validate effectiveness'),
    learning: recommend('continue', 'Pattern is learning, gathering more evidence'),
    stable: recommend('maintain', 'Pattern is stable and effective'),
    promote: recommend('promote_to_skill', 'Pattern ready to be promoted to core skill')
  };

  const match = byState[state];
  return match ? match : byState.learning;
}
179
+
180
+ // ============================================================================
181
+ // CONFIDENCE ADJUSTMENT
182
+ // ============================================================================
183
+
184
/**
 * Apply a reinforcement signal to a pattern.
 *
 * Looks the signal up in REINFORCEMENT_RULES by its `id`, adds the rule's
 * delta to the pattern's confidence, clamps the result, then:
 *   - writes the event to the reinforcement log (logReinforcement),
 *   - records a pattern metric (recordPatternEvent),
 *   - mutates `pattern` in place: `confidence`, `lastReinforcement`, and
 *     `reinforcementHistory` (trimmed to the 20 most recent entries).
 *
 * Only entries with a string `id` are treated as signals, so a missing
 * `signalId` can no longer match the id-less THRESHOLDS entry and write NaN
 * into the pattern. A confidence of 0 is honoured as 0; only a missing or
 * non-finite confidence falls back to 0.5.
 *
 * @param {Object} pattern - Pattern object with confidence (mutated in place)
 * @param {string} signalId - Signal ID from REINFORCEMENT_RULES
 * @param {Object} context - Additional context stored on the logged event
 * @returns {Object} - `{ pattern, applied: true, event, action }` on success,
 *   or `{ pattern, applied: false, error: 'Unknown signal' }` for an unknown id
 */
export function applyReinforcement(pattern, signalId, context = {}) {
  const rule = Object.values(REINFORCEMENT_RULES).find(
    (candidate) => typeof candidate.id === 'string' && candidate.id === signalId
  );

  if (!rule) {
    return { pattern, applied: false, error: 'Unknown signal' };
  }

  const T = REINFORCEMENT_RULES.THRESHOLDS;
  const oldConfidence = Number.isFinite(pattern.confidence) ? pattern.confidence : 0.5;

  // The floor sits 0.1 below MIN_CONFIDENCE: a pattern can drop into the
  // prune band but not sink arbitrarily far below it.
  const floor = T.MIN_CONFIDENCE - 0.1;
  const newConfidence = Math.max(floor, Math.min(oldConfidence + rule.delta, T.MAX_CONFIDENCE));

  const oldState = getReinforcementState(oldConfidence);
  const newState = getReinforcementState(newConfidence);

  const event = {
    patternId: pattern.id,
    signalId: rule.id,
    category: rule.category,
    reason: rule.reason,
    oldConfidence,
    newConfidence,
    delta: rule.delta,
    oldState,
    newState,
    stateChanged: oldState !== newState,
    context,
    timestamp: new Date().toISOString()
  };

  // Persist the event, then feed the metrics collector.
  logReinforcement(event);
  recordPatternEvent({
    type: rule.category === 'reward' ? 'true_positive' : 'false_positive',
    confidence: newConfidence,
    newPattern: false
  });

  // Update the pattern in place.
  pattern.confidence = newConfidence;
  pattern.lastReinforcement = event.timestamp;
  pattern.reinforcementHistory = pattern.reinforcementHistory || [];
  pattern.reinforcementHistory.push({
    signal: rule.id,
    delta: rule.delta,
    timestamp: event.timestamp
  });

  // Keep only the last 20 reinforcement events.
  if (pattern.reinforcementHistory.length > 20) {
    pattern.reinforcementHistory = pattern.reinforcementHistory.slice(-20);
  }

  return {
    pattern,
    applied: true,
    event,
    action: getStateAction(newState)
  };
}
260
+
261
/**
 * Apply every reward signal that the task outcome qualifies for.
 *
 * Signals are checked in a fixed order and each qualifying one is passed to
 * applyReinforcement with `taskOutcome` as its context.
 *
 * @param {Object} pattern - Pattern object
 * @param {Object} taskOutcome - Task outcome details (reads `success`,
 *   `skillApplied`, `errorPrevented`, `fasterThanAverage`, `firstAttempt`)
 * @returns {Object} - Result of the last reinforcement applied, or
 *   `{ pattern, applied: false }` when no reward qualified
 */
export function rewardSuccess(pattern, taskOutcome) {
  // [signal id, predicate over the outcome], in application order.
  const rewardChecks = [
    ['task_success_with_skill', (outcome) => outcome.success && outcome.skillApplied],
    ['error_prevented', (outcome) => outcome.errorPrevented],
    ['time_saved', (outcome) => outcome.fasterThanAverage],
    ['first_time_success', (outcome) => outcome.firstAttempt && outcome.success]
  ];

  let latest = { pattern, applied: false };
  for (const [signalId, qualifies] of rewardChecks) {
    if (qualifies(taskOutcome)) {
      latest = applyReinforcement(latest.pattern, signalId, taskOutcome);
    }
  }
  return latest;
}
289
+
290
/**
 * Apply every penalty signal that the task outcome qualifies for.
 *
 * Signals are checked in a fixed order and each qualifying one is passed to
 * applyReinforcement with `taskOutcome` as its context.
 *
 * @param {Object} pattern - Pattern object
 * @param {Object} taskOutcome - Task outcome details (reads `success`,
 *   `skillApplied`, `falsePositive`, `skillIgnored`)
 * @returns {Object} - Result of the last reinforcement applied, or
 *   `{ pattern, applied: false }` when no penalty qualified
 */
export function penalizeFailure(pattern, taskOutcome) {
  // [signal id, predicate over the outcome], in application order.
  const penaltyChecks = [
    ['task_failed_with_skill', (outcome) => !outcome.success && outcome.skillApplied],
    ['false_positive', (outcome) => outcome.falsePositive],
    ['skill_ignored', (outcome) => outcome.skillIgnored]
  ];

  let latest = { pattern, applied: false };
  for (const [signalId, qualifies] of penaltyChecks) {
    if (qualifies(taskOutcome)) {
      latest = applyReinforcement(latest.pattern, signalId, taskOutcome);
    }
  }
  return latest;
}
313
+
314
+ // ============================================================================
315
+ // PRUNING
316
+ // ============================================================================
317
+
318
/**
 * Decide whether a pattern should be pruned.
 *
 * A pattern is pruned when its confidence falls in the 'prune' state, or when
 * it has a `lastHit` more than 30 days old while its confidence is still
 * below the STABLE threshold.
 *
 * @param {Object} pattern - Pattern to check (reads `confidence`, `lastHit`)
 * @returns {Object} - `{ shouldPrune: true, reason, confidence }` or
 *   `{ shouldPrune: false }`
 */
export function shouldPrune(pattern) {
  const MS_PER_DAY = 1000 * 60 * 60 * 24;
  const STALE_AFTER_DAYS = 30;
  const { MIN_CONFIDENCE, STABLE } = REINFORCEMENT_RULES.THRESHOLDS;
  const { confidence, lastHit } = pattern;

  if (getReinforcementState(confidence) === 'prune') {
    return {
      shouldPrune: true,
      reason: `Confidence ${confidence.toFixed(3)} below threshold ${MIN_CONFIDENCE}`,
      confidence
    };
  }

  // Without a recorded hit there is nothing to judge staleness on.
  if (!lastHit) {
    return { shouldPrune: false };
  }

  const lastHitMs = new Date(lastHit).getTime();
  const daysSinceHit = (Date.now() - lastHitMs) / MS_PER_DAY;
  const isStale = daysSinceHit > STALE_AFTER_DAYS && confidence < STABLE;

  if (!isStale) {
    return { shouldPrune: false };
  }

  return {
    shouldPrune: true,
    reason: `Pattern stale (${Math.floor(daysSinceHit)} days since last hit)`,
    confidence
  };
}
350
+
351
/**
 * Split a collection of patterns into those to keep and those to prune.
 *
 * Each pattern is judged by shouldPrune. Pruned patterns are shallow-copied
 * with `prunedAt` (ISO timestamp) and `pruneReason` added, and a 'pruned'
 * skill event is recorded for each one. Kept patterns are returned as-is.
 *
 * @param {Array} patterns - Array of patterns
 * @returns {Object} - { kept: Array, pruned: Array }
 */
export function prunePatterns(patterns) {
  const kept = [];
  const pruned = [];

  for (const candidate of patterns) {
    const verdict = shouldPrune(candidate);

    if (!verdict.shouldPrune) {
      kept.push(candidate);
      continue;
    }

    const prunedAt = new Date().toISOString();
    pruned.push({ ...candidate, prunedAt, pruneReason: verdict.reason });

    // Record for metrics
    recordSkillEvent({ type: 'pruned' });
  }

  return { kept, pruned };
}
379
+
380
+ // ============================================================================
381
+ // PROMOTION
382
+ // ============================================================================
383
+
384
/**
 * Decide whether a pattern qualifies for promotion to a skill.
 *
 * All three gates must pass, checked in this order:
 *   1. the confidence is in the 'promote' state,
 *   2. at least 5 pieces of evidence (`evidence.length`, else `hitCount`),
 *   3. at least 3 positive deltas among the last 5 reinforcement entries.
 *
 * @param {Object} pattern - Pattern to check
 * @returns {Object} - `{ shouldPromote: false, reason }` for the first failed
 *   gate, otherwise `{ shouldPromote: true, reason, confidence,
 *   evidenceCount, positiveReinforcements }`
 */
export function shouldPromote(pattern) {
  const decline = (reason) => ({ shouldPromote: false, reason });

  const state = getReinforcementState(pattern.confidence);
  if (state !== 'promote') {
    return decline(`State is ${state}, not promote`);
  }

  // Require minimum evidence
  const evidenceCount = pattern.evidence?.length || pattern.hitCount || 0;
  if (evidenceCount < 5) {
    return decline(`Insufficient evidence (${evidenceCount}/5 required)`);
  }

  // Require consistent positive reinforcement
  const lastFive = pattern.reinforcementHistory?.slice(-5) || [];
  let positiveCount = 0;
  for (const entry of lastFive) {
    if (entry.delta > 0) {
      positiveCount += 1;
    }
  }
  if (positiveCount < 3) {
    return decline(`Insufficient positive reinforcement (${positiveCount}/3 required in last 5)`);
  }

  return {
    shouldPromote: true,
    reason: 'Pattern meets all promotion criteria',
    confidence: pattern.confidence,
    evidenceCount,
    positiveReinforcements: positiveCount
  };
}
424
+
425
+ // ============================================================================
426
+ // QUARANTINE & A/B TESTING
427
+ // ============================================================================
428
+
429
/**
 * Decide whether a pattern belongs in quarantine (A/B testing).
 *
 * @param {Object} pattern - Pattern to check (reads `confidence`)
 * @returns {Object} - `{ shouldQuarantine: true, reason, suggestedTest }` when
 *   the confidence is in the 'quarantine' state, else `{ shouldQuarantine: false }`
 */
export function shouldQuarantine(pattern) {
  const { confidence } = pattern;
  const inQuarantineZone = getReinforcementState(confidence) === 'quarantine';

  return inQuarantineZone
    ? {
        shouldQuarantine: true,
        reason: `Confidence ${confidence.toFixed(3)} in quarantine zone`,
        suggestedTest: 'Compare against alternative pattern or baseline'
      }
    : { shouldQuarantine: false };
}
447
+
448
/**
 * Build an A/B test configuration for a pattern and persist it.
 *
 * The pattern is compared against `competitor` when given, otherwise against
 * a synthetic 'baseline' entry with `confidence: null`. The configuration is
 * handed to saveABTest and also returned.
 *
 * The id is `AB-<epoch ms>-<random suffix>`. The suffix matters because
 * saveABTest replaces any stored test that has the same id: with a
 * timestamp-only id, two tests queued within the same millisecond would
 * overwrite each other.
 *
 * @param {Object} pattern - Pattern to test (reads `id`, `confidence`)
 * @param {Object} competitor - Alternative pattern (optional)
 * @returns {Object} - A/B test configuration
 */
export function queueForABTest(pattern, competitor = null) {
  // Not security-sensitive; only needs to disambiguate ids created in the same ms.
  const uniqueSuffix = Math.random().toString(36).slice(2, 10).padEnd(8, '0');

  const abTest = {
    id: `AB-${Date.now()}-${uniqueSuffix}`,
    createdAt: new Date().toISOString(),
    status: 'pending',
    patternA: {
      id: pattern.id,
      confidence: pattern.confidence
    },
    patternB: competitor
      ? { id: competitor.id, confidence: competitor.confidence }
      : { id: 'baseline', confidence: null },
    allocation: 0.5, // 50% split
    metrics: {
      patternA: { applied: 0, success: 0 },
      patternB: { applied: 0, success: 0 }
    },
    minSamples: 10,
    maxDuration: 7 * 24 * 60 * 60 * 1000 // 7 days
  };

  // Save to A/B test queue
  saveABTest(abTest);

  return abTest;
}
484
+
485
+ // ============================================================================
486
+ // LOGGING
487
+ // ============================================================================
488
+
489
/**
 * Append a reinforcement event to the JSON log at REINFORCEMENT_LOG.
 *
 * Creates KNOWLEDGE_DIR when needed and keeps only the 1000 most recent
 * events. Logging is best-effort: failures are reported on stderr and never
 * thrown to the caller.
 *
 * If the existing log is not valid JSON, or is valid JSON but not an array,
 * a new log is started. Without this, one damaged file would make every
 * later call fail and logging would stop for good.
 *
 * @param {Object} event - Reinforcement event
 */
function logReinforcement(event) {
  try {
    // With `recursive: true` this is a no-op when the directory already exists.
    fs.mkdirSync(KNOWLEDGE_DIR, { recursive: true });

    let log = [];
    if (fs.existsSync(REINFORCEMENT_LOG)) {
      try {
        const parsed = JSON.parse(fs.readFileSync(REINFORCEMENT_LOG, 'utf8'));
        if (Array.isArray(parsed)) {
          log = parsed;
        } else {
          console.error('Reinforcement log is not an array; starting a new log');
        }
      } catch (parseError) {
        // Only recover from malformed JSON; I/O errors go to the outer handler.
        if (!(parseError instanceof SyntaxError)) throw parseError;
        console.error('Reinforcement log is corrupt; starting a new log:', parseError.message);
      }
    }

    log.push(event);

    // Keep only last 1000 events
    if (log.length > 1000) {
      log = log.slice(-1000);
    }

    fs.writeFileSync(REINFORCEMENT_LOG, JSON.stringify(log, null, 2), 'utf8');
  } catch (error) {
    console.error('Error logging reinforcement:', error.message);
  }
}
516
+
517
/**
 * Summarise the reinforcement log stored at REINFORCEMENT_LOG.
 *
 * @returns {Object} - Statistics:
 *   - log file missing: `{ total: 0, rewards: 0, penalties: 0 }`
 *   - log unreadable or not an array: the same zeroed counters plus `error`
 *   - otherwise `{ total, rewards, penalties, rewardRatio, avgDelta,
 *     stateChanges, lastEvent }`, where `avgDelta` is the mean of the numeric
 *     deltas (0 when there are none, never NaN) and `lastEvent` is the
 *     timestamp of the final entry or null
 */
export function getReinforcementStats() {
  try {
    if (!fs.existsSync(REINFORCEMENT_LOG)) {
      return { total: 0, rewards: 0, penalties: 0 };
    }

    const log = JSON.parse(fs.readFileSync(REINFORCEMENT_LOG, 'utf8'));
    if (!Array.isArray(log)) {
      return { total: 0, rewards: 0, penalties: 0, error: 'Reinforcement log is not an array' };
    }

    let rewards = 0;
    let penalties = 0;
    let stateChanges = 0;
    let deltaSum = 0;
    let deltaCount = 0;

    for (const entry of log) {
      if (entry?.category === 'reward') rewards += 1;
      if (entry?.category === 'penalty') penalties += 1;
      if (entry?.stateChanged) stateChanges += 1;
      // Skip entries without a usable delta so one bad record cannot poison the mean.
      if (Number.isFinite(entry?.delta)) {
        deltaSum += entry.delta;
        deltaCount += 1;
      }
    }

    return {
      total: log.length,
      rewards,
      penalties,
      // 0/0 is NaN when there are no rewards or penalties; `|| 0` maps that to 0.
      rewardRatio: rewards / (rewards + penalties) || 0,
      // An empty log used to produce 0/0 = NaN here.
      avgDelta: deltaCount > 0 ? deltaSum / deltaCount : 0,
      stateChanges,
      lastEvent: log[log.length - 1]?.timestamp || null
    };
  } catch (error) {
    return { total: 0, rewards: 0, penalties: 0, error: error.message };
  }
}
552
+
553
+ // ============================================================================
554
+ // A/B TEST STORAGE
555
+ // ============================================================================
556
+
557
// JSON file, inside KNOWLEDGE_DIR, that stores the queued A/B test configurations.
const AB_TESTS_FILE = path.resolve(KNOWLEDGE_DIR, 'ab-tests.json');
558
+
559
/**
 * Insert or replace an A/B test in the JSON store at AB_TESTS_FILE.
 *
 * A stored test with the same `id` is replaced; otherwise the test is
 * appended. Creates KNOWLEDGE_DIR when needed. Saving is best-effort:
 * failures are reported on stderr and never thrown to the caller.
 *
 * If the existing store is not valid JSON, or is valid JSON but not an array,
 * a new store is started. Without this, one damaged file would make every
 * later save fail.
 *
 * @param {Object} test - A/B test configuration (matched on `test.id`)
 */
function saveABTest(test) {
  try {
    // With `recursive: true` this is a no-op when the directory already exists.
    fs.mkdirSync(KNOWLEDGE_DIR, { recursive: true });

    let tests = [];
    if (fs.existsSync(AB_TESTS_FILE)) {
      try {
        const parsed = JSON.parse(fs.readFileSync(AB_TESTS_FILE, 'utf8'));
        if (Array.isArray(parsed)) {
          tests = parsed;
        } else {
          console.error('A/B test store is not an array; starting a new store');
        }
      } catch (parseError) {
        // Only recover from malformed JSON; I/O errors go to the outer handler.
        if (!(parseError instanceof SyntaxError)) throw parseError;
        console.error('A/B test store is corrupt; starting a new store:', parseError.message);
      }
    }

    const existingIndex = tests.findIndex((stored) => stored?.id === test.id);
    if (existingIndex >= 0) {
      tests[existingIndex] = test;
    } else {
      tests.push(test);
    }

    fs.writeFileSync(AB_TESTS_FILE, JSON.stringify(tests, null, 2), 'utf8');
  } catch (error) {
    console.error('Error saving A/B test:', error.message);
  }
}
582
+
583
/**
 * Load the stored A/B tests from AB_TESTS_FILE.
 *
 * Always returns an array. A missing file yields `[]` silently. A file that
 * cannot be read or parsed, or that does not contain a JSON array, also
 * yields `[]` but is reported on stderr so the damage is not hidden.
 *
 * @returns {Array} - Stored A/B test configurations (possibly empty)
 */
export function loadABTests() {
  try {
    if (!fs.existsSync(AB_TESTS_FILE)) return [];

    const parsed = JSON.parse(fs.readFileSync(AB_TESTS_FILE, 'utf8'));
    if (!Array.isArray(parsed)) {
      console.error('Error loading A/B tests:', 'store is not an array');
      return [];
    }
    return parsed;
  } catch (error) {
    console.error('Error loading A/B tests:', error.message);
    return [];
  }
}
591
+
592
+ // ============================================================================
593
+ // EXPORTS
594
+ // ============================================================================
595
+
596
// Aggregate of the module's public API, exposed as the default export
// alongside the individual named exports.
const reinforcementLoop = {
  REINFORCEMENT_RULES,
  getReinforcementState,
  getStateAction,
  applyReinforcement,
  rewardSuccess,
  penalizeFailure,
  shouldPrune,
  prunePatterns,
  shouldPromote,
  shouldQuarantine,
  queueForABTest,
  getReinforcementStats,
  loadABTests
};

export default reinforcementLoop;