pikakit 1.0.8 → 1.0.10

This diff shows the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
@@ -0,0 +1,508 @@
+ /**
+  * AutoLearn v6.0 - A/B Testing Engine
+  *
+  * Compares patterns to determine which is more effective.
+  * Uses statistical significance to select winners.
+  *
+  * Key concepts:
+  * - Split Traffic: 50/50 allocation between patterns
+  * - Track Outcomes: Success rate per pattern
+  * - Statistical Significance: Chi-square test
+  * - Winner Selection: Auto-select when significant
+  *
+  * @version 6.0.0
+  * @author PikaKit
+  */
+
+ import fs from 'fs';
+ import path from 'path';
+ import { recordABTestEvent } from './metrics-collector.js';
+ import { applyReinforcement, REINFORCEMENT_RULES } from './reinforcement.js';
+
+ // ============================================================================
+ // CONFIGURATION
+ // ============================================================================
+
+ const KNOWLEDGE_DIR = path.join(process.cwd(), '.agent', 'knowledge');
+ const AB_TESTS_FILE = path.join(KNOWLEDGE_DIR, 'ab-tests.json');
+
+ // Minimum samples per variant before a winner can be determined
+ const MIN_SAMPLES_PER_VARIANT = 10;
+
+ // Confidence level for statistical significance (95%)
+ const SIGNIFICANCE_LEVEL = 0.95;
+
+ // Default maximum test duration (7 days)
+ const DEFAULT_TEST_DURATION_MS = 7 * 24 * 60 * 60 * 1000;
+
+ // ============================================================================
+ // A/B TEST DATA STRUCTURE
+ // ============================================================================
+
+ /**
+  * @typedef {Object} ABTest
+  * @property {string} id - Test ID
+  * @property {string} status - 'pending' | 'running' | 'completed' | 'cancelled'
+  * @property {Object} patternA - Pattern A details
+  * @property {Object} patternB - Pattern B details (or baseline)
+  * @property {number} allocation - Fraction of traffic routed to pattern A (0.5 = 50/50)
+  * @property {Object} metrics - Success metrics per variant
+  * @property {Object} result - Test result when completed
+  */
+
+ // ============================================================================
+ // TEST MANAGEMENT
+ // ============================================================================
+
+ /**
+  * Create a new A/B test
+  * @param {Object} patternA - First pattern
+  * @param {Object} patternB - Second pattern (or null for baseline)
+  * @param {Object} options - Test options
+  * @returns {Object} - Created test
+  */
+ export function createABTest(patternA, patternB = null, options = {}) {
+   const test = {
+     id: `AB-${Date.now()}`,
+     createdAt: new Date().toISOString(),
+     startedAt: null,
+     endedAt: null,
+     status: 'pending',
+
+     // Patterns
+     patternA: {
+       id: patternA.id,
+       confidence: patternA.confidence,
+       name: patternA.name || patternA.id
+     },
+     patternB: patternB ? {
+       id: patternB.id,
+       confidence: patternB.confidence,
+       name: patternB.name || patternB.id
+     } : {
+       id: 'baseline',
+       confidence: null,
+       name: 'No pattern (baseline)'
+     },
+
+     // Configuration
+     allocation: options.allocation || 0.5,
+     minSamples: options.minSamples || MIN_SAMPLES_PER_VARIANT,
+     maxDuration: options.maxDuration || DEFAULT_TEST_DURATION_MS,
+
+     // Metrics
+     metrics: {
+       patternA: { applied: 0, success: 0, failure: 0, totalTime: 0 },
+       patternB: { applied: 0, success: 0, failure: 0, totalTime: 0 }
+     },
+
+     // Result (filled when completed)
+     result: null
+   };
+
+   saveABTest(test);
+   return test;
+ }
+
+ /**
+  * Start an A/B test
+  * @param {string} testId - Test ID
+  * @returns {Object|null} - Updated test, or null if the test is not found
+  */
+ export function startABTest(testId) {
+   const test = loadABTest(testId);
+   if (!test) return null;
+
+   test.status = 'running';
+   test.startedAt = new Date().toISOString();
+
+   saveABTest(test);
+   return test;
+ }
+
+ /**
+  * Get which variant to use for a task
+  * @param {string} testId - Test ID
+  * @returns {string|null} - 'patternA' | 'patternB', or null if the test is not running
+  */
+ export function getVariantForTask(testId) {
+   const test = loadABTest(testId);
+   if (!test || test.status !== 'running') {
+     return null;
+   }
+
+   // Simple random allocation: `allocation` is the share of traffic sent to pattern A
+   return Math.random() < test.allocation ? 'patternA' : 'patternB';
+ }
+
+ /**
+  * Record outcome for an A/B test
+  * @param {string} testId - Test ID
+  * @param {string} variant - 'patternA' | 'patternB'
+  * @param {Object} outcome - Task outcome
+  * @returns {Object|null} - Updated test, or null if the test is not running or the variant is invalid
+  */
+ export function recordABOutcome(testId, variant, outcome) {
+   const test = loadABTest(testId);
+   if (!test || test.status !== 'running') return null;
+
+   const metrics = test.metrics[variant];
+   if (!metrics) return null;
+
+   metrics.applied++;
+
+   if (outcome.success) {
+     metrics.success++;
+   } else {
+     metrics.failure++;
+   }
+
+   if (outcome.duration) {
+     metrics.totalTime += outcome.duration;
+   }
+
+   // Persist the updated metrics first so that completeABTest (which reloads
+   // the test from disk) sees this outcome.
+   saveABTest(test);
+
+   // Check if test should complete
+   const shouldComplete = checkTestCompletion(test);
+   if (shouldComplete.complete) {
+     completeABTest(testId, shouldComplete.reason);
+   }
+
+   return test;
+ }
+
+ // ============================================================================
+ // STATISTICAL ANALYSIS
+ // ============================================================================
+
+ /**
+  * Calculate success rate for a variant
+  * @param {Object} metrics - Variant metrics
+  * @returns {number} - Success rate 0.0 to 1.0
+  */
+ function calculateSuccessRate(metrics) {
+   if (metrics.applied === 0) return 0;
+   return metrics.success / metrics.applied;
+ }
+
+ /**
+  * Calculate chi-square statistic for A/B comparison (2x2 contingency table)
+  * @param {Object} metricsA - Pattern A metrics
+  * @param {Object} metricsB - Pattern B metrics
+  * @returns {Object} - Chi-square result
+  */
+ function calculateChiSquare(metricsA, metricsB) {
+   const totalA = metricsA.success + metricsA.failure;
+   const totalB = metricsB.success + metricsB.failure;
+   const total = totalA + totalB;
+
+   if (total === 0) return { chiSquare: 0, significant: false };
+
+   const successTotal = metricsA.success + metricsB.success;
+   const failureTotal = metricsA.failure + metricsB.failure;
+
+   // Expected values under the null hypothesis (no difference between variants)
+   const expectedASuccess = (totalA * successTotal) / total;
+   const expectedAFailure = (totalA * failureTotal) / total;
+   const expectedBSuccess = (totalB * successTotal) / total;
+   const expectedBFailure = (totalB * failureTotal) / total;
+
+   // Chi-square calculation: sum of (observed - expected)^2 / expected
+   let chiSquare = 0;
+
+   if (expectedASuccess > 0) {
+     chiSquare += Math.pow(metricsA.success - expectedASuccess, 2) / expectedASuccess;
+   }
+   if (expectedAFailure > 0) {
+     chiSquare += Math.pow(metricsA.failure - expectedAFailure, 2) / expectedAFailure;
+   }
+   if (expectedBSuccess > 0) {
+     chiSquare += Math.pow(metricsB.success - expectedBSuccess, 2) / expectedBSuccess;
+   }
+   if (expectedBFailure > 0) {
+     chiSquare += Math.pow(metricsB.failure - expectedBFailure, 2) / expectedBFailure;
+   }
+
+   // Critical value for 95% confidence, 1 degree of freedom
+   const criticalValue = 3.841;
+   const significant = chiSquare > criticalValue;
+
+   return {
+     chiSquare,
+     criticalValue,
+     significant,
+     // Rough linear proxy for confidence, not a true p-value
+     confidence: significant ? 0.95 : chiSquare / criticalValue * 0.95
+   };
+ }
+
+ /**
+  * Analyze A/B test results
+  * @param {Object} test - A/B test object
+  * @returns {Object} - Analysis result
+  */
+ export function analyzeABTest(test) {
+   const metricsA = test.metrics.patternA;
+   const metricsB = test.metrics.patternB;
+
+   const rateA = calculateSuccessRate(metricsA);
+   const rateB = calculateSuccessRate(metricsB);
+
+   const chiSquareResult = calculateChiSquare(metricsA, metricsB);
+
+   const avgTimeA = metricsA.applied > 0 ? metricsA.totalTime / metricsA.applied : 0;
+   const avgTimeB = metricsB.applied > 0 ? metricsB.totalTime / metricsB.applied : 0;
+
+   // Determine winner
+   let winner = null;
+   let winnerReason = '';
+   let margin = 0;
+
+   if (chiSquareResult.significant) {
+     if (rateA > rateB) {
+       winner = 'patternA';
+       margin = rateA - rateB;
+       winnerReason = `Higher success rate by ${(margin * 100).toFixed(1)}%`;
+     } else if (rateB > rateA) {
+       winner = 'patternB';
+       margin = rateB - rateA;
+       winnerReason = `Higher success rate by ${(margin * 100).toFixed(1)}%`;
+     }
+   } else {
+     winnerReason = 'No statistically significant difference';
+   }
+
+   return {
+     patternA: {
+       id: test.patternA.id,
+       samples: metricsA.applied,
+       successRate: rateA,
+       avgTime: avgTimeA
+     },
+     patternB: {
+       id: test.patternB.id,
+       samples: metricsB.applied,
+       successRate: rateB,
+       avgTime: avgTimeB
+     },
+     statistics: chiSquareResult,
+     winner,
+     winnerReason,
+     margin,
+     analyzedAt: new Date().toISOString()
+   };
+ }
+
+ // ============================================================================
+ // TEST COMPLETION
+ // ============================================================================
+
+ /**
+  * Check if test should be completed
+  * @param {Object} test - A/B test
+  * @returns {Object} - { complete: boolean, reason?: string }
+  */
+ function checkTestCompletion(test) {
+   const metricsA = test.metrics.patternA;
+   const metricsB = test.metrics.patternB;
+
+   // Check minimum samples
+   if (metricsA.applied >= test.minSamples && metricsB.applied >= test.minSamples) {
+     const analysis = analyzeABTest(test);
+
+     if (analysis.statistics.significant) {
+       return {
+         complete: true,
+         reason: 'Statistical significance reached'
+       };
+     }
+   }
+
+   // Check max duration
+   if (test.startedAt) {
+     const duration = Date.now() - new Date(test.startedAt).getTime();
+     if (duration > test.maxDuration) {
+       return {
+         complete: true,
+         reason: 'Max duration reached'
+       };
+     }
+   }
+
+   // Check if one variant is clearly better (early stopping)
+   const totalSamples = metricsA.applied + metricsB.applied;
+   if (totalSamples >= 20) {
+     const rateA = calculateSuccessRate(metricsA);
+     const rateB = calculateSuccessRate(metricsB);
+     const diff = Math.abs(rateA - rateB);
+
+     // Early stop if the success rates differ by more than 30 percentage points
+     if (diff > 0.3) {
+       return {
+         complete: true,
+         reason: 'Clear winner detected (early stopping)'
+       };
+     }
+   }
+
+   return { complete: false };
+ }
+
+ /**
+  * Complete an A/B test and apply results
+  * @param {string} testId - Test ID
+  * @param {string} reason - Completion reason
+  * @returns {Object|null} - Completed test with results, or null if the test is not found
+  */
+ export function completeABTest(testId, reason) {
+   const test = loadABTest(testId);
+   if (!test) return null;
+
+   const analysis = analyzeABTest(test);
+
+   test.status = 'completed';
+   test.endedAt = new Date().toISOString();
+   test.result = {
+     ...analysis,
+     completionReason: reason
+   };
+
+   saveABTest(test);
+
+   // Record for metrics
+   recordABTestEvent({
+     hasWinner: !!analysis.winner
+   });
+
+   // Compute and log recommended reinforcement for the winner/loser
+   if (analysis.winner) {
+     applyABTestReinforcement(test, analysis);
+   }
+
+   return test;
+ }
+
+ /**
+  * Apply reinforcement based on A/B test results
+  * @param {Object} test - Completed test
+  * @param {Object} analysis - Test analysis
+  * @returns {Object} - Recommended confidence adjustments (winner/loser IDs, reward, penalty)
+  */
+ function applyABTestReinforcement(test, analysis) {
+   // Winner gets reward proportional to margin
+   const winnerReward = Math.min(0.15, analysis.margin * 0.5);
+
+   // Loser gets a penalty proportional to margin
+   const loserPenalty = -Math.min(0.10, analysis.margin * 0.3);
+
+   console.log(`A/B Test ${test.id} completed:`);
+   console.log(`  Winner: ${analysis.winner} (+${winnerReward.toFixed(2)} confidence)`);
+   console.log(`  Reason: ${analysis.winnerReason}`);
+
+   // Note: Actual pattern updates would be done by caller
+   // This just logs the recommended adjustments
+   return {
+     winnerId: analysis.winner === 'patternA' ? test.patternA.id : test.patternB.id,
+     loserId: analysis.winner === 'patternA' ? test.patternB.id : test.patternA.id,
+     winnerReward,
+     loserPenalty
+   };
+ }
+
+ // ============================================================================
+ // STORAGE
+ // ============================================================================
+
+ /**
+  * Load A/B test from disk
+  * @param {string} testId - Test ID
+  * @returns {Object|null} - Test or null
+  */
+ export function loadABTest(testId) {
+   const tests = loadAllABTests();
+   return tests.find(t => t.id === testId) || null;
+ }
+
+ /**
+  * Load all A/B tests
+  * @returns {Array} - All tests
+  */
+ export function loadAllABTests() {
+   try {
+     if (!fs.existsSync(AB_TESTS_FILE)) return [];
+     return JSON.parse(fs.readFileSync(AB_TESTS_FILE, 'utf8'));
+   } catch {
+     return [];
+   }
+ }
+
+ /**
+  * Save A/B test
+  * @param {Object} test - Test to save
+  */
+ export function saveABTest(test) {
+   try {
+     if (!fs.existsSync(KNOWLEDGE_DIR)) {
+       fs.mkdirSync(KNOWLEDGE_DIR, { recursive: true });
+     }
+
+     const tests = loadAllABTests();
+     const existingIndex = tests.findIndex(t => t.id === test.id);
+
+     if (existingIndex >= 0) {
+       tests[existingIndex] = test;
+     } else {
+       tests.push(test);
+     }
+
+     fs.writeFileSync(AB_TESTS_FILE, JSON.stringify(tests, null, 2), 'utf8');
+   } catch (error) {
+     console.error('Error saving A/B test:', error.message);
+   }
+ }
+
+ /**
+  * Get active A/B tests
+  * @returns {Array} - Running tests
+  */
+ export function getActiveTests() {
+   return loadAllABTests().filter(t => t.status === 'running');
+ }
+
+ /**
+  * Get A/B test statistics
+  * @returns {Object} - Statistics
+  */
+ export function getABTestStats() {
+   const tests = loadAllABTests();
+
+   const completed = tests.filter(t => t.status === 'completed');
+   const withWinner = completed.filter(t => t.result?.winner);
+
+   return {
+     total: tests.length,
+     running: tests.filter(t => t.status === 'running').length,
+     completed: completed.length,
+     withWinner: withWinner.length,
+     // Share of completed tests that produced a clear winner
+     winRate: completed.length > 0 ? withWinner.length / completed.length : 0,
+     pending: tests.filter(t => t.status === 'pending').length
+   };
+ }
+
+ // ============================================================================
+ // EXPORTS
+ // ============================================================================
+
+ export default {
+   createABTest,
+   startABTest,
+   getVariantForTask,
+   recordABOutcome,
+   analyzeABTest,
+   completeABTest,
+   loadABTest,
+   loadAllABTests,
+   saveABTest,
+   getActiveTests,
+   getABTestStats,
+   MIN_SAMPLES_PER_VARIANT,
+   SIGNIFICANCE_LEVEL
+ };
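
For orientation, the sketch below shows how the exported API in this new module could be wired together over one test's lifecycle. It is an illustrative example, not part of the package: the relative import path and the two pattern objects are assumptions, and the simulated outcomes stand in for the real task results a caller would supply.

// Illustrative usage sketch (not part of pikakit); the import path is assumed.
import abTesting from './ab-testing.js';

// Two hypothetical patterns to compare; createABTest reads `id`, `confidence`, and `name`.
const patternA = { id: 'retry-with-backoff', confidence: 0.7, name: 'Retry with backoff' };
const patternB = { id: 'fail-fast', confidence: 0.6, name: 'Fail fast' };

// Create and start a 50/50 test with the default sample and duration limits.
const test = abTesting.createABTest(patternA, patternB);
abTesting.startABTest(test.id);

// For each task: ask which variant to apply, run the task, then record the outcome.
// The outcomes here are simulated; a real caller would pass its own success/duration.
for (let i = 0; i < 30; i++) {
  const variant = abTesting.getVariantForTask(test.id);
  if (!variant) break; // test already completed (significance, duration, or early stop)

  const success = Math.random() < (variant === 'patternA' ? 0.8 : 0.5);
  abTesting.recordABOutcome(test.id, variant, { success, duration: 1200 });
}

// Completion happens automatically inside recordABOutcome; the analysis can be
// inspected at any point from the persisted test record.
const latest = abTesting.loadABTest(test.id);
console.log(abTesting.analyzeABTest(latest));
console.log(abTesting.getABTestStats());

For intuition on the significance gate in calculateChiSquare: with 18/20 successes on pattern A versus 10/20 on pattern B, the 2x2 chi-square statistic comes out to roughly 7.62, above the 3.841 critical value, so the test would complete with a winner; at 13/20 versus 10/20 it is roughly 0.92 and no winner is declared.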