pikakit 1.0.7 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -6
- package/bin/kit.mjs +6 -2
- package/bin/lib/commands/help.js +13 -8
- package/bin/lib/config.js +4 -2
- package/lib/agent-cli/lib/ab-testing.js +508 -0
- package/lib/agent-cli/lib/causality-engine.js +623 -0
- package/lib/agent-cli/lib/dashboard-data.js +365 -0
- package/lib/agent-cli/lib/fix.js +1 -1
- package/lib/agent-cli/lib/metrics-collector.js +523 -0
- package/lib/agent-cli/lib/metrics-schema.js +410 -0
- package/lib/agent-cli/lib/precision-skill-generator.js +584 -0
- package/lib/agent-cli/lib/recall.js +1 -1
- package/lib/agent-cli/lib/reinforcement.js +610 -0
- package/lib/agent-cli/lib/ui/index.js +37 -14
- package/package.json +4 -2
- package/lib/agent-cli/lib/auto-learn.js +0 -319
- package/lib/agent-cli/scripts/adaptive_engine.js +0 -381
- package/lib/agent-cli/scripts/error_sensor.js +0 -565
- package/lib/agent-cli/scripts/learn_from_failure.js +0 -225
- package/lib/agent-cli/scripts/pattern_analyzer.js +0 -781
- package/lib/agent-cli/scripts/skill_injector.js +0 -387
- package/lib/agent-cli/scripts/success_sensor.js +0 -500
- package/lib/agent-cli/scripts/user_correction_sensor.js +0 -426
- package/lib/agent-cli/services/auto-learn-service.js +0 -247
package/README.md
CHANGED
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
## ⚡ Installation
|
|
13
13
|
|
|
14
14
|
```bash
|
|
15
|
-
npx pikakit
|
|
15
|
+
npx pikakit
|
|
16
16
|
```
|
|
17
17
|
|
|
18
18
|
**That's it!** One command installs everything:
|
|
@@ -34,6 +34,10 @@ npx pikakit add pikakit/agent-skills
|
|
|
34
34
|
### 1. Add Skills from GitHub
|
|
35
35
|
|
|
36
36
|
```bash
|
|
37
|
+
# Recommended: One command install
|
|
38
|
+
npx pikakit
|
|
39
|
+
|
|
40
|
+
# Or specify a repo:
|
|
37
41
|
npx pikakit add <owner>/<repo>
|
|
38
42
|
|
|
39
43
|
# Examples:
|
|
@@ -126,19 +130,19 @@ agent watch # Real-time monitor
|
|
|
126
130
|
### Install Official Skills
|
|
127
131
|
|
|
128
132
|
```bash
|
|
129
|
-
npx pikakit add pikakit
|
|
133
|
+
npx pikakit add pikakit-agent-skills
|
|
130
134
|
```
|
|
131
135
|
|
|
132
136
|
### Install to Global Location
|
|
133
137
|
|
|
134
138
|
```bash
|
|
135
|
-
npx pikakit add pikakit
|
|
139
|
+
npx pikakit add pikakit-agent-skills --global
|
|
136
140
|
```
|
|
137
141
|
|
|
138
142
|
### Force Reinstall
|
|
139
143
|
|
|
140
144
|
```bash
|
|
141
|
-
npx pikakit add pikakit
|
|
145
|
+
npx pikakit add pikakit-agent-skills --force
|
|
142
146
|
```
|
|
143
147
|
|
|
144
148
|
---
|
|
@@ -193,7 +197,7 @@ npm install # Node.js dependencies
|
|
|
193
197
|
|
|
194
198
|
| Package | Purpose |
|
|
195
199
|
|---------|---------|
|
|
196
|
-
| [agent-skills](https://
|
|
200
|
+
| [pikakit-agent-skills](https://www.npmjs.com/package/pikakit-agent-skills) | Main skills repository |
|
|
197
201
|
| [pikakit](https://www.npmjs.com/package/pikakit) | This CLI installer |
|
|
198
202
|
|
|
199
203
|
---
|
|
@@ -220,7 +224,7 @@ cd pikakit
|
|
|
220
224
|
npm install
|
|
221
225
|
|
|
222
226
|
# Run locally
|
|
223
|
-
node bin/cli.mjs add pikakit
|
|
227
|
+
node bin/cli.mjs add pikakit-agent-skills
|
|
224
228
|
```
|
|
225
229
|
|
|
226
230
|
---
|
package/bin/kit.mjs
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
|
|
1
|
+
#!/usr/bin/env node
|
|
2
2
|
/**
|
|
3
3
|
* Install Agent Skill CLI
|
|
4
4
|
* @description Package manager for AI Agent Skills
|
|
5
5
|
*/
|
|
6
6
|
import { c, brandedIntro } from "./lib/ui.js";
|
|
7
|
-
import { command, params, VERSION } from "./lib/config.js";
|
|
7
|
+
import { command, params, VERSION, DEFAULT_REPO } from "./lib/config.js";
|
|
8
8
|
|
|
9
9
|
// --- Command Registry ---
|
|
10
10
|
const COMMANDS = {
|
|
@@ -67,6 +67,10 @@ async function main() {
|
|
|
67
67
|
if (found) {
|
|
68
68
|
const cmdModule = await import(found.config.module);
|
|
69
69
|
await cmdModule.run(found.config.hasParam ? params[0] : undefined);
|
|
70
|
+
} else if (command === "" || command === undefined) {
|
|
71
|
+
// NEW: No command = default install from pikakit/agent-skills
|
|
72
|
+
const cmdModule = await import("./lib/commands/install.js");
|
|
73
|
+
await cmdModule.run(DEFAULT_REPO);
|
|
70
74
|
} else if (command.includes("/")) {
|
|
71
75
|
// Direct install via org/repo syntax
|
|
72
76
|
const cmdModule = await import("./lib/commands/install.js");
|
package/bin/lib/commands/help.js
CHANGED
|
@@ -137,19 +137,24 @@ function showQuickStart() {
|
|
|
137
137
|
step(c.bold("Quick Start Guide"), S.diamondFilled, "cyan");
|
|
138
138
|
stepLine();
|
|
139
139
|
|
|
140
|
-
step(c.bold("1. Install
|
|
141
|
-
step(" " + c.cyan("
|
|
140
|
+
step(c.bold("1. Install PikaKit (Recommended)"));
|
|
141
|
+
step(" " + c.cyan("npx pikakit"));
|
|
142
|
+
step(" " + c.dim("→ Installs all skills from pikakit/agent-skills"));
|
|
142
143
|
stepLine();
|
|
143
144
|
|
|
144
|
-
step(c.bold("2.
|
|
145
|
-
step(" " + c.
|
|
146
|
-
step(" " + c.dim("
|
|
145
|
+
step(c.bold("2. Or install from specific repo"));
|
|
146
|
+
step(" " + c.cyan("npx pikakit add <org/repo>"));
|
|
147
|
+
step(" " + c.dim("Example: npx pikakit add pikakit/agent-skills"));
|
|
147
148
|
stepLine();
|
|
148
149
|
|
|
149
|
-
step(c.bold("3.
|
|
150
|
-
step(" " + c.cyan("
|
|
150
|
+
step(c.bold("3. Initialize directory only"));
|
|
151
|
+
step(" " + c.cyan("npx pikakit init"));
|
|
151
152
|
stepLine();
|
|
152
153
|
|
|
153
|
-
step(c.bold("4.
|
|
154
|
+
step(c.bold("4. Check installation"));
|
|
155
|
+
step(" " + c.cyan("npx pikakit doctor"));
|
|
156
|
+
stepLine();
|
|
157
|
+
|
|
158
|
+
step(c.bold("5. Use in your AI"));
|
|
154
159
|
step(" " + c.dim("Skills are now available in .agent/skills/"));
|
|
155
160
|
}
|
package/bin/lib/config.js
CHANGED
|
@@ -30,8 +30,8 @@ export const BACKUP_DIR = path.join(CACHE_ROOT, "backups");
|
|
|
30
30
|
|
|
31
31
|
const args = process.argv.slice(2);
|
|
32
32
|
|
|
33
|
-
/** Command name (first non-flag argument) */
|
|
34
|
-
export const command = args[0] || "
|
|
33
|
+
/** Command name (first non-flag argument, empty string if none) */
|
|
34
|
+
export const command = args[0] || "";
|
|
35
35
|
|
|
36
36
|
/** All flags (starting with --) */
|
|
37
37
|
export const flags = new Set(args.filter((a) => a.startsWith("--")));
|
|
@@ -79,3 +79,5 @@ export const VERSION = (() => {
|
|
|
79
79
|
catch { return "1.2.0"; }
|
|
80
80
|
})();
|
|
81
81
|
|
|
82
|
+
/** Default skills repository for npx pikakit shorthand */
|
|
83
|
+
export const DEFAULT_REPO = "pikakit/agent-skills";
|
|
@@ -0,0 +1,508 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AutoLearn v6.0 - A/B Testing Engine
|
|
3
|
+
*
|
|
4
|
+
* Compares patterns to determine which is more effective.
|
|
5
|
+
* Uses statistical significance to select winners.
|
|
6
|
+
*
|
|
7
|
+
* Key concepts:
|
|
8
|
+
* - Split Traffic: 50/50 allocation between patterns
|
|
9
|
+
* - Track Outcomes: Success rate per pattern
|
|
10
|
+
* - Statistical Significance: Chi-square test
|
|
11
|
+
* - Winner Selection: Auto-select when significant
|
|
12
|
+
*
|
|
13
|
+
* @version 6.0.0
|
|
14
|
+
* @author PikaKit
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import fs from 'fs';
|
|
18
|
+
import path from 'path';
|
|
19
|
+
import { recordABTestEvent } from './metrics-collector.js';
|
|
20
|
+
import { applyReinforcement, REINFORCEMENT_RULES } from './reinforcement.js';
|
|
21
|
+
|
|
22
|
+
// ============================================================================
|
|
23
|
+
// CONFIGURATION
|
|
24
|
+
// ============================================================================
|
|
25
|
+
|
|
26
|
+
// Directory for persisted A/B test state, resolved against the process's
// current working directory when this module is loaded.
const KNOWLEDGE_DIR = path.join(process.cwd(), '.agent', 'knowledge');
// Single JSON file that stores every A/B test together (read and rewritten as a whole).
const AB_TESTS_FILE = path.join(KNOWLEDGE_DIR, 'ab-tests.json');

// Minimum samples before we can determine winner
// (used as the default for a test's `minSamples` when none is supplied).
const MIN_SAMPLES_PER_VARIANT = 10;

// Confidence level for statistical significance (95%)
// NOTE(review): within this file the constant is only re-exported; the chi-square
// code hard-codes the matching critical value (3.841) instead of deriving it from
// this value, so changing it here does not change the significance test.
const SIGNIFICANCE_LEVEL = 0.95;

// Default test duration (7 days), in milliseconds
const DEFAULT_TEST_DURATION_MS = 7 * 24 * 60 * 60 * 1000;
|
|
37
|
+
|
|
38
|
+
// ============================================================================
|
|
39
|
+
// A/B TEST DATA STRUCTURE
|
|
40
|
+
// ============================================================================
|
|
41
|
+
|
|
42
|
+
/**
|
|
43
|
+
* @typedef {Object} ABTest
|
|
44
|
+
* @property {string} id - Test ID
|
|
45
|
+
* @property {string} status - 'pending' | 'running' | 'completed' | 'cancelled'
|
|
46
|
+
* @property {Object} patternA - Pattern A details
|
|
47
|
+
* @property {Object} patternB - Pattern B details (or baseline)
|
|
48
|
+
* @property {number} allocation - Traffic split (0.5 = 50/50)
|
|
49
|
+
* @property {Object} metrics - Success metrics per variant
|
|
50
|
+
* @property {Object} result - Test result when completed
|
|
51
|
+
*/
|
|
52
|
+
|
|
53
|
+
// ============================================================================
|
|
54
|
+
// TEST MANAGEMENT
|
|
55
|
+
// ============================================================================
|
|
56
|
+
|
|
57
|
+
/**
 * Create a new A/B test in the 'pending' state and persist it via saveABTest().
 *
 * Only `id`, `confidence` and `name` are copied from each pattern; when a pattern
 * has no `name`, its `id` is used instead. Passing no second pattern compares
 * pattern A against a synthetic "baseline" entry.
 *
 * @param {Object} patternA - First pattern
 * @param {Object} patternB - Second pattern (or null for baseline)
 * @param {Object} options - Test options
 * @param {number} [options.allocation=0.5] - Probability that a task is routed to
 *   pattern A. An explicit 0 is honoured (all traffic goes to pattern B).
 * @param {number} [options.minSamples] - Samples required per variant before the
 *   significance check runs; falsy values fall back to MIN_SAMPLES_PER_VARIANT
 * @param {number} [options.maxDuration] - Maximum duration in milliseconds; falsy
 *   values fall back to DEFAULT_TEST_DURATION_MS
 * @returns {Object} - Created test
 */
export function createABTest(patternA, patternB = null, options = {}) {
    // Keep only the fields the test record needs from a pattern.
    const describePattern = (pattern) => ({
        id: pattern.id,
        confidence: pattern.confidence,
        name: pattern.name || pattern.id
    });

    // Fresh counters for one variant.
    const emptyMetrics = () => ({ applied: 0, success: 0, failure: 0, totalTime: 0 });

    const test = {
        id: `AB-${Date.now()}`,
        createdAt: new Date().toISOString(),
        startedAt: null,
        endedAt: null,
        status: 'pending',

        // Patterns
        patternA: describePattern(patternA),
        patternB: patternB ? describePattern(patternB) : {
            id: 'baseline',
            confidence: null,
            name: 'No pattern (baseline)'
        },

        // Configuration
        // `??` rather than `||`: 0 is a legitimate allocation and must not be
        // silently replaced by the 50/50 default.
        allocation: options.allocation ?? 0.5,
        minSamples: options.minSamples || MIN_SAMPLES_PER_VARIANT,
        maxDuration: options.maxDuration || DEFAULT_TEST_DURATION_MS,

        // Metrics
        metrics: {
            patternA: emptyMetrics(),
            patternB: emptyMetrics()
        },

        // Result (filled when completed)
        result: null
    };

    saveABTest(test);
    return test;
}
|
|
106
|
+
|
|
107
|
+
/**
 * Mark a stored A/B test as running and stamp its start time.
 *
 * @param {string} testId - Test ID
 * @returns {Object} - The updated and re-saved test, or null when no test has that ID
 */
export function startABTest(testId) {
    const record = loadABTest(testId);

    if (record) {
        Object.assign(record, {
            status: 'running',
            startedAt: new Date().toISOString()
        });
        saveABTest(record);
        return record;
    }

    return null;
}
|
|
122
|
+
|
|
123
|
+
/**
 * Pick the variant a task should use for a given test.
 *
 * The choice is a fresh random draw on every call, compared against the
 * test's `allocation` (the share of traffic sent to pattern A).
 *
 * @param {string} testId - Test ID
 * @returns {string} - 'patternA' | 'patternB', or null when the test is missing or not running
 */
export function getVariantForTask(testId) {
    const record = loadABTest(testId);
    const isRunning = Boolean(record) && record.status === 'running';

    if (!isRunning) {
        return null;
    }

    const roll = Math.random();
    if (roll < record.allocation) {
        return 'patternA';
    }
    return 'patternB';
}
|
|
137
|
+
|
|
138
|
+
/**
 * Record outcome for an A/B test
 *
 * Increments the variant's counters, persists them, and completes the test when
 * checkTestCompletion() says it is done.
 *
 * @param {string} testId - Test ID
 * @param {string} variant - 'patternA' | 'patternB'
 * @param {Object} outcome - Task outcome; `success` (truthy/falsy) selects the
 *   counter to bump and a truthy `duration` is added to the variant's total time
 * @returns {Object} - The updated test (the completed test, including `result`,
 *   when this outcome finished it), or null when the test is missing, not
 *   running, or `variant` is not one of the test's own metric keys
 */
export function recordABOutcome(testId, variant, outcome) {
    const test = loadABTest(testId);
    if (!test || test.status !== 'running') return null;

    // Own-property check: a plain lookup would resolve inherited keys such as
    // "__proto__" or "constructor" and the counters below would then be written
    // onto a shared prototype object.
    if (!Object.prototype.hasOwnProperty.call(test.metrics, variant)) return null;

    const metrics = test.metrics[variant];
    if (!metrics) return null;

    metrics.applied++;

    if (outcome.success) {
        metrics.success++;
    } else {
        metrics.failure++;
    }

    if (outcome.duration) {
        metrics.totalTime += outcome.duration;
    }

    // Persist before deciding on completion: completeABTest() reloads the test
    // from disk, so an unsaved outcome would be missing from the final analysis.
    saveABTest(test);

    // Check if test should complete
    const shouldComplete = checkTestCompletion(test);
    if (shouldComplete.complete) {
        // Hand back the completed record so the caller sees the final status and
        // result; fall back to the in-memory test if it could not be reloaded.
        return completeABTest(testId, shouldComplete.reason) ?? test;
    }

    return test;
}
|
|
173
|
+
|
|
174
|
+
// ============================================================================
|
|
175
|
+
// STATISTICAL ANALYSIS
|
|
176
|
+
// ============================================================================
|
|
177
|
+
|
|
178
|
+
/**
 * Fraction of applications of a variant that succeeded.
 *
 * @param {Object} metrics - Variant metrics (`applied` and `success` counters)
 * @returns {number} - `success / applied`, or 0 when the variant was never applied
 */
function calculateSuccessRate(metrics) {
    const { applied, success } = metrics;
    return applied === 0 ? 0 : success / applied;
}
|
|
187
|
+
|
|
188
|
+
/**
 * Calculate chi-square statistic for A/B comparison
 *
 * Treats the success/failure counters of the two variants as a 2x2 table,
 * sums (observed - expected)^2 / expected over the four cells (cells whose
 * expected count is 0 are skipped), and compares the sum against the fixed
 * critical value 3.841 (95% confidence, 1 degree of freedom).
 *
 * The returned `confidence` is a heuristic, not a p-value: it is 0.95 once the
 * statistic exceeds the critical value and otherwise scales linearly with
 * chiSquare / criticalValue.
 *
 * @param {Object} metricsA - Pattern A metrics (`success`, `failure`)
 * @param {Object} metricsB - Pattern B metrics (`success`, `failure`)
 * @returns {Object} - { chiSquare, criticalValue, significant, confidence };
 *   the same four fields are returned even when no outcomes were recorded
 */
function calculateChiSquare(metricsA, metricsB) {
    // Critical value for 95% confidence, 1 degree of freedom
    const criticalValue = 3.841;

    const totalA = metricsA.success + metricsA.failure;
    const totalB = metricsB.success + metricsB.failure;
    const total = totalA + totalB;

    // No data at all: report a neutral result with the full result shape so
    // consumers can read criticalValue/confidence without undefined checks.
    if (total === 0) {
        return { chiSquare: 0, criticalValue, significant: false, confidence: 0 };
    }

    const successTotal = metricsA.success + metricsB.success;
    const failureTotal = metricsA.failure + metricsB.failure;

    // [observed count, row total * column total] for each cell of the table.
    const cells = [
        [metricsA.success, totalA * successTotal],
        [metricsA.failure, totalA * failureTotal],
        [metricsB.success, totalB * successTotal],
        [metricsB.failure, totalB * failureTotal]
    ];

    let chiSquare = 0;
    for (const [observed, rowTimesColumn] of cells) {
        const expected = rowTimesColumn / total;
        // A zero expected count would divide by zero; such cells contribute nothing.
        if (expected > 0) {
            chiSquare += Math.pow(observed - expected, 2) / expected;
        }
    }

    const significant = chiSquare > criticalValue;

    return {
        chiSquare,
        criticalValue,
        significant,
        confidence: significant ? 0.95 : chiSquare / criticalValue * 0.95
    };
}
|
|
237
|
+
|
|
238
|
+
/**
 * Summarise the current state of an A/B test.
 *
 * A winner is named only when the chi-square result is significant and the
 * two success rates differ; `margin` is then the absolute difference between
 * the rates. Otherwise `winner` stays null and `margin` stays 0.
 *
 * @param {Object} test - A/B test object
 * @returns {Object} - Per-variant summary, statistics, winner, reason, margin and timestamp
 */
export function analyzeABTest(test) {
    const { patternA: metricsA, patternB: metricsB } = test.metrics;

    const rateA = calculateSuccessRate(metricsA);
    const rateB = calculateSuccessRate(metricsB);
    const statistics = calculateChiSquare(metricsA, metricsB);

    // Mean recorded time per application; 0 when the variant was never applied.
    const meanTime = (m) => (m.applied > 0 ? m.totalTime / m.applied : 0);
    const avgTimeA = meanTime(metricsA);
    const avgTimeB = meanTime(metricsB);

    let winner = null;
    let winnerReason = '';
    let margin = 0;

    if (!statistics.significant) {
        winnerReason = 'No statistically significant difference';
    } else if (rateA > rateB || rateB > rateA) {
        const aLeads = rateA > rateB;
        winner = aLeads ? 'patternA' : 'patternB';
        margin = aLeads ? rateA - rateB : rateB - rateA;
        winnerReason = `Higher success rate by ${(margin * 100).toFixed(1)}%`;
    }

    return {
        patternA: {
            id: test.patternA.id,
            samples: metricsA.applied,
            successRate: rateA,
            avgTime: avgTimeA
        },
        patternB: {
            id: test.patternB.id,
            samples: metricsB.applied,
            successRate: rateB,
            avgTime: avgTimeB
        },
        statistics,
        winner,
        winnerReason,
        margin,
        analyzedAt: new Date().toISOString()
    };
}
|
|
294
|
+
|
|
295
|
+
// ============================================================================
|
|
296
|
+
// TEST COMPLETION
|
|
297
|
+
// ============================================================================
|
|
298
|
+
|
|
299
|
+
/**
 * Check if test should be completed
 *
 * Three rules are evaluated in order; the first that matches wins:
 *  1. both variants reached `test.minSamples` and the analysis is significant,
 *  2. the test has been running longer than `test.maxDuration` (milliseconds),
 *  3. early stopping: at least 20 samples in total, both variants sampled at
 *     least once, and success rates more than 0.3 apart.
 *
 * @param {Object} test - A/B test
 * @returns {Object} - { complete: boolean, reason: string }; `reason` is only
 *   present when `complete` is true
 */
function checkTestCompletion(test) {
    const { patternA: metricsA, patternB: metricsB } = test.metrics;

    // Check minimum samples
    if (metricsA.applied >= test.minSamples && metricsB.applied >= test.minSamples) {
        const analysis = analyzeABTest(test);

        if (analysis.statistics.significant) {
            return {
                complete: true,
                reason: 'Statistical significance reached'
            };
        }
    }

    // Check max duration
    if (test.startedAt) {
        const duration = Date.now() - new Date(test.startedAt).getTime();
        if (duration > test.maxDuration) {
            return {
                complete: true,
                reason: 'Max duration reached'
            };
        }
    }

    // Check if one variant is clearly better (early stopping)
    const totalSamples = metricsA.applied + metricsB.applied;
    // A variant with no samples reports a success rate of 0, which would look
    // like a large gap against any sampled variant. Require data on both sides
    // so an unsampled variant cannot end the test as a "clear winner".
    const bothSampled = metricsA.applied > 0 && metricsB.applied > 0;
    if (bothSampled && totalSamples >= 20) {
        const rateA = calculateSuccessRate(metricsA);
        const rateB = calculateSuccessRate(metricsB);
        const diff = Math.abs(rateA - rateB);

        // Early stop if difference > 30%
        if (diff > 0.3) {
            return {
                complete: true,
                reason: 'Clear winner detected (early stopping)'
            };
        }
    }

    return { complete: false };
}
|
|
349
|
+
|
|
350
|
+
/**
 * Finish an A/B test: analyse it, store the outcome, and report it.
 *
 * The stored `result` is the analysis plus `completionReason`. A metrics
 * event is recorded for every completion; the reinforcement step runs only
 * when the analysis named a winner.
 *
 * @param {string} testId - Test ID
 * @param {string} reason - Completion reason
 * @returns {Object} - Completed test with results, or null when no test has that ID
 */
export function completeABTest(testId, reason) {
    const record = loadABTest(testId);
    if (!record) {
        return null;
    }

    const analysis = analyzeABTest(record);
    const hasWinner = !!analysis.winner;

    Object.assign(record, {
        status: 'completed',
        endedAt: new Date().toISOString(),
        result: { ...analysis, completionReason: reason }
    });
    saveABTest(record);

    // Record for metrics
    recordABTestEvent({ hasWinner });

    // Reinforcement is only meaningful when there is a winner/loser pair.
    if (hasWinner) {
        applyABTestReinforcement(record, analysis);
    }

    return record;
}
|
|
383
|
+
|
|
384
|
+
/**
 * Work out the confidence adjustments suggested by a finished A/B test.
 *
 * Nothing is modified here: the function logs a short summary and returns the
 * recommended adjustments. The reward is half the margin capped at 0.15; the
 * penalty is 0.3 times the margin capped at 0.10, returned as a negative number.
 *
 * @param {Object} test - Completed test
 * @param {Object} analysis - Test analysis (`winner`, `margin`, `winnerReason`)
 * @returns {Object} - { winnerId, loserId, winnerReward, loserPenalty }
 */
function applyABTestReinforcement(test, analysis) {
    const { winner, margin, winnerReason } = analysis;

    const winnerReward = Math.min(0.15, margin * 0.5);
    const loserPenalty = -Math.min(0.10, margin * 0.3);

    console.log(`A/B Test ${test.id} completed:`);
    console.log(` Winner: ${winner} (+${winnerReward.toFixed(2)} confidence)`);
    console.log(` Reason: ${winnerReason}`);

    // Order the two patterns as [winner, loser].
    const [winningPattern, losingPattern] = winner === 'patternA'
        ? [test.patternA, test.patternB]
        : [test.patternB, test.patternA];

    return {
        winnerId: winningPattern.id,
        loserId: losingPattern.id,
        winnerReward,
        loserPenalty
    };
}
|
|
409
|
+
|
|
410
|
+
// ============================================================================
|
|
411
|
+
// STORAGE
|
|
412
|
+
// ============================================================================
|
|
413
|
+
|
|
414
|
+
/**
 * Look up a single stored A/B test by its ID.
 *
 * @param {string} testId - Test ID
 * @returns {Object|null} - The matching test, or null when none matches
 */
export function loadABTest(testId) {
    const match = loadAllABTests().find(({ id }) => id === testId);
    return match ? match : null;
}
|
|
423
|
+
|
|
424
|
+
/**
 * Load all A/B tests
 *
 * Reading is best-effort: a missing, unreadable or unparsable file yields an
 * empty list instead of an error.
 *
 * @returns {Array} - All tests; always an array, even when the file content
 *   is valid JSON of another type
 */
export function loadAllABTests() {
    try {
        if (!fs.existsSync(AB_TESTS_FILE)) return [];

        const parsed = JSON.parse(fs.readFileSync(AB_TESTS_FILE, 'utf8'));

        // Callers use .find()/.filter()/.push() on the result, so valid JSON that
        // is not an array (e.g. `{}` or `null`) is treated like an empty store
        // instead of being passed through and crashing them.
        return Array.isArray(parsed) ? parsed : [];
    } catch {
        return [];
    }
}
|
|
436
|
+
|
|
437
|
+
/**
 * Insert or replace one A/B test in the tests file.
 *
 * The whole list is loaded, the entry with the same `id` is replaced (or the
 * test is appended when there is none), and the list is written back as
 * pretty-printed JSON. Failures are logged to stderr and not rethrown.
 *
 * @param {Object} test - Test to save
 */
export function saveABTest(test) {
    try {
        const dirMissing = !fs.existsSync(KNOWLEDGE_DIR);
        if (dirMissing) {
            fs.mkdirSync(KNOWLEDGE_DIR, { recursive: true });
        }

        const stored = loadAllABTests();
        const slot = stored.findIndex(({ id }) => id === test.id);

        if (slot < 0) {
            stored.push(test);
        } else {
            stored[slot] = test;
        }

        const serialized = JSON.stringify(stored, null, 2);
        fs.writeFileSync(AB_TESTS_FILE, serialized, 'utf8');
    } catch (error) {
        console.error('Error saving A/B test:', error.message);
    }
}
|
|
461
|
+
|
|
462
|
+
/**
 * List the stored A/B tests whose status is 'running'.
 *
 * @returns {Array} - Running tests
 */
export function getActiveTests() {
    const isRunning = ({ status }) => status === 'running';
    return loadAllABTests().filter(isRunning);
}
|
|
469
|
+
|
|
470
|
+
/**
 * Count the stored A/B tests by status.
 *
 * `withWinner` counts completed tests whose result names a winner, and
 * `winRate` is that count divided by the number of completed tests (0 when
 * nothing has completed yet).
 *
 * @returns {Object} - { total, running, completed, withWinner, winRate, pending }
 */
export function getABTestStats() {
    const tests = loadAllABTests();

    let running = 0;
    let completed = 0;
    let withWinner = 0;
    let pending = 0;

    for (const entry of tests) {
        switch (entry.status) {
            case 'running':
                running += 1;
                break;
            case 'completed':
                completed += 1;
                if (entry.result?.winner) {
                    withWinner += 1;
                }
                break;
            case 'pending':
                pending += 1;
                break;
            default:
                // Other statuses only count towards the total.
                break;
        }
    }

    return {
        total: tests.length,
        running,
        completed,
        withWinner,
        winRate: completed > 0 ? withWinner / completed : 0,
        pending
    };
}
|
|
489
|
+
|
|
490
|
+
// ============================================================================
|
|
491
|
+
// EXPORTS
|
|
492
|
+
// ============================================================================
|
|
493
|
+
|
|
494
|
+
// Default export: bundles every exported function of this module together with
// the MIN_SAMPLES_PER_VARIANT and SIGNIFICANCE_LEVEL constants into one object.
export default {
    createABTest,
    startABTest,
    getVariantForTask,
    recordABOutcome,
    analyzeABTest,
    completeABTest,
    loadABTest,
    loadAllABTests,
    saveABTest,
    getActiveTests,
    getABTestStats,
    MIN_SAMPLES_PER_VARIANT,
    SIGNIFICANCE_LEVEL
};
|