pikakit 1.0.8 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -2
- package/lib/agent-cli/lib/ab-testing.js +508 -0
- package/lib/agent-cli/lib/causality-engine.js +623 -0
- package/lib/agent-cli/lib/dashboard-data.js +365 -0
- package/lib/agent-cli/lib/fix.js +1 -1
- package/lib/agent-cli/lib/metrics-collector.js +523 -0
- package/lib/agent-cli/lib/metrics-schema.js +410 -0
- package/lib/agent-cli/lib/precision-skill-generator.js +584 -0
- package/lib/agent-cli/lib/recall.js +1 -1
- package/lib/agent-cli/lib/reinforcement.js +610 -0
- package/lib/agent-cli/lib/ui/index.js +37 -14
- package/package.json +2 -2
- package/lib/agent-cli/lib/auto-learn.js +0 -319
- package/lib/agent-cli/scripts/adaptive_engine.js +0 -381
- package/lib/agent-cli/scripts/error_sensor.js +0 -565
- package/lib/agent-cli/scripts/learn_from_failure.js +0 -225
- package/lib/agent-cli/scripts/pattern_analyzer.js +0 -781
- package/lib/agent-cli/scripts/skill_injector.js +0 -387
- package/lib/agent-cli/scripts/success_sensor.js +0 -500
- package/lib/agent-cli/scripts/user_correction_sensor.js +0 -426
- package/lib/agent-cli/services/auto-learn-service.js +0 -247
|
@@ -0,0 +1,410 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AutoLearn v6.0 - Metrics Schema
|
|
3
|
+
*
|
|
4
|
+
* Defines all 18 KPIs for the Precision Learning Engine.
|
|
5
|
+
* These metrics are measurable and displayed on Dashboard.
|
|
6
|
+
*
|
|
7
|
+
* @version 6.0.0
|
|
8
|
+
* @author PikaKit
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
// ============================================================================
|
|
12
|
+
// METRIC CATEGORIES
|
|
13
|
+
// ============================================================================
|
|
14
|
+
|
|
15
|
+
/**
 * Category identifiers used by the `category` field of every METRICS_SCHEMA entry.
 *
 * Frozen so that an importer cannot reassign or add categories at runtime;
 * the helper functions compare against these exact string values.
 */
export const METRIC_CATEGORIES = Object.freeze({
    CORE: 'core',               // Task success/failure metrics
    LEARNING: 'learning',       // Pattern learning effectiveness
    EVOLUTION: 'evolution',     // Skill evolution tracking
    IMPROVEMENT: 'improvement', // Week-over-week improvements
});
|
|
21
|
+
|
|
22
|
+
// ============================================================================
|
|
23
|
+
// 18 KPIs DEFINITION
|
|
24
|
+
// ============================================================================
|
|
25
|
+
|
|
26
|
+
/**
 * Definitions of the 18 KPIs, keyed by an upper-case constant name.
 *
 * Every entry has the same shape:
 * - `id`          snake_case identifier (the key the helper functions look up by)
 * - `name`        human-readable title
 * - `description` one-sentence explanation
 * - `category`    one of the METRIC_CATEGORIES values
 * - `formula`     descriptive string only; nothing in this file evaluates it
 * - `unit`        unit suffix appended to the target in validation messages
 * - `target`      numeric goal, or `null` when the metric is only tracked
 * - `direction`   'higher_is_better' | 'lower_is_better' | 'neutral'
 * - `dashboard`   `{ widget, color, position: { row, col } }` layout hints
 */
export const METRICS_SCHEMA = {
    // ─────────────────────────────────────────────────────────────────────────
    // CORE METRICS (1-5)
    // ─────────────────────────────────────────────────────────────────────────

    TASK_SUCCESS_RATE: {
        id: 'task_success_rate',
        name: 'Task Success Rate',
        description: 'Percentage of tasks completed without errors',
        category: METRIC_CATEGORIES.CORE,
        formula: 'successful_tasks / total_tasks * 100',
        unit: '%',
        target: 90,
        direction: 'higher_is_better',
        dashboard: {
            widget: 'gauge',
            color: 'green',
            position: { row: 1, col: 1 }
        }
    },

    ERROR_REPEAT_RATE: {
        id: 'error_repeat_rate',
        name: 'Error Repeat Rate',
        description: 'Percentage of errors that occurred more than once',
        category: METRIC_CATEGORIES.CORE,
        formula: 'repeated_errors / total_errors * 100',
        unit: '%',
        target: 5,
        direction: 'lower_is_better',
        dashboard: {
            widget: 'gauge',
            color: 'red',
            position: { row: 1, col: 2 }
        }
    },

    FIRST_TIME_SUCCESS: {
        id: 'first_time_success',
        name: 'First-Time Success',
        description: 'Percentage of tasks completed on first attempt',
        category: METRIC_CATEGORIES.CORE,
        formula: 'no_retry_tasks / total_tasks * 100',
        unit: '%',
        target: 85,
        direction: 'higher_is_better',
        dashboard: {
            widget: 'gauge',
            color: 'green',
            position: { row: 1, col: 3 }
        }
    },

    TIME_TO_COMPLETION: {
        id: 'time_to_completion',
        name: 'Time to Completion',
        description: 'Average time to complete a task',
        category: METRIC_CATEGORIES.CORE,
        formula: 'avg(task_end - task_start)',
        unit: 'seconds',
        target: null, // Track trend, no fixed target
        direction: 'lower_is_better',
        dashboard: {
            widget: 'trend_line',
            color: 'blue',
            position: { row: 2, col: 1 }
        }
    },

    HUMAN_INTERVENTION_RATE: {
        id: 'human_intervention_rate',
        name: 'Human Intervention Rate',
        description: 'Percentage of tasks requiring manual user fix',
        category: METRIC_CATEGORIES.CORE,
        formula: 'manual_fixes / total_tasks * 100',
        unit: '%',
        target: 10,
        direction: 'lower_is_better',
        dashboard: {
            widget: 'gauge',
            color: 'orange',
            position: { row: 2, col: 2 }
        }
    },

    // ─────────────────────────────────────────────────────────────────────────
    // LEARNING METRICS (6-10)
    // ─────────────────────────────────────────────────────────────────────────

    PATTERN_PRECISION: {
        id: 'pattern_precision',
        name: 'Pattern Precision',
        description: 'How often detected patterns are true positives',
        category: METRIC_CATEGORIES.LEARNING,
        // Scaled by 100 so the formula agrees with the '%' unit and the target of 80.
        formula: 'true_positives / (true_positives + false_positives) * 100',
        unit: '%',
        target: 80,
        direction: 'higher_is_better',
        dashboard: {
            widget: 'gauge',
            color: 'purple',
            position: { row: 3, col: 1 }
        }
    },

    PATTERN_RECALL: {
        id: 'pattern_recall',
        name: 'Pattern Recall',
        description: 'How many actual issues are caught by patterns',
        category: METRIC_CATEGORIES.LEARNING,
        // Scaled by 100 so the formula agrees with the '%' unit and the target of 70.
        formula: 'true_positives / (true_positives + false_negatives) * 100',
        unit: '%',
        target: 70,
        direction: 'higher_is_better',
        dashboard: {
            widget: 'gauge',
            color: 'purple',
            position: { row: 3, col: 2 }
        }
    },

    SKILL_EFFECTIVENESS: {
        id: 'skill_effectiveness',
        name: 'Skill Effectiveness',
        description: 'Percentage of times a skill actually helped',
        category: METRIC_CATEGORIES.LEARNING,
        formula: 'tasks_helped / tasks_where_skill_applied * 100',
        unit: '%',
        target: 75,
        direction: 'higher_is_better',
        dashboard: {
            widget: 'bar_chart',
            color: 'teal',
            position: { row: 4, col: 1 }
        }
    },

    SKILL_COVERAGE: {
        id: 'skill_coverage',
        name: 'Skill Coverage',
        description: 'Percentage of tasks that had relevant skills loaded',
        category: METRIC_CATEGORIES.LEARNING,
        formula: 'tasks_with_skill / total_tasks * 100',
        unit: '%',
        target: 60,
        direction: 'higher_is_better',
        dashboard: {
            widget: 'pie_chart',
            color: 'cyan',
            position: { row: 4, col: 2 }
        }
    },

    FALSE_POSITIVE_RATE: {
        id: 'false_positive_rate',
        name: 'False Positive Rate',
        description: 'Percentage of alerts that were incorrect',
        category: METRIC_CATEGORIES.LEARNING,
        formula: 'false_positives / total_alerts * 100',
        unit: '%',
        target: 10,
        direction: 'lower_is_better',
        dashboard: {
            widget: 'gauge',
            color: 'red',
            position: { row: 3, col: 3 }
        }
    },

    // ─────────────────────────────────────────────────────────────────────────
    // EVOLUTION METRICS (11-15)
    // ─────────────────────────────────────────────────────────────────────────

    SKILLS_AUTO_GENERATED: {
        id: 'skills_auto_generated',
        name: 'Skills Auto-Generated',
        description: 'Total number of skills created automatically',
        category: METRIC_CATEGORIES.EVOLUTION,
        formula: 'count(auto_skills)',
        unit: 'count',
        target: null, // Track absolute number
        direction: 'higher_is_better',
        dashboard: {
            widget: 'counter',
            color: 'green',
            position: { row: 5, col: 1 }
        }
    },

    SKILLS_PRUNED: {
        id: 'skills_pruned',
        name: 'Skills Pruned',
        description: 'Total number of ineffective skills removed',
        category: METRIC_CATEGORIES.EVOLUTION,
        formula: 'count(pruned_skills)',
        unit: 'count',
        target: null, // Track absolute number
        direction: 'neutral',
        dashboard: {
            widget: 'counter',
            color: 'gray',
            position: { row: 5, col: 2 }
        }
    },

    PATTERN_CONFIDENCE_AVG: {
        id: 'pattern_confidence_avg',
        name: 'Average Pattern Confidence',
        description: 'Mean confidence score across all patterns',
        category: METRIC_CATEGORIES.EVOLUTION,
        formula: 'avg(pattern.confidence)',
        unit: 'score',
        target: 0.7,
        direction: 'higher_is_better',
        dashboard: {
            widget: 'gauge',
            color: 'blue',
            position: { row: 5, col: 3 }
        }
    },

    AB_TEST_WIN_RATE: {
        id: 'ab_test_win_rate',
        name: 'A/B Test Win Rate',
        description: 'Percentage of A/B tests that selected a clear winner',
        category: METRIC_CATEGORIES.EVOLUTION,
        formula: 'winner_selected / total_ab_tests * 100',
        unit: '%',
        target: 60,
        direction: 'higher_is_better',
        dashboard: {
            widget: 'pie_chart',
            color: 'indigo',
            position: { row: 6, col: 1 }
        }
    },

    LEARNING_VELOCITY: {
        id: 'learning_velocity',
        name: 'Learning Velocity',
        description: 'Number of new patterns learned per week',
        category: METRIC_CATEGORIES.EVOLUTION,
        formula: 'new_patterns_this_week',
        unit: 'patterns/week',
        target: null, // Track trend
        direction: 'higher_is_better',
        dashboard: {
            widget: 'trend_line',
            color: 'green',
            position: { row: 6, col: 2 }
        }
    },

    // ─────────────────────────────────────────────────────────────────────────
    // IMPROVEMENT METRICS (16-18)
    // ─────────────────────────────────────────────────────────────────────────

    WEEK_OVER_WEEK_IMPROVEMENT: {
        id: 'week_over_week_improvement',
        name: 'Week-over-Week Improvement',
        description: 'Percentage improvement in success rate vs last week',
        category: METRIC_CATEGORIES.IMPROVEMENT,
        formula: '(this_week_success - last_week_success) / last_week_success * 100',
        unit: '%',
        target: 0, // Any positive is good
        direction: 'higher_is_better',
        dashboard: {
            widget: 'trend_line',
            color: 'green',
            position: { row: 7, col: 1 }
        }
    },

    ERROR_REDUCTION_RATE: {
        id: 'error_reduction_rate',
        name: 'Error Reduction Rate',
        description: 'Percentage reduction in errors compared to baseline',
        category: METRIC_CATEGORIES.IMPROVEMENT,
        // Parenthesised so the subtraction happens before scaling to a percentage.
        formula: '(1 - current_errors / baseline_errors) * 100',
        unit: '%',
        target: 50,
        direction: 'higher_is_better',
        dashboard: {
            widget: 'gauge',
            color: 'green',
            position: { row: 7, col: 2 }
        }
    },

    SKILL_ROI: {
        id: 'skill_roi',
        name: 'Skill ROI',
        description: 'Return on investment for skill creation (time saved / time to create)',
        category: METRIC_CATEGORIES.IMPROVEMENT,
        formula: 'total_time_saved / total_time_to_create_skills',
        unit: 'x',
        target: 10,
        direction: 'higher_is_better',
        dashboard: {
            widget: 'number',
            color: 'gold',
            position: { row: 7, col: 3 }
        }
    }
};
|
|
331
|
+
|
|
332
|
+
// ============================================================================
|
|
333
|
+
// HELPER FUNCTIONS
|
|
334
|
+
// ============================================================================
|
|
335
|
+
|
|
336
|
+
/**
 * Collect every metric definition that belongs to one category.
 *
 * @param {string} category - Category value to match exactly against each
 *   metric's `category` field (e.g. 'core').
 * @returns {Array<Object>} Matching metric definitions in schema order;
 *   an empty array when nothing matches.
 */
export function getMetricsByCategory(category) {
    const definitions = Object.values(METRICS_SCHEMA);
    return definitions.filter((metric) => metric.category === category);
}
|
|
344
|
+
|
|
345
|
+
/**
 * Look up a single metric definition by its `id` field.
 *
 * The lookup compares against each entry's `id` (e.g. 'task_success_rate'),
 * not against the upper-case key it is stored under in METRICS_SCHEMA.
 *
 * @param {string} id - Metric ID to search for.
 * @returns {Object|null} The first definition whose `id` matches, or `null`
 *   when no entry matches.
 */
export function getMetricById(id) {
    for (const metric of Object.values(METRICS_SCHEMA)) {
        if (metric.id === id) {
            return metric;
        }
    }
    return null;
}
|
|
353
|
+
|
|
354
|
+
/**
 * List the `id` of every metric definition.
 *
 * @returns {Array<string>} Metric IDs in the order the entries appear in
 *   METRICS_SCHEMA.
 */
export function getAllMetricIds() {
    return Object.values(METRICS_SCHEMA).map(({ id }) => id);
}
|
|
361
|
+
|
|
362
|
+
/**
 * List every metric definition in dashboard layout order.
 *
 * Entries are ordered by `dashboard.position.row` ascending and, within a
 * row, by `dashboard.position.col` ascending. `Object.values` builds a new
 * array, so sorting it leaves the key order of METRICS_SCHEMA untouched; the
 * elements are the schema's own objects, not copies.
 *
 * @returns {Array<Object>} Metric definitions sorted by row, then column.
 */
export function getMetricsForDashboard() {
    const byGridPosition = (left, right) => {
        const { row: leftRow, col: leftCol } = left.dashboard.position;
        const { row: rightRow, col: rightCol } = right.dashboard.position;
        const rowDiff = leftRow - rightRow;
        return rowDiff !== 0 ? rowDiff : leftCol - rightCol;
    };
    return Object.values(METRICS_SCHEMA).sort(byGridPosition);
}
|
|
374
|
+
|
|
375
|
+
/**
 * Rate a metric value against the target declared in its schema entry.
 *
 * A tolerance band of 20% of the target's magnitude separates 'warning' from
 * 'critical':
 * - 'higher_is_better': value >= target is 'good'; down to target - band is
 *   'warning'; anything lower is 'critical'.
 * - 'lower_is_better': value <= target is 'good'; up to target + band is
 *   'warning'; anything higher is 'critical'.
 *
 * 'neutral' is returned whenever there is nothing meaningful to rate: the
 * metric ID is unknown, the metric has no target, its direction is neither
 * of the two above, or the value is missing / not numeric.
 *
 * @param {string} metricId - Metric ID, resolved through getMetricById.
 * @param {number} value - Current value; numeric strings are coerced.
 * @returns {{ status: 'good'|'warning'|'critical'|'neutral', message: string }}
 */
export function validateMetric(metricId, value) {
    const metric = getMetricById(metricId);
    // `== null` also covers an entry that omits `target` entirely.
    if (!metric || metric.target == null) {
        return { status: 'neutral', message: 'No target defined' };
    }

    const { target, unit, direction } = metric;
    const isHigherBetter = direction === 'higher_is_better';
    const isLowerBetter = direction === 'lower_is_better';
    if (!isHigherBetter && !isLowerBetter) {
        // e.g. 'neutral': there is no "good" side of the target to compare against.
        return { status: 'neutral', message: 'No target direction defined' };
    }

    // Without this guard `null` coerces to 0 and would rate as 'good' on every
    // lower-is-better metric, while `undefined`/NaN would rate as 'critical'.
    const hasValue = value !== null && value !== undefined && value !== '';
    const numericValue = hasValue ? Number(value) : Number.NaN;
    if (Number.isNaN(numericValue)) {
        return { status: 'neutral', message: 'No value available' };
    }

    // Math.abs keeps the band positive even if a target is ever negative.
    const tolerance = Math.abs(target) * 0.2; // 20% tolerance
    const targetLabel = `${target}${unit}`;

    if (isHigherBetter) {
        if (numericValue >= target) {
            return { status: 'good', message: `Above target (${targetLabel})` };
        }
        if (numericValue >= target - tolerance) {
            return { status: 'warning', message: `Near target (${targetLabel})` };
        }
        return { status: 'critical', message: `Below target (${targetLabel})` };
    }

    if (numericValue <= target) {
        return { status: 'good', message: `Below target (${targetLabel})` };
    }
    if (numericValue <= target + tolerance) {
        return { status: 'warning', message: `Near target (${targetLabel})` };
    }
    return { status: 'critical', message: `Above target (${targetLabel})` };
}
|
|
401
|
+
|
|
402
|
+
// ============================================================================
|
|
403
|
+
// EXPORTS
|
|
404
|
+
// ============================================================================
|
|
405
|
+
|
|
406
|
+
export default METRICS_SCHEMA;

// Module metadata, exposed as ordinary ES-module named exports.

/** Schema version; matches the `@version` tag in the file header. */
export const VERSION = '6.0.0';

/** Number of metric definitions in METRICS_SCHEMA, computed when the module loads. */
export const TOTAL_METRICS = Object.keys(METRICS_SCHEMA).length;
|