agentic-qe 3.7.12 → 3.7.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/.validation/schemas/skill-frontmatter.schema.json +5 -0
- package/.claude/skills/skills-manifest.json +1 -1
- package/CHANGELOG.md +13 -0
- package/assets/skills/.validation/schemas/skill-frontmatter.schema.json +5 -0
- package/dist/cli/bundle.js +159 -150
- package/dist/index.d.ts +2 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -1
- package/dist/index.js.map +1 -1
- package/dist/mcp/bundle.js +1 -1
- package/dist/validation/index.d.ts +4 -0
- package/dist/validation/index.d.ts.map +1 -1
- package/dist/validation/index.js +8 -0
- package/dist/validation/index.js.map +1 -1
- package/dist/validation/trigger-optimizer.d.ts +61 -0
- package/dist/validation/trigger-optimizer.d.ts.map +1 -0
- package/dist/validation/trigger-optimizer.js +356 -0
- package/dist/validation/trigger-optimizer.js.map +1 -0
- package/dist/validation/version-comparator.d.ts +115 -0
- package/dist/validation/version-comparator.d.ts.map +1 -0
- package/dist/validation/version-comparator.js +322 -0
- package/dist/validation/version-comparator.js.map +1 -0
- package/package.json +1 -1
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Version Comparator for A/B Testing Between Skill Versions
|
|
3
|
+
* ADR-056: Skill validation system - version comparison support
|
|
4
|
+
*
|
|
5
|
+
* Enables side-by-side comparison of two skill versions using statistical
|
|
6
|
+
* methods (Cohen's d effect size, confidence scoring) to determine whether
|
|
7
|
+
* a proposed skill version is a meaningful improvement over the current one.
|
|
8
|
+
*
|
|
9
|
+
* Inspired by the Claude Blog skill-creator's comparator agent feature.
|
|
10
|
+
*
|
|
11
|
+
* @module validation/version-comparator
|
|
12
|
+
*/
|
|
13
|
+
import type { SkillValidationOutcome, TestCaseResult } from '../learning/skill-validation-learner.js';
|
|
14
|
+
/**
 * Settings that tune how two skill versions are compared.
 */
export interface VersionComparisonConfig {
    /** Smallest number of test cases required for a comparison to count as valid. */
    minTestCases: number;
    /** Threshold that must be exceeded before a winner is declared. */
    significanceThreshold: number;
    /** Flag stating whether both versions should be run in parallel. */
    parallel: boolean;
}
|
|
22
|
+
/**
 * Identifies one version of a skill that takes part in a comparison.
 */
export interface SkillVersion {
    /** Label for this version, for example 'v1.2.0', 'current' or 'proposed'. */
    versionId: string;
    /** Name of the skill this version belongs to. */
    skillName: string;
    /** Location of this version's SKILL.md file. */
    skillPath: string;
    /** Optional override for the eval suite path. */
    evalPath?: string;
    /** Optional free-form metadata attached to this version. */
    metadata?: Record<string, unknown>;
}
|
|
34
|
+
/**
 * Complete outcome of one A/B comparison run between two skill versions.
 */
export interface VersionComparisonResult {
    /** Unique identifier of this comparison run. */
    comparisonId: string;
    /** Moment at which the comparison was performed. */
    timestamp: Date;
    /** Aggregated metrics and raw results for version A. */
    versionA: VersionResult;
    /** Aggregated metrics and raw results for version B. */
    versionB: VersionResult;
    /** Statistical figures relating the two versions. */
    comparison: ComparisonStats;
    /** Winning side, or null when no significant difference was found. */
    winner: 'A' | 'B' | null;
    /** Summary text intended for human readers. */
    summary: string;
    /** One entry per test case that could be compared across both versions. */
    testCaseComparisons: TestCaseComparison[];
}
|
|
52
|
+
/**
 * Aggregated metrics for a single skill version, plus the raw results
 * they were derived from.
 */
export interface VersionResult {
    /** The version these metrics describe. */
    version: SkillVersion;
    /** Fraction of test cases that passed (0 when there are no results). */
    passRate: number;
    /** Mean score across the test cases. */
    avgScore: number;
    /** Mean reasoning-quality score across the test cases. */
    avgReasoningQuality: number;
    /** Mean execution time in milliseconds. */
    avgExecutionTimeMs: number;
    /** Total token count attributed to this version. */
    totalTokens: number;
    /** The per-test-case results the aggregates were computed from. */
    testCaseResults: TestCaseResult[];
}
|
|
61
|
+
/**
 * Statistical figures relating version B to version A.
 * All `*Diff` members are computed as "B minus A".
 */
export interface ComparisonStats {
    /** Pass rate of B minus pass rate of A. */
    passRateDiff: number;
    /** Average score of B minus average score of A. */
    scoreDiff: number;
    /** Average reasoning quality of B minus that of A. */
    reasoningQualityDiff: number;
    /** Average execution time (ms) of B minus that of A. */
    executionTimeDiff: number;
    /** Whether the difference is treated as significant. */
    isSignificant: boolean;
    /** Cohen's d effect size; positive when B scores higher than A. */
    effectSize: number;
    /** Confidence in the comparison, within the range 0..1. */
    confidence: number;
}
|
|
70
|
+
/**
 * Side-by-side outcome of one test case that exists in both versions.
 */
export interface TestCaseComparison {
    /** Identifier shared by the two results being compared. */
    testId: string;
    /** Result produced by version A for this test. */
    versionAResult: TestCaseResult;
    /** Result produced by version B for this test. */
    versionBResult: TestCaseResult;
    /** Score of B minus score of A for this test. */
    scoreDiff: number;
    /** Which side scored higher, or 'tie' when the scores are effectively equal. */
    winner: 'A' | 'B' | 'tie';
}
|
|
77
|
+
/**
 * A/B comparator for two versions of the same skill.
 */
export declare class VersionComparator {
    /** Effective settings used by this comparator. */
    private readonly config;
    /**
     * @param config Optional partial overrides for the comparator settings.
     */
    constructor(config?: Partial<VersionComparisonConfig>);
    /**
     * Compares two skill versions from validation outcomes that were
     * computed beforehand.
     *
     * @param versionA Descriptor of version A.
     * @param versionB Descriptor of version B.
     * @param outcomes Pre-computed outcome for each side (`a` and `b`).
     * @returns The full comparison result.
     */
    compare(versionA: SkillVersion, versionB: SkillVersion, outcomes: {
        a: SkillValidationOutcome;
        b: SkillValidationOutcome;
    }): VersionComparisonResult;
    /**
     * Compares two skill versions directly from their raw test case results.
     *
     * @param versionA Descriptor of version A.
     * @param resultsA Test case results recorded for version A.
     * @param versionB Descriptor of version B.
     * @param resultsB Test case results recorded for version B.
     * @returns The full comparison result.
     */
    compareFromResults(versionA: SkillVersion, resultsA: TestCaseResult[], versionB: SkillVersion, resultsB: TestCaseResult[]): VersionComparisonResult;
    /**
     * Computes the Cohen's d effect size between two arrays of scores.
     * Yields 0 when both arrays are empty or the pooled variance is zero.
     */
    calculateEffectSize(scoresA: number[], scoresB: number[]): number;
    /**
     * Estimates how much the comparison can be trusted, from sample size
     * and variance: larger samples and lower variance give higher values.
     */
    calculateConfidence(resultsA: TestCaseResult[], resultsB: TestCaseResult[]): number;
    /**
     * Produces a summary of the comparison meant for human readers.
     */
    generateSummary(result: VersionComparisonResult): string;
    /**
     * Produces a Markdown report of the comparison.
     */
    formatReport(result: VersionComparisonResult): string;
    private buildVersionResult;
    private buildTestCaseComparisons;
    private buildComparisonStats;
    private determineWinner;
}
|
|
114
|
+
/**
 * Factory that builds a VersionComparator.
 *
 * @param config Optional partial overrides for the comparator settings.
 * @returns A new VersionComparator instance.
 */
export declare function createVersionComparator(config?: Partial<VersionComparisonConfig>): VersionComparator;
|
|
115
|
+
//# sourceMappingURL=version-comparator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"version-comparator.d.ts","sourceRoot":"","sources":["../../src/validation/version-comparator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAGH,OAAO,KAAK,EACV,sBAAsB,EACtB,cAAc,EACf,MAAM,yCAAyC,CAAC;AASjD,MAAM,WAAW,uBAAuB;IACtC,sDAAsD;IACtD,YAAY,EAAE,MAAM,CAAC;IACrB,oDAAoD;IACpD,qBAAqB,EAAE,MAAM,CAAC;IAC9B,+CAA+C;IAC/C,QAAQ,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,WAAW,YAAY;IAC3B,iEAAiE;IACjE,SAAS,EAAE,MAAM,CAAC;IAClB,iBAAiB;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,iDAAiD;IACjD,SAAS,EAAE,MAAM,CAAC;IAClB,yCAAyC;IACzC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,kCAAkC;IAClC,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED,MAAM,WAAW,uBAAuB;IACtC,+BAA+B;IAC/B,YAAY,EAAE,MAAM,CAAC;IACrB,8BAA8B;IAC9B,SAAS,EAAE,IAAI,CAAC;IAChB,oCAAoC;IACpC,QAAQ,EAAE,aAAa,CAAC;IACxB,oCAAoC;IACpC,QAAQ,EAAE,aAAa,CAAC;IACxB,6BAA6B;IAC7B,UAAU,EAAE,eAAe,CAAC;IAC5B,yDAAyD;IACzD,MAAM,EAAE,GAAG,GAAG,GAAG,GAAG,IAAI,CAAC;IACzB,6BAA6B;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,wCAAwC;IACxC,mBAAmB,EAAE,kBAAkB,EAAE,CAAC;CAC3C;AAED,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,YAAY,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,kBAAkB,EAAE,MAAM,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,eAAe,EAAE,cAAc,EAAE,CAAC;CACnC;AAED,MAAM,WAAW,eAAe;IAC9B,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,iBAAiB,EAAE,MAAM,CAAC;IAC1B,aAAa,EAAE,OAAO,CAAC;IACvB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,kBAAkB;IACjC,MAAM,EAAE,MAAM,CAAC;IACf,cAAc,EAAE,cAAc,CAAC;IAC/B,cAAc,EAAE,cAAc,CAAC;IAC/B,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,GAAG,GAAG,GAAG,GAAG,KAAK,CAAC;CAC3B;AAkBD,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAA0B;gBAErC,MAAM,GAAE,OAAO,CAAC,uBAAuB,CAAM;IAIzD;;OAEG;IACH,OAAO,CACL,QAAQ,EAAE,YAAY,EACtB,QAAQ,EAAE,YAAY,EACtB,QAAQ,EAAE;QAAE,CAAC,EAAE,sBAAsB,CAAC;QAAC,CAAC,EAAE,sBAAsB,CAAA;KAAE,GACjE,uBAAuB;IAe1B;;OAEG;IACH,kBAAkB,CAChB,QAAQ,EAAE,YAAY,EACtB,QAAQ,EAAE,cAAc,EAAE,EAC1B,QAAQ,EAAE,YAAY,EACtB,QAAQ,EAAE,cAAc,EAAE,GACzB,uBAAuB;IAyC1B;;;OAGG;IACH,mBA
AmB,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,GAAG,MAAM;IAgBjE;;;OAGG;IACH,mBAAmB,CAAC,QAAQ,EAAE,cAAc,EAAE,EAAE,QAAQ,EAAE,cAAc,EAAE,GAAG,MAAM;IAuBnF;;OAEG;IACH,eAAe,CAAC,MAAM,EAAE,uBAAuB,GAAG,MAAM;IAgCxD;;OAEG;IACH,YAAY,CAAC,MAAM,EAAE,uBAAuB,GAAG,MAAM;IA+DrD,OAAO,CAAC,kBAAkB;IAqB1B,OAAO,CAAC,wBAAwB;IAiChC,OAAO,CAAC,oBAAoB;IAyB5B,OAAO,CAAC,eAAe;CAqBxB;AAMD,wBAAgB,uBAAuB,CACrC,MAAM,CAAC,EAAE,OAAO,CAAC,uBAAuB,CAAC,GACxC,iBAAiB,CAEnB"}
|
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Version Comparator for A/B Testing Between Skill Versions
|
|
3
|
+
* ADR-056: Skill validation system - version comparison support
|
|
4
|
+
*
|
|
5
|
+
* Enables side-by-side comparison of two skill versions using statistical
|
|
6
|
+
* methods (Cohen's d effect size, confidence scoring) to determine whether
|
|
7
|
+
* a proposed skill version is a meaningful improvement over the current one.
|
|
8
|
+
*
|
|
9
|
+
* Inspired by the Claude Blog skill-creator's comparator agent feature.
|
|
10
|
+
*
|
|
11
|
+
* @module validation/version-comparator
|
|
12
|
+
*/
|
|
13
|
+
import { randomUUID } from 'crypto';
|
|
14
|
+
import { LoggerFactory } from '../logging/index.js';
|
|
15
|
+
const logger = LoggerFactory.create('version-comparator');
|
|
16
|
+
// ============================================================================
|
|
17
|
+
// Constants
|
|
18
|
+
// ============================================================================
|
|
19
|
+
/**
 * Baseline settings. The VersionComparator constructor spreads any
 * caller-supplied overrides on top of this object.
 */
const DEFAULT_CONFIG = {
    // Both sides need at least this many results before a difference can be
    // flagged as significant.
    minTestCases: 5,
    // Compared against the absolute Cohen's d effect size (not a p-value).
    significanceThreshold: 0.05,
    // NOTE(review): not read by the comparator code visible in this file.
    parallel: true,
};
/**
 * Per-test score differences whose absolute value is below this are
 * reported as a tie.
 */
const SCORE_TIE_EPSILON = 0.000001;
|
|
25
|
+
// ============================================================================
|
|
26
|
+
// VersionComparator
|
|
27
|
+
// ============================================================================
|
|
28
|
+
/**
 * A/B comparator for two versions of the same skill.
 *
 * Aggregates per-version metrics from test case results, computes a
 * Cohen's d effect size and a heuristic confidence score, decides whether
 * either version wins, and renders the outcome as a one-paragraph summary
 * or a Markdown report. By convention every "diff" is "B minus A".
 */
export class VersionComparator {
    /** Effective settings: DEFAULT_CONFIG overlaid with the caller's overrides. */
    config;
    /**
     * @param config Partial overrides for DEFAULT_CONFIG. Keys whose value is
     *   `undefined` are ignored so that they cannot erase a default (an
     *   undefined `minTestCases` would make every comparison insignificant).
     */
    constructor(config = {}) {
        const overrides = Object.fromEntries(Object.entries(config ?? {}).filter(([, value]) => value !== undefined));
        this.config = { ...DEFAULT_CONFIG, ...overrides };
    }
    /**
     * Compare two skill versions using pre-computed validation outcomes.
     *
     * @param versionA Descriptor of version A.
     * @param versionB Descriptor of version B.
     * @param outcomes Object with `a` and `b`; each outcome's
     *   `testCaseResults` is forwarded to `compareFromResults`.
     * @returns The comparison result built by `compareFromResults`.
     */
    compare(versionA, versionB, outcomes) {
        logger.info('Comparing skill versions from outcomes', {
            skillName: versionA.skillName,
            versionA: versionA.versionId,
            versionB: versionB.versionId,
        });
        return this.compareFromResults(versionA, outcomes.a.testCaseResults, versionB, outcomes.b.testCaseResults);
    }
    /**
     * Compare two skill versions from raw test case results.
     *
     * @param versionA Descriptor of version A.
     * @param resultsA Test case results for version A.
     * @param versionB Descriptor of version B.
     * @param resultsB Test case results for version B.
     * @returns Result object holding the aggregates, statistics, winner,
     *   summary text and per-test comparisons.
     */
    compareFromResults(versionA, resultsA, versionB, resultsB) {
        // Short run id: "cmp-" plus the first 12 characters of a random UUID.
        const comparisonId = `cmp-${randomUUID().slice(0, 12)}`;
        const timestamp = new Date();
        logger.info('Comparing skill versions from results', {
            comparisonId,
            versionA: versionA.versionId,
            versionB: versionB.versionId,
            testCasesA: resultsA.length,
            testCasesB: resultsB.length,
        });
        const versionAResult = this.buildVersionResult(versionA, resultsA);
        const versionBResult = this.buildVersionResult(versionB, resultsB);
        const testCaseComparisons = this.buildTestCaseComparisons(resultsA, resultsB);
        const comparison = this.buildComparisonStats(versionAResult, versionBResult, resultsA, resultsB);
        const winner = this.determineWinner(comparison, resultsA, resultsB);
        const result = {
            comparisonId,
            timestamp,
            versionA: versionAResult,
            versionB: versionBResult,
            comparison,
            winner,
            // Filled in just below: generateSummary needs the assembled result.
            summary: '',
            testCaseComparisons,
        };
        result.summary = this.generateSummary(result);
        logger.info('Comparison complete', {
            comparisonId,
            winner,
            effectSize: comparison.effectSize,
            isSignificant: comparison.isSignificant,
        });
        return result;
    }
    /**
     * Calculate Cohen's d effect size between two score arrays:
     * (mean of B - mean of A) / pooled standard deviation. A positive value
     * means B scored higher.
     *
     * Returns 0 whenever the statistic is undefined: either array is empty,
     * the pooled standard deviation is zero or not finite (for example two
     * single-element samples), or the quotient itself is not finite.
     *
     * @param scoresA Scores for version A.
     * @param scoresB Scores for version B.
     * @returns The effect size, or 0 when it cannot be computed.
     */
    calculateEffectSize(scoresA, scoresB) {
        // mean([]) is 0, so comparing against an empty sample would fabricate
        // an effect; treat an empty side as "no measurable effect".
        if (scoresA.length === 0 || scoresB.length === 0) {
            return 0;
        }
        const pooledStd = pooledStdDev(scoresA, scoresB);
        // The pooled variance divides by (nA + nB - 2); with one score per
        // side that is 0/0, so a non-finite spread must also map to 0.
        if (!Number.isFinite(pooledStd) || pooledStd === 0) {
            return 0;
        }
        const effect = (mean(scoresB) - mean(scoresA)) / pooledStd;
        return Number.isFinite(effect) ? effect : 0;
    }
    /**
     * Calculate confidence in the comparison based on sample size and variance.
     * Higher sample sizes and lower variance produce higher confidence.
     *
     * The score is the product of three factors, clamped to 0..1:
     * a size factor from the smaller sample, a variance factor from the
     * average sample variance of `reasoningQuality`, and a balance factor
     * (smaller sample size divided by larger sample size).
     *
     * @param resultsA Test case results for version A.
     * @param resultsB Test case results for version B.
     * @returns Confidence in 0..1; 0 when either side has no results.
     */
    calculateConfidence(resultsA, resultsB) {
        const n = Math.min(resultsA.length, resultsB.length);
        if (n === 0) {
            return 0;
        }
        // Sample size factor: asymptotic approach to 1, reaching ~0.9 at n=50
        const sizeFactor = 1 - Math.exp(-n / 20);
        // Variance factor: lower combined variance = higher confidence
        const scoresA = resultsA.map(r => r.reasoningQuality);
        const scoresB = resultsB.map(r => r.reasoningQuality);
        const combinedVariance = (variance(scoresA) + variance(scoresB)) / 2;
        const varianceFactor = 1 / (1 + combinedVariance * 4);
        // Balance factor: penalize unequal sample sizes
        const balanceFactor = Math.min(resultsA.length, resultsB.length) /
            Math.max(resultsA.length, resultsB.length);
        const confidence = sizeFactor * varianceFactor * balanceFactor;
        return clamp(confidence, 0, 1);
    }
    /**
     * Generate a human-readable summary of the comparison.
     *
     * @param result Comparison result to describe; its `summary` field is
     *   not read.
     * @returns Sentences joined by single spaces: the verdict, the pass
     *   rates, the average reasoning quality and the confidence.
     */
    generateSummary(result) {
        const { versionA, versionB, comparison, winner } = result;
        const nameA = versionA.version.versionId;
        const nameB = versionB.version.versionId;
        const lines = [];
        if (winner === null) {
            lines.push(`No significant difference between ${nameA} and ${nameB}.`);
        }
        else {
            const winnerName = winner === 'A' ? nameA : nameB;
            // The label is chosen from the magnitude; the printed d keeps its sign.
            const effectLabel = effectSizeLabel(Math.abs(comparison.effectSize));
            lines.push(`${winnerName} is the winner with a ${effectLabel} effect size (d=${comparison.effectSize.toFixed(3)}).`);
        }
        lines.push(`Pass rate: ${nameA}=${pct(versionA.passRate)} vs ${nameB}=${pct(versionB.passRate)} (diff: ${signedPct(comparison.passRateDiff)}).`);
        lines.push(`Avg reasoning quality: ${nameA}=${versionA.avgReasoningQuality.toFixed(3)} vs ${nameB}=${versionB.avgReasoningQuality.toFixed(3)} (diff: ${signed(comparison.reasoningQualityDiff)}).`);
        lines.push(`Confidence: ${pct(comparison.confidence)}.`);
        return lines.join(' ');
    }
    /**
     * Generate a Markdown-formatted report of the comparison.
     *
     * @param result Comparison result to render; `result.summary` is
     *   embedded verbatim in the Summary section.
     * @returns Markdown text with header fields, a metrics table, a
     *   statistics table and, when any tests were paired, a per-test table.
     */
    formatReport(result) {
        const { versionA, versionB, comparison, winner, testCaseComparisons } = result;
        const nameA = versionA.version.versionId;
        const nameB = versionB.version.versionId;
        const lines = [];
        lines.push(`# Skill Version Comparison Report`);
        lines.push('');
        lines.push(`**Comparison ID:** ${result.comparisonId}`);
        lines.push(`**Skill:** ${versionA.version.skillName}`);
        lines.push(`**Date:** ${result.timestamp.toISOString()}`);
        lines.push(`**Winner:** ${winner ?? 'No significant difference'}`);
        lines.push('');
        // Summary
        lines.push(`## Summary`);
        lines.push('');
        lines.push(result.summary);
        lines.push('');
        // Metrics table
        lines.push(`## Metrics`);
        lines.push('');
        lines.push(`| Metric | ${nameA} | ${nameB} | Diff |`);
        lines.push(`|--------|---------|---------|------|`);
        lines.push(`| Pass Rate | ${pct(versionA.passRate)} | ${pct(versionB.passRate)} | ${signedPct(comparison.passRateDiff)} |`);
        lines.push(`| Avg Score | ${versionA.avgScore.toFixed(3)} | ${versionB.avgScore.toFixed(3)} | ${signed(comparison.scoreDiff)} |`);
        lines.push(`| Reasoning Quality | ${versionA.avgReasoningQuality.toFixed(3)} | ${versionB.avgReasoningQuality.toFixed(3)} | ${signed(comparison.reasoningQualityDiff)} |`);
        lines.push(`| Avg Execution (ms) | ${versionA.avgExecutionTimeMs.toFixed(0)} | ${versionB.avgExecutionTimeMs.toFixed(0)} | ${comparison.executionTimeDiff.toFixed(0)} |`);
        lines.push(`| Total Tokens | ${versionA.totalTokens} | ${versionB.totalTokens} | ${versionB.totalTokens - versionA.totalTokens} |`);
        lines.push('');
        // Statistics
        lines.push(`## Statistics`);
        lines.push('');
        lines.push(`| Statistic | Value |`);
        lines.push(`|-----------|-------|`);
        lines.push(`| Effect Size (Cohen's d) | ${comparison.effectSize.toFixed(4)} |`);
        lines.push(`| Significant | ${comparison.isSignificant ? 'Yes' : 'No'} |`);
        lines.push(`| Confidence | ${pct(comparison.confidence)} |`);
        lines.push('');
        // Per-test comparison
        if (testCaseComparisons.length > 0) {
            lines.push(`## Per-Test Comparison`);
            lines.push('');
            lines.push(`| Test ID | ${nameA} Score | ${nameB} Score | Diff | Winner |`);
            lines.push(`|---------|--------------|--------------|------|--------|`);
            for (const tc of testCaseComparisons) {
                lines.push(`| ${tc.testId} | ${tc.versionAResult.reasoningQuality.toFixed(3)} | ${tc.versionBResult.reasoningQuality.toFixed(3)} | ${signed(tc.scoreDiff)} | ${tc.winner} |`);
            }
            lines.push('');
        }
        return lines.join('\n');
    }
    // ==========================================================================
    // Private helpers
    // ==========================================================================
    /**
     * Aggregate one version's results into a VersionResult-shaped object.
     *
     * @param version Descriptor of the version being aggregated.
     * @param results Test case results for that version.
     * @returns Pass rate, average scores, average execution time and the
     *   untouched `results` array.
     */
    buildVersionResult(version, results) {
        const passed = results.filter(r => r.passed).length;
        const passRate = results.length > 0 ? passed / results.length : 0;
        // avgScore and avgReasoningQuality are the same number: both are the
        // mean of `reasoningQuality`.
        const avgScore = mean(results.map(r => r.reasoningQuality));
        const avgReasoningQuality = avgScore;
        // Missing or non-positive timings are left out of the average.
        const executionTimes = results
            .map(r => r.executionTimeMs ?? 0)
            .filter(t => t > 0);
        const avgExecutionTimeMs = executionTimes.length > 0 ? mean(executionTimes) : 0;
        return {
            version,
            passRate,
            avgScore,
            avgReasoningQuality,
            avgExecutionTimeMs,
            // NOTE(review): always 0 - no token count is read from the results
            // here; confirm whether the result type carries one.
            totalTokens: 0,
            testCaseResults: results,
        };
    }
    /**
     * Pair results by `testId` and score each pair.
     *
     * Tests present on only one side are skipped. When B contains the same
     * `testId` more than once, the last occurrence is the one used.
     *
     * @param resultsA Test case results for version A (drives output order).
     * @param resultsB Test case results for version B.
     * @returns One comparison entry per test found on both sides.
     */
    buildTestCaseComparisons(resultsA, resultsB) {
        const mapB = new Map(resultsB.map(r => [r.testId, r]));
        const comparisons = [];
        for (const rA of resultsA) {
            const rB = mapB.get(rA.testId);
            if (!rB) {
                continue;
            }
            const scoreDiff = rB.reasoningQuality - rA.reasoningQuality;
            let winner;
            if (Math.abs(scoreDiff) < SCORE_TIE_EPSILON) {
                winner = 'tie';
            }
            else {
                winner = scoreDiff > 0 ? 'B' : 'A';
            }
            comparisons.push({
                testId: rA.testId,
                versionAResult: rA,
                versionBResult: rB,
                scoreDiff,
                winner,
            });
        }
        return comparisons;
    }
    /**
     * Compute the statistics that relate version B to version A.
     *
     * A difference is significant only when the smaller sample has at least
     * `config.minTestCases` results and the absolute effect size exceeds
     * `config.significanceThreshold`.
     *
     * @param versionAResult Aggregates for version A.
     * @param versionBResult Aggregates for version B.
     * @param resultsA Test case results for version A.
     * @param resultsB Test case results for version B.
     * @returns The "B minus A" differences plus effect size, confidence and
     *   the significance flag.
     */
    buildComparisonStats(versionAResult, versionBResult, resultsA, resultsB) {
        const scoresA = resultsA.map(r => r.reasoningQuality);
        const scoresB = resultsB.map(r => r.reasoningQuality);
        const effectSize = this.calculateEffectSize(scoresA, scoresB);
        const confidence = this.calculateConfidence(resultsA, resultsB);
        const sampleSufficient = Math.min(resultsA.length, resultsB.length) >= this.config.minTestCases;
        const isSignificant = sampleSufficient && Math.abs(effectSize) > this.config.significanceThreshold;
        return {
            passRateDiff: versionBResult.passRate - versionAResult.passRate,
            scoreDiff: versionBResult.avgScore - versionAResult.avgScore,
            reasoningQualityDiff: versionBResult.avgReasoningQuality - versionAResult.avgReasoningQuality,
            executionTimeDiff: versionBResult.avgExecutionTimeMs - versionAResult.avgExecutionTimeMs,
            isSignificant,
            effectSize,
            confidence,
        };
    }
    /**
     * Pick the winner from the comparison statistics.
     *
     * @param comparison Statistics produced by `buildComparisonStats`.
     * @param resultsA Test case results for version A.
     * @param resultsB Test case results for version B.
     * @returns 'B' or 'A' according to the sign of `scoreDiff`, or null when
     *   the difference is not significant, a sample is too small, or the
     *   average scores are equal.
     */
    determineWinner(comparison, resultsA, resultsB) {
        if (!comparison.isSignificant) {
            return null;
        }
        // Repeats the sample-size gate already folded into isSignificant, so
        // it still holds if a caller passes hand-built statistics.
        if (Math.min(resultsA.length, resultsB.length) < this.config.minTestCases) {
            return null;
        }
        if (comparison.scoreDiff > 0) {
            return 'B';
        }
        else if (comparison.scoreDiff < 0) {
            return 'A';
        }
        return null;
    }
}
|
|
274
|
+
// ============================================================================
|
|
275
|
+
// Factory
|
|
276
|
+
// ============================================================================
|
|
277
|
+
/**
 * Factory wrapper around the VersionComparator constructor.
 *
 * @param config Optional partial settings, passed straight through to the
 *   constructor.
 * @returns A newly constructed VersionComparator.
 */
export function createVersionComparator(config) {
    const comparator = new VersionComparator(config);
    return comparator;
}
|
|
280
|
+
// ============================================================================
|
|
281
|
+
// Statistical helpers
|
|
282
|
+
// ============================================================================
|
|
283
|
+
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param values Numbers to average.
 * @returns The average, or 0 for an empty list.
 */
function mean(values) {
    const count = values.length;
    if (count === 0) {
        return 0;
    }
    let total = 0;
    values.forEach((value) => {
        total += value;
    });
    return total / count;
}
|
|
288
|
+
/**
 * Sample variance (divides by n - 1) of a list of numbers.
 *
 * @param values Numbers to measure.
 * @returns The sample variance, or 0 when fewer than two values are given.
 */
function variance(values) {
    const count = values.length;
    if (count < 2) {
        return 0;
    }
    const center = mean(values);
    let squaredDeviations = 0;
    values.forEach((value) => {
        squaredDeviations += (value - center) ** 2;
    });
    return squaredDeviations / (count - 1);
}
|
|
294
|
+
/**
 * Sample standard deviation: the square root of `variance(values)`.
 *
 * @param values Numbers to measure.
 * @returns The sample standard deviation (0 when `variance` yields 0).
 */
function stdDev(values) {
    const spread = variance(values);
    return Math.sqrt(spread);
}
|
|
297
|
+
/**
 * Pooled sample standard deviation of two groups:
 * sqrt((SS_a + SS_b) / (df_a + df_b)), where SS is a group's sum of squared
 * deviations from its own mean and df is its size minus one (never below 0).
 *
 * For two groups that each hold at least one value this is the usual
 * sqrt(((nA - 1) * varA + (nB - 1) * varB) / (nA + nB - 2)).
 *
 * @param a Scores of the first group.
 * @param b Scores of the second group.
 * @returns The pooled standard deviation, or 0 when there are no degrees of
 *   freedom at all (both groups have fewer than two values).
 */
function pooledStdDev(a, b) {
    const nA = a.length;
    const nB = b.length;
    // Clamp each group's degrees of freedom at 0. Using (nA + nB - 2) directly
    // gives 0/0 = NaN for two single-value groups and lets an empty group
    // subtract a degree of freedom from the other one.
    const degreesOfFreedom = Math.max(nA - 1, 0) + Math.max(nB - 1, 0);
    if (degreesOfFreedom === 0) {
        return 0;
    }
    // Sum of squared deviations from the group's own mean; a group with fewer
    // than two values has no spread to contribute.
    const sumSquaredDeviations = (values) => {
        if (values.length < 2) {
            return 0;
        }
        const center = values.reduce((sum, v) => sum + v, 0) / values.length;
        return values.reduce((sum, v) => sum + (v - center) ** 2, 0);
    };
    const pooledVar = (sumSquaredDeviations(a) + sumSquaredDeviations(b)) / degreesOfFreedom;
    return Math.sqrt(pooledVar);
}
|
|
307
|
+
/**
 * Restricts a number to the closed interval [min, max].
 *
 * @param value Number to restrict.
 * @param min Lower bound.
 * @param max Upper bound.
 * @returns `value` raised to at least `min`, then capped at `max`.
 */
function clamp(value, min, max) {
    const raisedToFloor = Math.max(value, min);
    const cappedAtCeiling = Math.min(raisedToFloor, max);
    return cappedAtCeiling;
}
|
|
310
|
+
/**
 * Maps an effect-size magnitude to a descriptive label.
 *
 * @param d Effect size; callers in this file pass the absolute value.
 * @returns 'negligible' below 0.2, 'small' below 0.5, 'medium' below 0.8,
 *   otherwise 'large'.
 */
function effectSizeLabel(d) {
    const bands = [
        { below: 0.2, label: 'negligible' },
        { below: 0.5, label: 'small' },
        { below: 0.8, label: 'medium' },
    ];
    for (const band of bands) {
        if (d < band.below) {
            return band.label;
        }
    }
    return 'large';
}
|
|
319
|
+
/** Formats a fraction as a percentage with one decimal place, e.g. 0.5 -> "50.0%". */
const pct = (v) => {
    const percentage = v * 100;
    return `${percentage.toFixed(1)}%`;
};
/** Same as `pct`, with a leading "+" when the value is greater than zero. */
const signedPct = (v) => {
    const rendered = pct(v);
    if (v > 0) {
        return `+${rendered}`;
    }
    return rendered;
};
/** Formats a number with three decimals, with a leading "+" when it is greater than zero. */
const signed = (v) => {
    const rendered = v.toFixed(3);
    if (v > 0) {
        return `+${rendered}`;
    }
    return rendered;
};
|
|
322
|
+
//# sourceMappingURL=version-comparator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"version-comparator.js","sourceRoot":"","sources":["../../src/validation/version-comparator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAKpC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAEpD,MAAM,MAAM,GAAG,aAAa,CAAC,MAAM,CAAC,oBAAoB,CAAC,CAAC;AA2E1D,+EAA+E;AAC/E,YAAY;AACZ,+EAA+E;AAE/E,MAAM,cAAc,GAA4B;IAC9C,YAAY,EAAE,CAAC;IACf,qBAAqB,EAAE,IAAI;IAC3B,QAAQ,EAAE,IAAI;CACf,CAAC;AAEF,MAAM,iBAAiB,GAAG,IAAI,CAAC;AAE/B,+EAA+E;AAC/E,oBAAoB;AACpB,+EAA+E;AAE/E,MAAM,OAAO,iBAAiB;IACX,MAAM,CAA0B;IAEjD,YAAY,SAA2C,EAAE;QACvD,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,cAAc,EAAE,GAAG,MAAM,EAAE,CAAC;IACjD,CAAC;IAED;;OAEG;IACH,OAAO,CACL,QAAsB,EACtB,QAAsB,EACtB,QAAkE;QAElE,MAAM,CAAC,IAAI,CAAC,wCAAwC,EAAE;YACpD,SAAS,EAAE,QAAQ,CAAC,SAAS;YAC7B,QAAQ,EAAE,QAAQ,CAAC,SAAS;YAC5B,QAAQ,EAAE,QAAQ,CAAC,SAAS;SAC7B,CAAC,CAAC;QAEH,OAAO,IAAI,CAAC,kBAAkB,CAC5B,QAAQ,EACR,QAAQ,CAAC,CAAC,CAAC,eAAe,EAC1B,QAAQ,EACR,QAAQ,CAAC,CAAC,CAAC,eAAe,CAC3B,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,kBAAkB,CAChB,QAAsB,EACtB,QAA0B,EAC1B,QAAsB,EACtB,QAA0B;QAE1B,MAAM,YAAY,GAAG,OAAO,UAAU,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;QACxD,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC;QAE7B,MAAM,CAAC,IAAI,CAAC,uCAAuC,EAAE;YACnD,YAAY;YACZ,QAAQ,EAAE,QAAQ,CAAC,SAAS;YAC5B,QAAQ,EAAE,QAAQ,CAAC,SAAS;YAC5B,UAAU,EAAE,QAAQ,CAAC,MAAM;YAC3B,UAAU,EAAE,QAAQ,CAAC,MAAM;SAC5B,CAAC,CAAC;QAEH,MAAM,cAAc,GAAG,IAAI,CAAC,kBAAkB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;QACnE,MAAM,cAAc,GAAG,IAAI,CAAC,kBAAkB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;QACnE,MAAM,mBAAmB,GAAG,IAAI,CAAC,wBAAwB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;QAC9E,MAAM,UAAU,GAAG,IAAI,CAAC,oBAAoB,CAAC,cAAc,EAAE,cAAc,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;QACjG,MAAM,MAAM,GAAG,IAAI,CAAC,eAAe,CAAC,UAAU,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;QAEpE,MAAM,MAAM,GAA4B;YACtC,YAAY;YACZ,SAAS;YACT,QAAQ,EAAE,cAAc;YACxB,QAAQ,EAAE,cAAc;YACxB,UAAU;YACV,MAAM;YACN,OAAO,EAAE,EAAE;YACX,mBAAmB;SACpB,CAAC;QAEF,MAAM,CAAC,OAAO,GAAG,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC;QAE9C,MAAM,CAAC,IAAI,CAAC,qBAAqB,EAAE;YACjC,YAAY;YACZ,MAAM;YACN,UA
AU,EAAE,UAAU,CAAC,UAAU;YACjC,aAAa,EAAE,UAAU,CAAC,aAAa;SACxC,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;;OAGG;IACH,mBAAmB,CAAC,OAAiB,EAAE,OAAiB;QACtD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACjD,OAAO,CAAC,CAAC;QACX,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC;QAC5B,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC;QAC5B,MAAM,SAAS,GAAG,YAAY,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QAEjD,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;YACpB,OAAO,CAAC,CAAC;QACX,CAAC;QAED,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,GAAG,SAAS,CAAC;IACrC,CAAC;IAED;;;OAGG;IACH,mBAAmB,CAAC,QAA0B,EAAE,QAA0B;QACxE,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;QACrD,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;YACZ,OAAO,CAAC,CAAC;QACX,CAAC;QAED,sEAAsE;QACtE,MAAM,UAAU,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;QAEzC,+DAA+D;QAC/D,MAAM,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,gBAAgB,CAAC,CAAC;QACtD,MAAM,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,gBAAgB,CAAC,CAAC;QACtD,MAAM,gBAAgB,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC;QACrE,MAAM,cAAc,GAAG,CAAC,GAAG,CAAC,CAAC,GAAG,gBAAgB,GAAG,CAAC,CAAC,CAAC;QAEtD,gDAAgD;QAChD,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,EAAE,QAAQ,CAAC,MAAM,CAAC;YAC9D,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;QAE7C,MAAM,UAAU,GAAG,UAAU,GAAG,cAAc,GAAG,aAAa,CAAC;QAC/D,OAAO,KAAK,CAAC,UAAU,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;IACjC,CAAC;IAED;;OAEG;IACH,eAAe,CAAC,MAA+B;QAC7C,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,UAAU,EAAE,MAAM,EAAE,GAAG,MAAM,CAAC;QAC1D,MAAM,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC;QACzC,MAAM,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC;QAEzC,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;YACpB,KAAK,CAAC,IAAI,CACR,qCAAqC,KAAK,QAAQ,KAAK,GAAG,CAC3D,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,MAAM,UAAU,GAAG,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC;YAClD,MAAM,WAAW,GAAG,eAAe,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC,CAAC;YACrE,KAAK,CAAC,IAAI,CACR
,GAAG,UAAU,yBAAyB,WAAW,mBAAmB,UAAU,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CACzG,CAAC;QACJ,CAAC;QAED,KAAK,CAAC,IAAI,CACR,cAAc,KAAK,IAAI,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,OAAO,KAAK,IAAI,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,WAAW,SAAS,CAAC,UAAU,CAAC,YAAY,CAAC,IAAI,CACrI,CAAC;QACF,KAAK,CAAC,IAAI,CACR,0BAA0B,KAAK,IAAI,QAAQ,CAAC,mBAAmB,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,QAAQ,CAAC,mBAAmB,CAAC,OAAO,CAAC,CAAC,CAAC,WAAW,MAAM,CAAC,UAAU,CAAC,oBAAoB,CAAC,IAAI,CACxL,CAAC;QACF,KAAK,CAAC,IAAI,CACR,eAAe,GAAG,CAAC,UAAU,CAAC,UAAU,CAAC,GAAG,CAC7C,CAAC;QAEF,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACzB,CAAC;IAED;;OAEG;IACH,YAAY,CAAC,MAA+B;QAC1C,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,UAAU,EAAE,MAAM,EAAE,mBAAmB,EAAE,GAAG,MAAM,CAAC;QAC/E,MAAM,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC;QACzC,MAAM,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC;QAEzC,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,KAAK,CAAC,IAAI,CAAC,mCAAmC,CAAC,CAAC;QAChD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,sBAAsB,MAAM,CAAC,YAAY,EAAE,CAAC,CAAC;QACxD,KAAK,CAAC,IAAI,CAAC,cAAc,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,CAAC,CAAC;QACvD,KAAK,CAAC,IAAI,CAAC,aAAa,MAAM,CAAC,SAAS,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;QAC1D,KAAK,CAAC,IAAI,CAAC,eAAe,MAAM,IAAI,2BAA2B,EAAE,CAAC,CAAC;QACnE,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAEf,UAAU;QACV,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QACzB,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC3B,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAEf,gBAAgB;QAChB,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QACzB,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,cAAc,KAAK,MAAM,KAAK,WAAW,CAAC,CAAC;QACtD,KAAK,CAAC,IAAI,CAAC,uCAAuC,CAAC,CAAC;QACpD,KAAK,CAAC,IAAI,CAAC,iBAAiB,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,MAAM,SAAS,CAAC,UAAU,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;QAC5H,KAAK,CAAC,IAAI,CAAC,iBAAiB,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QAClI,KAAK,CAAC,IAAI,CAAC,yBAAyB,QAAQ,CAAC,mBAAmB,CAAC,OAAO,CAAC,CAAC,CA
AC,MAAM,QAAQ,CAAC,mBAAmB,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,UAAU,CAAC,oBAAoB,CAAC,IAAI,CAAC,CAAC;QAC3K,KAAK,CAAC,IAAI,CAAC,0BAA0B,QAAQ,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,QAAQ,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,UAAU,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC1K,KAAK,CAAC,IAAI,CAAC,oBAAoB,QAAQ,CAAC,WAAW,MAAM,QAAQ,CAAC,WAAW,MAAM,QAAQ,CAAC,WAAW,GAAG,QAAQ,CAAC,WAAW,IAAI,CAAC,CAAC;QACpI,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAEf,aAAa;QACb,KAAK,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QAC5B,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,uBAAuB,CAAC,CAAC;QACpC,KAAK,CAAC,IAAI,CAAC,uBAAuB,CAAC,CAAC;QACpC,KAAK,CAAC,IAAI,CAAC,+BAA+B,UAAU,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAChF,KAAK,CAAC,IAAI,CAAC,mBAAmB,UAAU,CAAC,aAAa,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC;QAC3E,KAAK,CAAC,IAAI,CAAC,kBAAkB,GAAG,CAAC,UAAU,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;QAC7D,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAEf,sBAAsB;QACtB,IAAI,mBAAmB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACnC,KAAK,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;YACrC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACf,KAAK,CAAC,IAAI,CAAC,eAAe,KAAK,YAAY,KAAK,0BAA0B,CAAC,CAAC;YAC5E,KAAK,CAAC,IAAI,CAAC,2DAA2D,CAAC,CAAC;YACxE,KAAK,MAAM,EAAE,IAAI,mBAAmB,EAAE,CAAC;gBACrC,KAAK,CAAC,IAAI,CACR,KAAK,EAAE,CAAC,MAAM,MAAM,EAAE,CAAC,cAAc,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,cAAc,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,EAAE,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC,MAAM,IAAI,CAClK,CAAC;YACJ,CAAC;YACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACjB,CAAC;QAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;IAED,6EAA6E;IAC7E,kBAAkB;IAClB,6EAA6E;IAErE,kBAAkB,CAAC,OAAqB,EAAE,OAAyB;QACzE,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;QACpD,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QAClE,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,gBAAgB,CAAC,CAAC,CAAC;QAC5D,MAAM,mBAAmB,GAAG,QAAQ,CAAC;QACrC,M
AAM,cAAc,GAAG,OAAO;aAC3B,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,eAAe,IAAI,CAAC,CAAC;aAChC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACtB,MAAM,kBAAkB,GAAG,cAAc,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAEhF,OAAO;YACL,OAAO;YACP,QAAQ;YACR,QAAQ;YACR,mBAAmB;YACnB,kBAAkB;YAClB,WAAW,EAAE,CAAC;YACd,eAAe,EAAE,OAAO;SACzB,CAAC;IACJ,CAAC;IAEO,wBAAwB,CAC9B,QAA0B,EAC1B,QAA0B;QAE1B,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QACvD,MAAM,WAAW,GAAyB,EAAE,CAAC;QAE7C,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;YAC1B,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC;YAC/B,IAAI,CAAC,EAAE,EAAE,CAAC;gBACR,SAAS;YACX,CAAC;YAED,MAAM,SAAS,GAAG,EAAE,CAAC,gBAAgB,GAAG,EAAE,CAAC,gBAAgB,CAAC;YAC5D,IAAI,MAAyB,CAAC;YAC9B,IAAI,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,GAAG,iBAAiB,EAAE,CAAC;gBAC5C,MAAM,GAAG,KAAK,CAAC;YACjB,CAAC;iBAAM,CAAC;gBACN,MAAM,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;YACrC,CAAC;YAED,WAAW,CAAC,IAAI,CAAC;gBACf,MAAM,EAAE,EAAE,CAAC,MAAM;gBACjB,cAAc,EAAE,EAAE;gBAClB,cAAc,EAAE,EAAE;gBAClB,SAAS;gBACT,MAAM;aACP,CAAC,CAAC;QACL,CAAC;QAED,OAAO,WAAW,CAAC;IACrB,CAAC;IAEO,oBAAoB,CAC1B,cAA6B,EAC7B,cAA6B,EAC7B,QAA0B,EAC1B,QAA0B;QAE1B,MAAM,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,gBAAgB,CAAC,CAAC;QACtD,MAAM,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,gBAAgB,CAAC,CAAC;QAEtD,MAAM,UAAU,GAAG,IAAI,CAAC,mBAAmB,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QAC9D,MAAM,UAAU,GAAG,IAAI,CAAC,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;QAChE,MAAM,gBAAgB,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,EAAE,QAAQ,CAAC,MAAM,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;QAChG,MAAM,aAAa,GAAG,gBAAgB,IAAI,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;QAEnG,OAAO;YACL,YAAY,EAAE,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;YAC/D,SAAS,EAAE,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;YAC5D,oBAAoB,EAAE,cAAc,CAAC,mBAAmB,GAAG,cAAc,CAAC,mBAAmB;YAC7F,iBAAiB,EAAE,cAAc,CAAC,kBAAkB,GAAG,cA
Ac,CAAC,kBAAkB;YACxF,aAAa;YACb,UAAU;YACV,UAAU;SACX,CAAC;IACJ,CAAC;IAEO,eAAe,CACrB,UAA2B,EAC3B,QAA0B,EAC1B,QAA0B;QAE1B,IAAI,CAAC,UAAU,CAAC,aAAa,EAAE,CAAC;YAC9B,OAAO,IAAI,CAAC;QACd,CAAC;QAED,IAAI,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,EAAE,QAAQ,CAAC,MAAM,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,EAAE,CAAC;YAC1E,OAAO,IAAI,CAAC;QACd,CAAC;QAED,IAAI,UAAU,CAAC,SAAS,GAAG,CAAC,EAAE,CAAC;YAC7B,OAAO,GAAG,CAAC;QACb,CAAC;aAAM,IAAI,UAAU,CAAC,SAAS,GAAG,CAAC,EAAE,CAAC;YACpC,OAAO,GAAG,CAAC;QACb,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;CACF;AAED,+EAA+E;AAC/E,UAAU;AACV,+EAA+E;AAE/E,MAAM,UAAU,uBAAuB,CACrC,MAAyC;IAEzC,OAAO,IAAI,iBAAiB,CAAC,MAAM,CAAC,CAAC;AACvC,CAAC;AAED,+EAA+E;AAC/E,sBAAsB;AACtB,+EAA+E;AAE/E,SAAS,IAAI,CAAC,MAAgB;IAC5B,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAClC,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;AAC/D,CAAC;AAED,SAAS,QAAQ,CAAC,MAAgB;IAChC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,CAAC,CAAC;IAChC,MAAM,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC;IACvB,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AAChF,CAAC;AAED,SAAS,MAAM,CAAC,MAAgB;IAC9B,OAAO,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC;AACrC,CAAC;AAED,SAAS,YAAY,CAAC,CAAW,EAAE,CAAW;IAC5C,MAAM,EAAE,GAAG,CAAC,CAAC,MAAM,CAAC;IACpB,MAAM,EAAE,GAAG,CAAC,CAAC,MAAM,CAAC;IACpB,IAAI,EAAE,GAAG,EAAE,GAAG,CAAC;QAAE,OAAO,CAAC,CAAC;IAE1B,MAAM,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;IACzB,MAAM,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;IAEzB,MAAM,SAAS,GAAG,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,GAAG,IAAI,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;IACtE,OAAO,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;AAC9B,CAAC;AAED,SAAS,KAAK,CAAC,KAAa,EAAE,GAAW,EAAE,GAAW;IACpD,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC;AAC7C,CAAC;AAED,SAAS,eAAe,CAAC,CAAS;IAChC,IAAI,CAAC,GAAG,GAAG;QAAE,OAAO,YAAY,CAAC;IACjC,IAAI,C
AAC,GAAG,GAAG;QAAE,OAAO,OAAO,CAAC;IAC5B,IAAI,CAAC,GAAG,GAAG;QAAE,OAAO,QAAQ,CAAC;IAC7B,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,GAAG,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;AACtD,MAAM,SAAS,GAAG,CAAC,CAAS,EAAE,EAAE,GAAG,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AACnF,MAAM,MAAM,GAAG,CAAC,CAAS,EAAE,EAAE,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agentic-qe",
|
|
3
|
-
"version": "3.7.
|
|
3
|
+
"version": "3.7.13",
|
|
4
4
|
"description": "Agentic Quality Engineering V3 - Domain-Driven Design Architecture with 13 Bounded Contexts, O(log n) coverage analysis, ReasoningBank learning, 60 specialized QE agents, mathematical Coherence verification, deep Claude Flow integration",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|