@skillmark/cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli-entry-point.d.ts +3 -0
- package/dist/cli-entry-point.d.ts.map +1 -0
- package/dist/cli-entry-point.js +207 -0
- package/dist/cli-entry-point.js.map +1 -0
- package/dist/commands/auth-setup-and-token-storage-command.d.ts +21 -0
- package/dist/commands/auth-setup-and-token-storage-command.d.ts.map +1 -0
- package/dist/commands/auth-setup-and-token-storage-command.js +166 -0
- package/dist/commands/auth-setup-and-token-storage-command.js.map +1 -0
- package/dist/commands/publish-results-command.d.ts +21 -0
- package/dist/commands/publish-results-command.d.ts.map +1 -0
- package/dist/commands/publish-results-command.js +256 -0
- package/dist/commands/publish-results-command.js.map +1 -0
- package/dist/commands/run-benchmark-command.d.ts +6 -0
- package/dist/commands/run-benchmark-command.d.ts.map +1 -0
- package/dist/commands/run-benchmark-command.js +331 -0
- package/dist/commands/run-benchmark-command.js.map +1 -0
- package/dist/commands/view-leaderboard-command.d.ts +8 -0
- package/dist/commands/view-leaderboard-command.d.ts.map +1 -0
- package/dist/commands/view-leaderboard-command.js +169 -0
- package/dist/commands/view-leaderboard-command.js.map +1 -0
- package/dist/config/api-key-config-reader.d.ts +14 -0
- package/dist/config/api-key-config-reader.d.ts.map +1 -0
- package/dist/config/api-key-config-reader.js +107 -0
- package/dist/config/api-key-config-reader.js.map +1 -0
- package/dist/config/api-key-config-reader.test.d.ts +2 -0
- package/dist/config/api-key-config-reader.test.d.ts.map +1 -0
- package/dist/config/api-key-config-reader.test.js +21 -0
- package/dist/config/api-key-config-reader.test.js.map +1 -0
- package/dist/engine/claude-cli-executor.d.ts +33 -0
- package/dist/engine/claude-cli-executor.d.ts.map +1 -0
- package/dist/engine/claude-cli-executor.js +251 -0
- package/dist/engine/claude-cli-executor.js.map +1 -0
- package/dist/engine/concept-accuracy-scorer.d.ts +24 -0
- package/dist/engine/concept-accuracy-scorer.d.ts.map +1 -0
- package/dist/engine/concept-accuracy-scorer.js +186 -0
- package/dist/engine/concept-accuracy-scorer.js.map +1 -0
- package/dist/engine/concept-accuracy-scorer.test.d.ts +2 -0
- package/dist/engine/concept-accuracy-scorer.test.d.ts.map +1 -0
- package/dist/engine/concept-accuracy-scorer.test.js +230 -0
- package/dist/engine/concept-accuracy-scorer.test.js.map +1 -0
- package/dist/engine/enhanced-test-prompt-builder.d.ts +30 -0
- package/dist/engine/enhanced-test-prompt-builder.d.ts.map +1 -0
- package/dist/engine/enhanced-test-prompt-builder.js +134 -0
- package/dist/engine/enhanced-test-prompt-builder.js.map +1 -0
- package/dist/engine/markdown-test-definition-parser.d.ts +18 -0
- package/dist/engine/markdown-test-definition-parser.d.ts.map +1 -0
- package/dist/engine/markdown-test-definition-parser.js +525 -0
- package/dist/engine/markdown-test-definition-parser.js.map +1 -0
- package/dist/engine/markdown-test-definition-parser.test.d.ts +2 -0
- package/dist/engine/markdown-test-definition-parser.test.d.ts.map +1 -0
- package/dist/engine/markdown-test-definition-parser.test.js +265 -0
- package/dist/engine/markdown-test-definition-parser.test.js.map +1 -0
- package/dist/engine/retry-with-degrade-utils.d.ts +58 -0
- package/dist/engine/retry-with-degrade-utils.d.ts.map +1 -0
- package/dist/engine/retry-with-degrade-utils.js +86 -0
- package/dist/engine/retry-with-degrade-utils.js.map +1 -0
- package/dist/engine/skill-content-collector.d.ts +53 -0
- package/dist/engine/skill-content-collector.d.ts.map +1 -0
- package/dist/engine/skill-content-collector.js +157 -0
- package/dist/engine/skill-content-collector.js.map +1 -0
- package/dist/engine/skill-creator-invoker.d.ts +36 -0
- package/dist/engine/skill-creator-invoker.d.ts.map +1 -0
- package/dist/engine/skill-creator-invoker.js +222 -0
- package/dist/engine/skill-creator-invoker.js.map +1 -0
- package/dist/engine/transcript-jsonl-parser.d.ts +28 -0
- package/dist/engine/transcript-jsonl-parser.d.ts.map +1 -0
- package/dist/engine/transcript-jsonl-parser.js +175 -0
- package/dist/engine/transcript-jsonl-parser.js.map +1 -0
- package/dist/sources/git-repository-skill-source-handler.d.ts +18 -0
- package/dist/sources/git-repository-skill-source-handler.d.ts.map +1 -0
- package/dist/sources/git-repository-skill-source-handler.js +119 -0
- package/dist/sources/git-repository-skill-source-handler.js.map +1 -0
- package/dist/sources/local-skill-source-handler.d.ts +21 -0
- package/dist/sources/local-skill-source-handler.d.ts.map +1 -0
- package/dist/sources/local-skill-source-handler.js +138 -0
- package/dist/sources/local-skill-source-handler.js.map +1 -0
- package/dist/sources/local-skill-source-handler.test.d.ts +2 -0
- package/dist/sources/local-skill-source-handler.test.d.ts.map +1 -0
- package/dist/sources/local-skill-source-handler.test.js +55 -0
- package/dist/sources/local-skill-source-handler.test.js.map +1 -0
- package/dist/sources/skillsh-registry-source-handler.d.ts +18 -0
- package/dist/sources/skillsh-registry-source-handler.d.ts.map +1 -0
- package/dist/sources/skillsh-registry-source-handler.js +130 -0
- package/dist/sources/skillsh-registry-source-handler.js.map +1 -0
- package/dist/sources/unified-skill-source-resolver.d.ts +20 -0
- package/dist/sources/unified-skill-source-resolver.d.ts.map +1 -0
- package/dist/sources/unified-skill-source-resolver.js +64 -0
- package/dist/sources/unified-skill-source-resolver.js.map +1 -0
- package/dist/sources/unified-skill-source-resolver.test.d.ts +2 -0
- package/dist/sources/unified-skill-source-resolver.test.d.ts.map +1 -0
- package/dist/sources/unified-skill-source-resolver.test.js +84 -0
- package/dist/sources/unified-skill-source-resolver.test.js.map +1 -0
- package/dist/types/benchmark-types.d.ts +142 -0
- package/dist/types/benchmark-types.d.ts.map +1 -0
- package/dist/types/benchmark-types.js +5 -0
- package/dist/types/benchmark-types.js.map +1 -0
- package/dist/types/index.d.ts +5 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +5 -0
- package/dist/types/index.js.map +1 -0
- package/package.json +38 -0
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Concept accuracy scorer - calculates how well responses match expected concepts
|
|
3
|
+
*/
|
|
4
|
+
import type { TestDefinition, TestResult, BenchmarkMetrics } from '../types/index.js';
|
|
5
|
+
/** Scoring options */
|
|
6
|
+
export interface ScoringOptions {
|
|
7
|
+
/** Minimum similarity threshold for fuzzy matching (0-1) */
|
|
8
|
+
fuzzyThreshold?: number;
|
|
9
|
+
/** Whether to use case-insensitive matching */
|
|
10
|
+
caseInsensitive?: boolean;
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* Score a response against expected concepts
|
|
14
|
+
*/
|
|
15
|
+
export declare function scoreResponse(test: TestDefinition, response: string, metrics: BenchmarkMetrics, options?: ScoringOptions): TestResult;
|
|
16
|
+
/**
|
|
17
|
+
* Aggregate metrics from multiple test results
|
|
18
|
+
*/
|
|
19
|
+
export declare function aggregateMetrics(results: TestResult[]): BenchmarkMetrics;
|
|
20
|
+
/**
|
|
21
|
+
* Calculate pass rate from test results
|
|
22
|
+
*/
|
|
23
|
+
export declare function calculatePassRate(results: TestResult[]): number;
|
|
24
|
+
//# sourceMappingURL=concept-accuracy-scorer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"concept-accuracy-scorer.d.ts","sourceRoot":"","sources":["../../src/engine/concept-accuracy-scorer.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,KAAK,EAAE,cAAc,EAAE,UAAU,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAEtF,sBAAsB;AACtB,MAAM,WAAW,cAAc;IAC7B,4DAA4D;IAC5D,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,+CAA+C;IAC/C,eAAe,CAAC,EAAE,OAAO,CAAC;CAC3B;AAOD;;GAEG;AACH,wBAAgB,aAAa,CAC3B,IAAI,EAAE,cAAc,EACpB,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,gBAAgB,EACzB,OAAO,GAAE,cAAmB,GAC3B,UAAU,CA4DZ;AAmFD;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,UAAU,EAAE,GAAG,gBAAgB,CA6CxE;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,UAAU,EAAE,GAAG,MAAM,CAI/D"}
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
const DEFAULT_OPTIONS = {
    fuzzyThreshold: 0.8,
    caseInsensitive: true,
};
/**
 * Score a response against the concepts and expected patterns of a test.
 *
 * Every entry of `test.concepts` is checked first. Entries of `test.expected`
 * are checked afterwards; a markdown checkbox prefix ("- [ ] " / "- [x] ") is
 * stripped, and a pattern that duplicates a concept is skipped so it is not
 * counted twice.
 *
 * @param {object} test - Test definition; `concepts` and `expected` are read as string arrays.
 * @param {string} response - Response text to score.
 * @param {object} metrics - Metrics for the run; copied into the result with `accuracy` replaced.
 * @param {object} [options] - Optional overrides for `fuzzyThreshold` and `caseInsensitive`.
 * @returns {object} Result holding the test, scored metrics, matched/missed lists,
 *   the raw response, an ISO timestamp and the `passed` flag (accuracy >= 70).
 */
export function scoreResponse(test, response, metrics, options = {}) {
    // An option passed explicitly as `undefined` must not erase its default:
    // an undefined threshold would make every ratio comparison false.
    const opts = { ...DEFAULT_OPTIONS };
    for (const [key, value] of Object.entries(options || {})) {
        if (value !== undefined) {
            opts[key] = value;
        }
    }
    // One normalizer for response, concepts and patterns keeps every
    // comparison below consistent with the caseInsensitive setting.
    const normalize = opts.caseInsensitive ? (text) => text.toLowerCase() : (text) => text;
    const normalizedResponse = normalize(response);
    const matchedConcepts = [];
    const missedConcepts = [];
    // Check each expected concept
    for (const concept of test.concepts) {
        if (conceptMatches(normalizedResponse, normalize(concept), opts.fuzzyThreshold)) {
            matchedConcepts.push(concept);
        }
        else {
            missedConcepts.push(concept);
        }
    }
    // Normalized concepts, built once, used to skip duplicate patterns
    const knownConcepts = new Set(test.concepts.map(normalize));
    // Also check expected patterns from test definition
    for (const expected of test.expected) {
        // Parse checkbox items like "- [ ] Pattern to check"
        const checkboxMatch = expected.match(/^-\s*\[[\sx]\]\s*(.+)$/i);
        const pattern = checkboxMatch ? checkboxMatch[1] : expected;
        const normalizedPattern = normalize(pattern);
        // Skip if already in concepts
        if (knownConcepts.has(normalizedPattern)) {
            continue;
        }
        const bucket = conceptMatches(normalizedResponse, normalizedPattern, opts.fuzzyThreshold)
            ? matchedConcepts
            : missedConcepts;
        if (!bucket.includes(pattern)) {
            bucket.push(pattern);
        }
    }
    // Calculate accuracy as a percentage; 0 when there was nothing to check
    const totalConcepts = matchedConcepts.length + missedConcepts.length;
    const accuracy = totalConcepts > 0 ? (matchedConcepts.length / totalConcepts) * 100 : 0;
    // Update metrics with calculated accuracy
    const scoredMetrics = {
        ...metrics,
        accuracy,
    };
    return {
        test,
        metrics: scoredMetrics,
        matchedConcepts,
        missedConcepts,
        response,
        timestamp: new Date().toISOString(),
        passed: accuracy >= 70, // Default passing threshold
    };
}
|
|
62
|
+
/**
 * Decide whether `concept` can be found in `response`.
 *
 * Three checks run in order, stopping at the first success: a literal
 * substring hit, the share of the concept's words (longer than two
 * characters) found in the response reaching `fuzzyThreshold`, and a
 * substring hit on any variation produced by generateVariations().
 */
function conceptMatches(response, concept, fuzzyThreshold) {
    const appears = (needle) => response.includes(needle);
    // Literal hit
    if (appears(concept)) {
        return true;
    }
    // Multi-word concepts: compare the share of significant words found
    const significantWords = concept.split(/\s+/).filter((word) => word.length > 2);
    if (significantWords.length > 1) {
        let hits = 0;
        for (const word of significantWords) {
            if (appears(word)) {
                hits += 1;
            }
        }
        if (hits / significantWords.length >= fuzzyThreshold) {
            return true;
        }
    }
    // Last resort: spelling variations and abbreviations
    return generateVariations(concept).some((variation) => appears(variation));
}
|
|
88
|
+
/**
 * Generate common variations of a concept.
 *
 * Produces, in order: the hyphen/space swapped spellings, a naive
 * singular or plural form, and abbreviation swaps from a fixed table.
 * A full word is replaced by each of its abbreviations wherever it occurs;
 * an abbreviation is expanded only when it stands alone (no letter or digit
 * directly before or after it), so that "application" is not re-expanded
 * through its own "app" prefix into "applicationlication".
 *
 * @param {string} concept - Concept text, as already normalized by the caller.
 * @returns {string[]} Candidate alternative spellings; may be empty of useful hits.
 */
function generateVariations(concept) {
    const variations = [];
    // Hyphenated vs spaced
    if (concept.includes('-')) {
        variations.push(concept.replace(/-/g, ' '));
    }
    if (concept.includes(' ')) {
        variations.push(concept.replace(/\s+/g, '-'));
    }
    // Plural/singular
    if (concept.endsWith('s') && concept.length > 3) {
        variations.push(concept.slice(0, -1));
    }
    else {
        variations.push(concept + 's');
    }
    // Common abbreviations
    const abbreviations = {
        'context': ['ctx'],
        'configuration': ['config', 'cfg'],
        'documentation': ['docs', 'doc'],
        'application': ['app'],
        'authentication': ['auth'],
        'authorization': ['authz'],
        'database': ['db'],
        'message': ['msg'],
        'response': ['resp'],
        'request': ['req'],
    };
    for (const [full, abbrs] of Object.entries(abbreviations)) {
        if (concept.includes(full)) {
            for (const abbr of abbrs) {
                variations.push(concept.replace(full, abbr));
            }
        }
        for (const abbr of abbrs) {
            // Table entries are plain letters, so no regex escaping is needed.
            const standalone = new RegExp(`(?<![A-Za-z0-9])${abbr}(?![A-Za-z0-9])`);
            if (standalone.test(concept)) {
                variations.push(concept.replace(standalone, full));
            }
        }
    }
    return variations;
}
|
|
134
|
+
/**
 * Average the metrics of a list of test results.
 *
 * Returns all-zero metrics for an empty list. Token counts, duration and
 * tool count are rounded to the nearest integer; accuracy and cost are
 * left as plain means.
 */
export function aggregateMetrics(results) {
    const totals = {
        accuracy: 0,
        tokensTotal: 0,
        tokensInput: 0,
        tokensOutput: 0,
        durationMs: 0,
        toolCount: 0,
        costUsd: 0,
    };
    const count = results.length;
    if (count === 0) {
        return totals;
    }
    for (const { metrics } of results) {
        totals.accuracy += metrics.accuracy;
        totals.tokensTotal += metrics.tokensTotal;
        totals.tokensInput += metrics.tokensInput;
        totals.tokensOutput += metrics.tokensOutput;
        totals.durationMs += metrics.durationMs;
        totals.toolCount += metrics.toolCount;
        totals.costUsd += metrics.costUsd;
    }
    const mean = (field) => totals[field] / count;
    const roundedMean = (field) => Math.round(mean(field));
    return {
        accuracy: mean('accuracy'),
        tokensTotal: roundedMean('tokensTotal'),
        tokensInput: roundedMean('tokensInput'),
        tokensOutput: roundedMean('tokensOutput'),
        durationMs: roundedMean('durationMs'),
        toolCount: roundedMean('toolCount'),
        costUsd: mean('costUsd'),
    };
}
|
|
177
|
+
/**
 * Percentage (0-100) of results whose `passed` flag is truthy.
 * An empty list yields 0.
 */
export function calculatePassRate(results) {
    const total = results.length;
    if (total === 0) {
        return 0;
    }
    const passCount = results.reduce((tally, result) => (result.passed ? tally + 1 : tally), 0);
    return (passCount / total) * 100;
}
|
|
186
|
+
//# sourceMappingURL=concept-accuracy-scorer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"concept-accuracy-scorer.js","sourceRoot":"","sources":["../../src/engine/concept-accuracy-scorer.ts"],"names":[],"mappings":"AAaA,MAAM,eAAe,GAA6B;IAChD,cAAc,EAAE,GAAG;IACnB,eAAe,EAAE,IAAI;CACtB,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,aAAa,CAC3B,IAAoB,EACpB,QAAgB,EAChB,OAAyB,EACzB,UAA0B,EAAE;IAE5B,MAAM,IAAI,GAAG,EAAE,GAAG,eAAe,EAAE,GAAG,OAAO,EAAE,CAAC;IAChD,MAAM,kBAAkB,GAAG,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;IAEpF,MAAM,eAAe,GAAa,EAAE,CAAC;IACrC,MAAM,cAAc,GAAa,EAAE,CAAC;IAEpC,8BAA8B;IAC9B,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;QACpC,MAAM,iBAAiB,GAAG,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;QAEjF,IAAI,cAAc,CAAC,kBAAkB,EAAE,iBAAiB,EAAE,IAAI,CAAC,cAAc,CAAC,EAAE,CAAC;YAC/E,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAChC,CAAC;aAAM,CAAC;YACN,cAAc,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC/B,CAAC;IACH,CAAC;IAED,oDAAoD;IACpD,KAAK,MAAM,QAAQ,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;QACrC,qDAAqD;QACrD,MAAM,aAAa,GAAG,QAAQ,CAAC,KAAK,CAAC,yBAAyB,CAAC,CAAC;QAChE,MAAM,OAAO,GAAG,aAAa,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;QAC5D,MAAM,iBAAiB,GAAG,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;QAEjF,8BAA8B;QAC9B,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,KAAK,iBAAiB,CAAC,EAAE,CAAC;YACrE,SAAS;QACX,CAAC;QAED,IAAI,cAAc,CAAC,kBAAkB,EAAE,iBAAiB,EAAE,IAAI,CAAC,cAAc,CAAC,EAAE,CAAC;YAC/E,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;gBACvC,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAChC,CAAC;QACH,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;gBACtC,cAAc,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAC/B,CAAC;QACH,CAAC;IACH,CAAC;IAED,qBAAqB;IACrB,MAAM,aAAa,GAAG,eAAe,CAAC,MAAM,GAAG,cAAc,CAAC,MAAM,CAAC;IACrE,MAAM,QAAQ,GAAG,aAAa,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,eAAe,CAAC,MAAM,GAAG,aAAa,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IAExF,0CAA0C;IAC1C,MAAM,aAAa,GAAqB;QACtC,GAAG,OAAO;QACV,QAAQ;KACT,CAAC;IAEF,OAAO;QACL,IAAI;QACJ,OAAO,EAAE,aAAa;QACtB,eAAe;QAC
f,cAAc;QACd,QAAQ;QACR,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,MAAM,EAAE,QAAQ,IAAI,EAAE,EAAE,4BAA4B;KACrD,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,QAAgB,EAAE,OAAe,EAAE,cAAsB;IAC/E,yBAAyB;IACzB,IAAI,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;QAC/B,OAAO,IAAI,CAAC;IACd,CAAC;IAED,6CAA6C;IAC7C,MAAM,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACtE,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5B,MAAM,YAAY,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC;QAC5E,MAAM,KAAK,GAAG,YAAY,CAAC,MAAM,GAAG,YAAY,CAAC,MAAM,CAAC;QACxD,IAAI,KAAK,IAAI,cAAc,EAAE,CAAC;YAC5B,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,gCAAgC;IAChC,MAAM,UAAU,GAAG,kBAAkB,CAAC,OAAO,CAAC,CAAC;IAC/C,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,IAAI,QAAQ,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;YACjC,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,SAAS,kBAAkB,CAAC,OAAe;IACzC,MAAM,UAAU,GAAa,EAAE,CAAC;IAEhC,uBAAuB;IACvB,IAAI,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QAC1B,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,CAAC;IAC9C,CAAC;IACD,IAAI,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QAC1B,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,CAAC;IAChD,CAAC;IAED,kBAAkB;IAClB,IAAI,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAChD,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IACxC,CAAC;SAAM,CAAC;QACN,UAAU,CAAC,IAAI,CAAC,OAAO,GAAG,GAAG,CAAC,CAAC;IACjC,CAAC;IAED,uBAAuB;IACvB,MAAM,aAAa,GAA6B;QAC9C,SAAS,EAAE,CAAC,KAAK,CAAC;QAClB,eAAe,EAAE,CAAC,QAAQ,EAAE,KAAK,CAAC;QAClC,eAAe,EAAE,CAAC,MAAM,EAAE,KAAK,CAAC;QAChC,aAAa,EAAE,CAAC,KAAK,CAAC;QACtB,gBAAgB,EAAE,CAAC,MAAM,CAAC;QAC1B,eAAe,EAAE,CAAC,OAAO,CAAC;QAC1B,UAAU,EAAE,CAAC,IAAI,CAAC;QAClB,SAAS,EAAE,CAAC,KAAK,CAAC;QAClB,UAAU,EAAE,CAAC,MAAM,CAAC;QACpB,SAAS,EAAE,CAAC,KAAK,CAAC;KACnB,CAAC;IAEF,KAAK,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,aAAa,CAAC,EAAE,
CAAC;QAC1D,IAAI,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACzB,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC;YAC/C,CAAC;QACH,CAAC;QACD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC3B,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC;YAC/C,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,OAAqB;IACpD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO;YACL,QAAQ,EAAE,CAAC;YACX,WAAW,EAAE,CAAC;YACd,WAAW,EAAE,CAAC;YACd,YAAY,EAAE,CAAC;YACf,UAAU,EAAE,CAAC;YACb,SAAS,EAAE,CAAC;YACZ,OAAO,EAAE,CAAC;SACX,CAAC;IACJ,CAAC;IAED,MAAM,GAAG,GAAG,OAAO,CAAC,MAAM,CACxB,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;QACX,QAAQ,EAAE,GAAG,CAAC,QAAQ,GAAG,CAAC,CAAC,OAAO,CAAC,QAAQ;QAC3C,WAAW,EAAE,GAAG,CAAC,WAAW,GAAG,CAAC,CAAC,OAAO,CAAC,WAAW;QACpD,WAAW,EAAE,GAAG,CAAC,WAAW,GAAG,CAAC,CAAC,OAAO,CAAC,WAAW;QACpD,YAAY,EAAE,GAAG,CAAC,YAAY,GAAG,CAAC,CAAC,OAAO,CAAC,YAAY;QACvD,UAAU,EAAE,GAAG,CAAC,UAAU,GAAG,CAAC,CAAC,OAAO,CAAC,UAAU;QACjD,SAAS,EAAE,GAAG,CAAC,SAAS,GAAG,CAAC,CAAC,OAAO,CAAC,SAAS;QAC9C,OAAO,EAAE,GAAG,CAAC,OAAO,GAAG,CAAC,CAAC,OAAO,CAAC,OAAO;KACzC,CAAC,EACF;QACE,QAAQ,EAAE,CAAC;QACX,WAAW,EAAE,CAAC;QACd,WAAW,EAAE,CAAC;QACd,YAAY,EAAE,CAAC;QACf,UAAU,EAAE,CAAC;QACb,SAAS,EAAE,CAAC;QACZ,OAAO,EAAE,CAAC;KACX,CACF,CAAC;IAEF,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC;IAE7B,OAAO;QACL,QAAQ,EAAE,GAAG,CAAC,QAAQ,GAAG,KAAK;QAC9B,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,WAAW,GAAG,KAAK,CAAC;QAChD,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,WAAW,GAAG,KAAK,CAAC;QAChD,YAAY,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,YAAY,GAAG,KAAK,CAAC;QAClD,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,GAAG,KAAK,CAAC;QAC9C,SAAS,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,SAAS,GAAG,KAAK,CAAC;QAC5C,OAAO,EAAE,GAAG,CAAC,OAAO,GAAG,KAAK;KAC7B,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,iBAAiB,CAAC,OAAqB;IACrD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACnC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EA
AE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IACtD,OAAO,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC;AACzC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"concept-accuracy-scorer.test.d.ts","sourceRoot":"","sources":["../../src/engine/concept-accuracy-scorer.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for concept-accuracy-scorer.ts
|
|
3
|
+
* Verifies scoring logic, fuzzy matching, and metric aggregation
|
|
4
|
+
*/
|
|
5
|
+
import { describe, it, expect } from 'vitest';
|
|
6
|
+
import { scoreResponse, aggregateMetrics, calculatePassRate, } from './concept-accuracy-scorer.js';
|
|
7
|
+
// Builds a test definition fixture; any field can be replaced via `overrides`
function createTestDefinition(overrides = {}) {
    const defaults = {
        name: 'test-concept-matching',
        type: 'knowledge',
        concepts: ['orchestrator', 'context isolation', 'consensus'],
        timeout: 600,
        prompt: 'How do multi-agent systems work?',
        expected: ['coordination pattern', 'message passing'],
        sourcePath: '/test/test.md',
    };
    return Object.assign(defaults, overrides);
}
|
|
20
|
+
// Builds a metrics fixture; any field can be replaced via `overrides`
function createMetrics(overrides = {}) {
    const defaults = {
        accuracy: 0,
        tokensTotal: 1000,
        tokensInput: 800,
        tokensOutput: 200,
        durationMs: 5000,
        toolCount: 3,
        costUsd: 0.01,
    };
    return Object.assign(defaults, overrides);
}
|
|
33
|
+
describe('scoreResponse', () => {
    // Scores `response` against a fixture built from the given concept and expected lists
    const score = (concepts, expected, response) => scoreResponse(createTestDefinition({ concepts, expected }), response, createMetrics());
    describe('exact matching', () => {
        it('matches concepts that appear exactly in response', () => {
            const { matchedConcepts, missedConcepts, metrics } = score(['orchestrator', 'consensus'], [], 'The orchestrator pattern uses consensus for decisions');
            expect(matchedConcepts).toContain('orchestrator');
            expect(matchedConcepts).toContain('consensus');
            expect(missedConcepts).toHaveLength(0);
            expect(metrics.accuracy).toBe(100);
        });
        it('tracks missed concepts', () => {
            const { matchedConcepts, missedConcepts } = score(['orchestrator', 'blockchain', 'consensus'], [], 'The orchestrator pattern coordinates agents');
            expect(matchedConcepts).toContain('orchestrator');
            expect(missedConcepts).toContain('blockchain');
            expect(missedConcepts).toContain('consensus');
        });
    });
    describe('case insensitive matching', () => {
        it('matches regardless of case by default', () => {
            const { matchedConcepts, metrics } = score(['Orchestrator', 'CONSENSUS'], [], 'The orchestrator uses consensus mechanisms');
            expect(matchedConcepts).toHaveLength(2);
            expect(metrics.accuracy).toBe(100);
        });
    });
    describe('fuzzy matching', () => {
        it('matches hyphenated vs spaced variations', () => {
            const { matchedConcepts } = score(['context-isolation'], [], 'We use context isolation for safety');
            expect(matchedConcepts).toContain('context-isolation');
        });
        it('matches plurals', () => {
            const { matchedConcepts } = score(['agents'], [], 'Each agent has its own context');
            expect(matchedConcepts).toContain('agents');
        });
        it('matches common abbreviations', () => {
            const { matchedConcepts } = score(['configuration'], [], 'The config file specifies settings');
            expect(matchedConcepts).toContain('configuration');
        });
    });
    describe('expected patterns', () => {
        it('parses checkbox items from expected', () => {
            const { matchedConcepts } = score([], ['- [ ] Uses message passing', '- [x] Handles failures'], 'The system uses message passing and handles failures gracefully');
            expect(matchedConcepts).toContain('Uses message passing');
            expect(matchedConcepts).toContain('Handles failures');
        });
    });
    describe('accuracy calculation', () => {
        it('calculates accuracy as matched / total * 100', () => {
            // Only 'orchestrator' and 'consensus' appear in response
            const { metrics } = score(['orchestrator', 'consensus', 'blockchain', 'kubernetes'], [], 'The orchestrator uses consensus for coordination');
            expect(metrics.accuracy).toBe(50); // 2/4 * 100
        });
        it('returns 0 accuracy when no concepts', () => {
            const { metrics } = score([], [], 'Some response');
            expect(metrics.accuracy).toBe(0);
        });
    });
    describe('pass/fail threshold', () => {
        // Ten single-letter concepts make each match worth exactly 10%
        const tenLetters = () => ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'];
        it('passes when accuracy >= 70%', () => {
            // Match 7 out of 10 = 70%
            const { passed } = score(tenLetters(), [], 'a b c d e f g');
            expect(passed).toBe(true);
        });
        it('fails when accuracy < 70%', () => {
            // Match 6 out of 10 = 60%
            const { passed } = score(tenLetters(), [], 'a b c d e f');
            expect(passed).toBe(false);
        });
    });
});
|
|
167
|
+
describe('aggregateMetrics', () => {
    // Wraps metric overrides in an otherwise empty test result
    const resultWith = (metricOverrides, passed) => ({
        test: createTestDefinition(),
        metrics: createMetrics(metricOverrides),
        matchedConcepts: [],
        missedConcepts: [],
        response: '',
        timestamp: new Date().toISOString(),
        passed,
    });
    it('returns zero metrics for empty results', () => {
        const { accuracy, tokensTotal, durationMs, costUsd } = aggregateMetrics([]);
        expect(accuracy).toBe(0);
        expect(tokensTotal).toBe(0);
        expect(durationMs).toBe(0);
        expect(costUsd).toBe(0);
    });
    it('averages metrics across multiple results', () => {
        const aggregated = aggregateMetrics([
            resultWith({ accuracy: 80, tokensTotal: 1000, costUsd: 0.10 }, true),
            resultWith({ accuracy: 60, tokensTotal: 2000, costUsd: 0.20 }, false),
        ]);
        expect(aggregated.accuracy).toBe(70); // (80 + 60) / 2
        expect(aggregated.tokensTotal).toBe(1500); // (1000 + 2000) / 2
        expect(aggregated.costUsd).toBeCloseTo(0.15, 10); // (0.10 + 0.20) / 2
    });
});
|
|
202
|
+
describe('calculatePassRate', () => {
    // Turns a list of booleans into minimal result objects
    const outcomes = (...flags) => flags.map((passed) => ({ passed }));
    it('returns 0 for empty results', () => {
        expect(calculatePassRate([])).toBe(0);
    });
    it('calculates percentage of passed tests', () => {
        expect(calculatePassRate(outcomes(true, true, false, true))).toBe(75); // 3/4 * 100
    });
    it('returns 100 when all tests pass', () => {
        expect(calculatePassRate(outcomes(true, true))).toBe(100);
    });
    it('returns 0 when all tests fail', () => {
        expect(calculatePassRate(outcomes(false, false))).toBe(0);
    });
});
|
|
230
|
+
//# sourceMappingURL=concept-accuracy-scorer.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"concept-accuracy-scorer.test.js","sourceRoot":"","sources":["../../src/engine/concept-accuracy-scorer.test.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EACL,aAAa,EACb,gBAAgB,EAChB,iBAAiB,GAClB,MAAM,8BAA8B,CAAC;AAGtC,mCAAmC;AACnC,SAAS,oBAAoB,CAAC,YAAqC,EAAE;IACnE,OAAO;QACL,IAAI,EAAE,uBAAuB;QAC7B,IAAI,EAAE,WAAW;QACjB,QAAQ,EAAE,CAAC,cAAc,EAAE,mBAAmB,EAAE,WAAW,CAAC;QAC5D,OAAO,EAAE,GAAG;QACZ,MAAM,EAAE,kCAAkC;QAC1C,QAAQ,EAAE,CAAC,sBAAsB,EAAE,iBAAiB,CAAC;QACrD,UAAU,EAAE,eAAe;QAC3B,GAAG,SAAS;KACb,CAAC;AACJ,CAAC;AAED,2BAA2B;AAC3B,SAAS,aAAa,CAAC,YAAuC,EAAE;IAC9D,OAAO;QACL,QAAQ,EAAE,CAAC;QACX,WAAW,EAAE,IAAI;QACjB,WAAW,EAAE,GAAG;QAChB,YAAY,EAAE,GAAG;QACjB,UAAU,EAAE,IAAI;QAChB,SAAS,EAAE,CAAC;QACZ,OAAO,EAAE,IAAI;QACb,GAAG,SAAS;KACb,CAAC;AACJ,CAAC;AAED,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;IAC7B,QAAQ,CAAC,gBAAgB,EAAE,GAAG,EAAE;QAC9B,EAAE,CAAC,kDAAkD,EAAE,GAAG,EAAE;YAC1D,MAAM,IAAI,GAAG,oBAAoB,CAAC;gBAChC,QAAQ,EAAE,CAAC,cAAc,EAAE,WAAW,CAAC;gBACvC,QAAQ,EAAE,EAAE;aACb,CAAC,CAAC;YACH,MAAM,QAAQ,GAAG,uDAAuD,CAAC;YACzE,MAAM,OAAO,GAAG,aAAa,EAAE,CAAC;YAEhC,MAAM,MAAM,GAAG,aAAa,CAAC,IAAI,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC;YACzD,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;YACtD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAC9C,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC5C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,wBAAwB,EAAE,GAAG,EAAE;YAChC,MAAM,IAAI,GAAG,oBAAoB,CAAC;gBAChC,QAAQ,EAAE,CAAC,cAAc,EAAE,YAAY,EAAE,WAAW,CAAC;gBACrD,QAAQ,EAAE,EAAE;aACb,CAAC,CAAC;YACH,MAAM,QAAQ,GAAG,6CAA6C,CAAC;YAC/D,MAAM,OAAO,GAAG,aAAa,EAAE,CAAC;YAEhC,MAAM,MAAM,GAAG,aAAa,CAAC,IAAI,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC;YACzD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;YACtD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;QACvD,CAAC,CAAC,CAAC;IACL,C
AAC,CAAC,CAAC;IAEH,QAAQ,CAAC,2BAA2B,EAAE,GAAG,EAAE;QACzC,EAAE,CAAC,uCAAuC,EAAE,GAAG,EAAE;YAC/C,MAAM,IAAI,GAAG,oBAAoB,CAAC;gBAChC,QAAQ,EAAE,CAAC,cAAc,EAAE,WAAW,CAAC;gBACvC,QAAQ,EAAE,EAAE;aACb,CAAC,CAAC;YACH,MAAM,QAAQ,GAAG,4CAA4C,CAAC;YAC9D,MAAM,OAAO,GAAG,aAAa,EAAE,CAAC;YAEhC,MAAM,MAAM,GAAG,aAAa,CAAC,IAAI,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAC/C,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC5C,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,gBAAgB,EAAE,GAAG,EAAE;QAC9B,EAAE,CAAC,yCAAyC,EAAE,GAAG,EAAE;YACjD,MAAM,IAAI,GAAG,oBAAoB,CAAC;gBAChC,QAAQ,EAAE,CAAC,mBAAmB,CAAC;gBAC/B,QAAQ,EAAE,EAAE;aACb,CAAC,CAAC;YACH,MAAM,QAAQ,GAAG,qCAAqC,CAAC;YACvD,MAAM,OAAO,GAAG,aAAa,EAAE,CAAC;YAEhC,MAAM,MAAM,GAAG,aAAa,CAAC,IAAI,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,SAAS,CAAC,mBAAmB,CAAC,CAAC;QAChE,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,iBAAiB,EAAE,GAAG,EAAE;YACzB,MAAM,IAAI,GAAG,oBAAoB,CAAC;gBAChC,QAAQ,EAAE,CAAC,QAAQ,CAAC;gBACpB,QAAQ,EAAE,EAAE;aACb,CAAC,CAAC;YACH,MAAM,QAAQ,GAAG,gCAAgC,CAAC;YAClD,MAAM,OAAO,GAAG,aAAa,EAAE,CAAC;YAEhC,MAAM,MAAM,GAAG,aAAa,CAAC,IAAI,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;QACrD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,8BAA8B,EAAE,GAAG,EAAE;YACtC,MAAM,IAAI,GAAG,oBAAoB,CAAC;gBAChC,QAAQ,EAAE,CAAC,eAAe,CAAC;gBAC3B,QAAQ,EAAE,EAAE;aACb,CAAC,CAAC;YACH,MAAM,QAAQ,GAAG,oCAAoC,CAAC;YACtD,MAAM,OAAO,GAAG,aAAa,EAAE,CAAC;YAEhC,MAAM,MAAM,GAAG,aAAa,CAAC,IAAI,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;QAC5D,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;QACjC,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;YAC7C,MAAM,IAAI,GAAG,oBAAoB,CAAC;gBAChC,QAAQ,EAAE,EAAE;gBACZ,QAAQ,EAAE,CAAC,4BAA4B,EAAE,wBAAwB,CAAC;aACnE,CAAC,CAAC;YACH,MAAM,QAAQ,GAAG,iEAAiE,CAAC;YACnF,MAAM,OAAO,GAAG,aAAa,EAAE,CAAC;YAEhC,MAAM,MAAM,GAAG,aAAa,CAAC,IAAI,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC
,MAAM,CAAC,eAAe,CAAC,CAAC,SAAS,CAAC,sBAAsB,CAAC,CAAC;YACjE,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;QAC/D,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,sBAAsB,EAAE,GAAG,EAAE;QACpC,EAAE,CAAC,8CAA8C,EAAE,GAAG,EAAE;YACtD,MAAM,IAAI,GAAG,oBAAoB,CAAC;gBAChC,QAAQ,EAAE,CAAC,cAAc,EAAE,WAAW,EAAE,YAAY,EAAE,YAAY,CAAC;gBACnE,QAAQ,EAAE,EAAE;aACb,CAAC,CAAC;YACH,yDAAyD;YACzD,MAAM,QAAQ,GAAG,kDAAkD,CAAC;YACpE,MAAM,OAAO,GAAG,aAAa,EAAE,CAAC;YAEhC,MAAM,MAAM,GAAG,aAAa,CAAC,IAAI,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,YAAY;QACxD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;YAC7C,MAAM,IAAI,GAAG,oBAAoB,CAAC;gBAChC,QAAQ,EAAE,EAAE;gBACZ,QAAQ,EAAE,EAAE;aACb,CAAC,CAAC;YACH,MAAM,QAAQ,GAAG,eAAe,CAAC;YACjC,MAAM,OAAO,GAAG,aAAa,EAAE,CAAC;YAEhC,MAAM,MAAM,GAAG,aAAa,CAAC,IAAI,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC1C,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;QACnC,EAAE,CAAC,6BAA6B,EAAE,GAAG,EAAE;YACrC,MAAM,IAAI,GAAG,oBAAoB,CAAC;gBAChC,QAAQ,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC;gBAC5D,QAAQ,EAAE,EAAE;aACb,CAAC,CAAC;YACH,0BAA0B;YAC1B,MAAM,QAAQ,GAAG,eAAe,CAAC;YACjC,MAAM,OAAO,GAAG,aAAa,EAAE,CAAC;YAEhC,MAAM,MAAM,GAAG,aAAa,CAAC,IAAI,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnC,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,2BAA2B,EAAE,GAAG,EAAE;YACnC,MAAM,IAAI,GAAG,oBAAoB,CAAC;gBAChC,QAAQ,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC;gBAC5D,QAAQ,EAAE,EAAE;aACb,CAAC,CAAC;YACH,0BAA0B;YAC1B,MAAM,QAAQ,GAAG,aAAa,CAAC;YAC/B,MAAM,OAAO,GAAG,aAAa,EAAE,CAAC;YAEhC,MAAM,MAAM,GAAG,aAAa,CAAC,IAAI,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACpC,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;
AAEH,QAAQ,CAAC,kBAAkB,EAAE,GAAG,EAAE;IAChC,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;QAChD,MAAM,MAAM,GAAG,gBAAgB,CAAC,EAAE,CAAC,CAAC;QAEpC,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAChC,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACnC,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACjC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;QAClD,MAAM,OAAO,GAAiB;YAC5B;gBACE,IAAI,EAAE,oBAAoB,EAAE;gBAC5B,OAAO,EAAE,aAAa,CAAC,EAAE,QAAQ,EAAE,EAAE,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;gBAC1E,eAAe,EAAE,EAAE;gBACnB,cAAc,EAAE,EAAE;gBAClB,QAAQ,EAAE,EAAE;gBACZ,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;gBACnC,MAAM,EAAE,IAAI;aACb;YACD;gBACE,IAAI,EAAE,oBAAoB,EAAE;gBAC5B,OAAO,EAAE,aAAa,CAAC,EAAE,QAAQ,EAAE,EAAE,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;gBAC1E,eAAe,EAAE,EAAE;gBACnB,cAAc,EAAE,EAAE;gBAClB,QAAQ,EAAE,EAAE;gBACZ,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;gBACnC,MAAM,EAAE,KAAK;aACd;SACF,CAAC;QAEF,MAAM,UAAU,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC;QAE7C,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,gBAAgB;QACtD,MAAM,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,oBAAoB;QAC/D,MAAM,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,WAAW,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,oBAAoB;IACxE,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;IACjC,EAAE,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACrC,MAAM,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uCAAuC,EAAE,GAAG,EAAE;QAC/C,MAAM,OAAO,GAAiB;YAC5B,EAAE,MAAM,EAAE,IAAI,EAAgB;YAC9B,EAAE,MAAM,EAAE,IAAI,EAAgB;YAC9B,EAAE,MAAM,EAAE,KAAK,EAAgB;YAC/B,EAAE,MAAM,EAAE,IAAI,EAAgB;SAC/B,CAAC;QAEF,MAAM,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,YAAY;IAC3D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;QACzC,MAAM,OAAO,GAAiB;YAC5B,EAAE,MAAM,EAAE,IAAI,EAAgB;YAC9B,EAAE,MAAM,EAAE,IAAI,EAAgB;SAC/B,CAAC;QAEF,MAAM,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC,CAAC,I
AAI,CAAC,GAAG,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+BAA+B,EAAE,GAAG,EAAE;QACvC,MAAM,OAAO,GAAiB;YAC5B,EAAE,MAAM,EAAE,KAAK,EAAgB;YAC/B,EAAE,MAAM,EAAE,KAAK,EAAgB;SAChC,CAAC;QAEF,MAAM,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC7C,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Enhanced test prompt builder for skill test generation.
|
|
3
|
+
*
|
|
4
|
+
* Builds enriched prompts by combining skill content with analysis
|
|
5
|
+
* from skill-creator (capabilities, concepts, edge cases, testing patterns).
|
|
6
|
+
*/
|
|
7
|
+
import type { SkillAnalysis } from './skill-creator-invoker.js';
|
|
8
|
+
/**
|
|
9
|
+
* Build enhanced prompt with skill analysis context.
|
|
10
|
+
*
|
|
11
|
+
* When analysis is available, enriches the prompt with:
|
|
12
|
+
* - Extracted capabilities and concepts
|
|
13
|
+
* - Edge cases to consider
|
|
14
|
+
* - Claude Code testing patterns
|
|
15
|
+
* - Expected tool invocations
|
|
16
|
+
*
|
|
17
|
+
* When analysis is null (graceful degradation), builds basic prompt.
|
|
18
|
+
*
|
|
19
|
+
* @param skillContent - Formatted skill content from SkillContentCollector
|
|
20
|
+
* @param analysis - SkillAnalysis from skill-creator, or null for basic mode
|
|
21
|
+
* @returns Enhanced prompt string for test generation
|
|
22
|
+
*/
|
|
23
|
+
export declare function buildEnhancedTestPrompt(skillContent: string, analysis: SkillAnalysis | null): string;
|
|
24
|
+
/**
|
|
25
|
+
* Extract concepts from analysis for test definition.
|
|
26
|
+
*
|
|
27
|
+
* Merges key concepts from analysis with any existing concepts.
|
|
28
|
+
*/
|
|
29
|
+
export declare function mergeConceptsFromAnalysis(existingConcepts: string[], analysis: SkillAnalysis | null): string[];
|
|
30
|
+
//# sourceMappingURL=enhanced-test-prompt-builder.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"enhanced-test-prompt-builder.d.ts","sourceRoot":"","sources":["../../src/engine/enhanced-test-prompt-builder.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AACH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAuDhE;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,uBAAuB,CACrC,YAAY,EAAE,MAAM,EACpB,QAAQ,EAAE,aAAa,GAAG,IAAI,GAC7B,MAAM,CA8CR;AAuBD;;;;GAIG;AACH,wBAAgB,yBAAyB,CACvC,gBAAgB,EAAE,MAAM,EAAE,EAC1B,QAAQ,EAAE,aAAa,GAAG,IAAI,GAC7B,MAAM,EAAE,CAOV"}
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
/**
 * Format the skill analysis section for the prompt.
 *
 * Emits up to three sections, in this order: "### Capabilities",
 * "### Key Concepts" and "### Edge Cases to Test". Each section is its heading
 * followed by one "- <item>" bullet line per entry. Sections are separated by
 * a single blank line; a section whose list is empty (or not an array) is
 * omitted entirely, so the result never starts with a blank line.
 *
 * @param analysis - object read for `capabilities`, `keyConcepts` and `edgeCases`
 * @returns the formatted section text, or an empty string when no list has entries
 */
function formatSkillAnalysis(analysis) {
    const groups = [
        ['### Capabilities', analysis.capabilities],
        ['### Key Concepts', analysis.keyConcepts],
        ['### Edge Cases to Test', analysis.edgeCases],
    ];
    return groups
        // Drop empty sections up front so separators are only placed between
        // sections that are actually rendered.
        .filter(([, items]) => Array.isArray(items) && items.length > 0)
        .map(([heading, items]) => [heading, ...items.map((item) => `- ${item}`)].join('\n'))
        .join('\n\n');
}
|
|
26
|
+
/**
 * Format the Claude Code testing context section for the prompt.
 *
 * Emits up to two sections, in this order: "### Testing Patterns" and
 * "### Expected Tool Invocations". Each section is its heading followed by one
 * "- <item>" bullet line per entry. Sections are separated by a single blank
 * line; a section whose list is empty (or not an array) is omitted, so the
 * result never starts with a blank line.
 *
 * @param analysis - object read for `testingPatterns` and `toolInvocationExpectations`
 * @returns the formatted section text, or an empty string when no list has entries
 */
function formatTestingContext(analysis) {
    const groups = [
        ['### Testing Patterns', analysis.testingPatterns],
        ['### Expected Tool Invocations', analysis.toolInvocationExpectations],
    ];
    return groups
        // Filter before joining so the blank-line separator only appears
        // between two sections that are both present.
        .filter(([, items]) => Array.isArray(items) && items.length > 0)
        .map(([heading, items]) => [heading, ...items.map((item) => `- ${item}`)].join('\n'))
        .join('\n\n');
}
|
|
45
|
+
/**
 * Build the test-generation prompt, enriched with skill analysis when available.
 *
 * Enhanced mode is used when `analysis` is truthy and at least one of
 * `capabilities`, `keyConcepts` or `edgeCases` has entries. The prompt then
 * contains, in order: the skill analysis section, the Claude Code testing
 * context section, the raw skill content, the expected JSON output format and
 * the generation rules.
 *
 * Otherwise (graceful degradation) the result of `buildBasicTestPrompt` is
 * returned unchanged.
 *
 * NOTE(review): `testingPatterns` and `toolInvocationExpectations` are not part
 * of the enhanced-mode check; an analysis carrying only those falls back to the
 * basic prompt.
 *
 * @param skillContent - Formatted skill content from SkillContentCollector
 * @param analysis - SkillAnalysis from skill-creator, or null for basic mode
 * @returns Prompt string for test generation
 */
export function buildEnhancedTestPrompt(skillContent, analysis) {
    if (!analysis) {
        return buildBasicTestPrompt(skillContent);
    }
    const nothingToEnrichWith = !(analysis.capabilities.length > 0) &&
        !(analysis.keyConcepts.length > 0) &&
        !(analysis.edgeCases.length > 0);
    if (nothingToEnrichWith) {
        return buildBasicTestPrompt(skillContent);
    }
    const analysisText = formatSkillAnalysis(analysis);
    const contextText = formatTestingContext(analysis);
    return `You must respond with ONLY a JSON object. No explanation, no markdown code blocks, just raw JSON.

## Skill Analysis
${analysisText}

## Claude Code Testing Context
${contextText}

## Skill Content
${skillContent}

## Output Format
{"skill_name":"<name>","tests":[{
"name":"<skill>-<topic>",
"test_type":"knowledge"|"task",
"concepts":["extracted from analysis above"],
"timeout":600|1800,
"prompt":"...",
"expected_items":["..."]
}]}

## Rules
- Generate 2-4 tests: at least 1 knowledge + 1 task test
- Use concepts from the Skill Analysis section above
- Consider edge cases listed above when designing tests
- Include expected tool invocations in task test expected_items
- timeout: 600 (knowledge/10min), 1800 (task/30min)
- 4-8 expected_items per test
- Test names should be descriptive: <skill-name>-<topic>

JSON:`;
}
|
|
102
|
+
/**
 * Build the plain test-generation prompt used when no skill analysis is
 * available (graceful degradation).
 *
 * The prompt consists of the JSON-only instruction, the expected output
 * format, the generation rules, and finally the skill content itself.
 *
 * @param skillContent - text interpolated under the "Skill content:" heading
 * @returns the complete prompt string, ending with "JSON:"
 */
function buildBasicTestPrompt(skillContent) {
    const promptLines = [
        'You must respond with ONLY a JSON object. No explanation, no markdown code blocks, just raw JSON.',
        '',
        'Generate tests for this skill. Output format:',
        '{"skill_name":"<name>","tests":[{"name":"<skill>-<topic>","test_type":"knowledge"|"task","concepts":["..."],"timeout":600|1800,"prompt":"...","expected_items":["..."]}]}',
        '',
        'Rules:',
        '- 2-4 tests, at least 1 knowledge + 1 task',
        '- Extract concepts from Key Concepts Index or section headers',
        '- timeout: 600 (knowledge/10min), 1800 (task/30min)',
        '- 4-8 expected_items per test',
        '',
        'Skill content:',
        // Interpolated through a template so stringification matches a template
        // literal exactly (join() alone would turn null/undefined into '').
        `${skillContent}`,
        '',
        'JSON:',
    ];
    return promptLines.join('\n');
}
|
|
122
|
+
/**
 * Merge the analysis key concepts into an existing concept list.
 *
 * When `analysis` is falsy, or carries no `keyConcepts` (missing, null or
 * empty), the `existingConcepts` array is returned as-is (same reference).
 * Otherwise a new array is returned holding the existing concepts followed by
 * the analysis key concepts, with duplicates removed and first-seen order kept.
 *
 * @param existingConcepts - concepts already attached to the test definition
 * @param analysis - SkillAnalysis from skill-creator, or null when unavailable
 * @returns the concept list to use for the test definition
 */
export function mergeConceptsFromAnalysis(existingConcepts, analysis) {
    const analysisConcepts = analysis ? analysis.keyConcepts : undefined;
    // Loose null check covers both a missing and an explicitly null field, so
    // an incomplete analysis degrades to "nothing to merge" instead of throwing.
    if (analysisConcepts == null || analysisConcepts.length === 0) {
        return existingConcepts;
    }
    // A Set removes duplicates while keeping insertion order.
    return [...new Set([...existingConcepts, ...analysisConcepts])];
}
|
|
134
|
+
//# sourceMappingURL=enhanced-test-prompt-builder.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"enhanced-test-prompt-builder.js","sourceRoot":"","sources":["../../src/engine/enhanced-test-prompt-builder.ts"],"names":[],"mappings":"AAQA;;GAEG;AACH,SAAS,mBAAmB,CAAC,QAAuB;IAClD,MAAM,QAAQ,GAAa,EAAE,CAAC;IAE9B,IAAI,QAAQ,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrC,QAAQ,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;QAClC,KAAK,MAAM,GAAG,IAAI,QAAQ,CAAC,YAAY,EAAE,CAAC;YACxC,QAAQ,CAAC,IAAI,CAAC,KAAK,GAAG,EAAE,CAAC,CAAC;QAC5B,CAAC;IACH,CAAC;IAED,IAAI,QAAQ,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACpC,QAAQ,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;QACpC,KAAK,MAAM,OAAO,IAAI,QAAQ,CAAC,WAAW,EAAE,CAAC;YAC3C,QAAQ,CAAC,IAAI,CAAC,KAAK,OAAO,EAAE,CAAC,CAAC;QAChC,CAAC;IACH,CAAC;IAED,IAAI,QAAQ,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAClC,QAAQ,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;QAC1C,KAAK,MAAM,IAAI,IAAI,QAAQ,CAAC,SAAS,EAAE,CAAC;YACtC,QAAQ,CAAC,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC;QAC7B,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC7B,CAAC;AAED;;GAEG;AACH,SAAS,oBAAoB,CAAC,QAAuB;IACnD,MAAM,QAAQ,GAAa,EAAE,CAAC;IAE9B,IAAI,QAAQ,CAAC,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxC,QAAQ,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;QACtC,KAAK,MAAM,OAAO,IAAI,QAAQ,CAAC,eAAe,EAAE,CAAC;YAC/C,QAAQ,CAAC,IAAI,CAAC,KAAK,OAAO,EAAE,CAAC,CAAC;QAChC,CAAC;IACH,CAAC;IAED,IAAI,QAAQ,CAAC,0BAA0B,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACnD,QAAQ,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;QACjD,KAAK,MAAM,IAAI,IAAI,QAAQ,CAAC,0BAA0B,EAAE,CAAC;YACvD,QAAQ,CAAC,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC;QAC7B,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC7B,CAAC;AAED;;;;;;;;;;;;;;GAcG;AACH,MAAM,UAAU,uBAAuB,CACrC,YAAoB,EACpB,QAA8B;IAE9B,MAAM,WAAW,GAAG,QAAQ,IAAI,CAC9B,QAAQ,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC;QAChC,QAAQ,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC;QAC/B,QAAQ,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAC9B,CAAC;IAEF,IAAI,CAAC,WAAW,EAAE,CAAC;QACjB,qEAAqE;QACrE,OAAO,oBAAoB,CAAC,YAAY,CAAC,CAAC;IAC5C,CAAC;IAED,MAAM,oBAAoB,GAAG,mBAAmB,CAAC,QAAS,CAAC,CAAC;IAC5D,MAAM,qBAAqB,GAAG,oBAAoB,CAAC,QAAS,CAAC,CAAC;IAE9D,OAAO;;;EAGP,oBAAoB;;;EAGpB,qBAAqB;;;EAGrB,YAAY;;;;
;;;;;;;;;;;;;;;;;MAqBR,CAAC;AACP,CAAC;AAED;;GAEG;AACH,SAAS,oBAAoB,CAAC,YAAoB;IAChD,OAAO;;;;;;;;;;;;EAYP,YAAY;;MAER,CAAC;AACP,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,yBAAyB,CACvC,gBAA0B,EAC1B,QAA8B;IAE9B,IAAI,CAAC,QAAQ,IAAI,QAAQ,CAAC,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACnD,OAAO,gBAAgB,CAAC;IAC1B,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,gBAAgB,EAAE,GAAG,QAAQ,CAAC,WAAW,CAAC,CAAC,CAAC;IACvE,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC5B,CAAC"}
|