aspectcode 0.4.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -0
- package/dist/agentsMdRenderer.d.ts +16 -0
- package/dist/agentsMdRenderer.d.ts.map +1 -0
- package/dist/agentsMdRenderer.js +137 -0
- package/dist/agentsMdRenderer.js.map +1 -0
- package/dist/auth.d.ts +31 -0
- package/dist/auth.d.ts.map +1 -0
- package/dist/auth.js +385 -0
- package/dist/auth.js.map +1 -0
- package/dist/autoResolve.d.ts +41 -0
- package/dist/autoResolve.d.ts.map +1 -0
- package/dist/autoResolve.js +196 -0
- package/dist/autoResolve.js.map +1 -0
- package/dist/changeEvaluator.d.ts +56 -0
- package/dist/changeEvaluator.d.ts.map +1 -0
- package/dist/changeEvaluator.js +674 -0
- package/dist/changeEvaluator.js.map +1 -0
- package/dist/cli.d.ts +12 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +1 -1
- package/dist/cli.js.map +1 -1
- package/dist/config.d.ts +37 -17
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +50 -2
- package/dist/config.js.map +1 -1
- package/dist/dreamCycle.d.ts +57 -0
- package/dist/dreamCycle.d.ts.map +1 -0
- package/dist/dreamCycle.js +334 -0
- package/dist/dreamCycle.js.map +1 -0
- package/dist/kbBuilder.d.ts +1 -2
- package/dist/kbBuilder.d.ts.map +1 -1
- package/dist/kbBuilder.js +1 -2
- package/dist/kbBuilder.js.map +1 -1
- package/dist/main.d.ts +2 -1
- package/dist/main.d.ts.map +1 -1
- package/dist/main.js +149 -8
- package/dist/main.js.map +1 -1
- package/dist/optimize.d.ts +13 -6
- package/dist/optimize.d.ts.map +1 -1
- package/dist/optimize.js +433 -142
- package/dist/optimize.js.map +1 -1
- package/dist/pipeline.d.ts +21 -18
- package/dist/pipeline.d.ts.map +1 -1
- package/dist/pipeline.js +1139 -162
- package/dist/pipeline.js.map +1 -1
- package/dist/preferences.d.ts +80 -0
- package/dist/preferences.d.ts.map +1 -0
- package/dist/preferences.js +238 -0
- package/dist/preferences.js.map +1 -0
- package/dist/runtimeState.d.ts +30 -0
- package/dist/runtimeState.d.ts.map +1 -0
- package/dist/runtimeState.js +39 -0
- package/dist/runtimeState.js.map +1 -0
- package/dist/scopedRules.d.ts +84 -0
- package/dist/scopedRules.d.ts.map +1 -0
- package/dist/scopedRules.js +449 -0
- package/dist/scopedRules.js.map +1 -0
- package/dist/ui/Dashboard.d.ts +4 -16
- package/dist/ui/Dashboard.d.ts.map +1 -1
- package/dist/ui/Dashboard.js +339 -141
- package/dist/ui/Dashboard.js.map +1 -1
- package/dist/ui/MemoryMap.d.ts +16 -0
- package/dist/ui/MemoryMap.d.ts.map +1 -0
- package/dist/ui/MemoryMap.js +266 -0
- package/dist/ui/MemoryMap.js.map +1 -0
- package/dist/ui/SettingsPanel.d.ts +18 -0
- package/dist/ui/SettingsPanel.d.ts.map +1 -0
- package/dist/ui/SettingsPanel.js +241 -0
- package/dist/ui/SettingsPanel.js.map +1 -0
- package/dist/ui/prompts.d.ts +7 -0
- package/dist/ui/prompts.d.ts.map +1 -1
- package/dist/ui/prompts.js +63 -0
- package/dist/ui/prompts.js.map +1 -1
- package/dist/ui/store.d.ts +154 -18
- package/dist/ui/store.d.ts.map +1 -1
- package/dist/ui/store.js +154 -24
- package/dist/ui/store.js.map +1 -1
- package/dist/ui/theme.d.ts +1 -8
- package/dist/ui/theme.d.ts.map +1 -1
- package/dist/ui/theme.js +2 -20
- package/dist/ui/theme.js.map +1 -1
- package/dist/updateChecker.d.ts +13 -0
- package/dist/updateChecker.d.ts.map +1 -0
- package/dist/updateChecker.js +66 -0
- package/dist/updateChecker.js.map +1 -0
- package/dist/usageTracker.d.ts +12 -0
- package/dist/usageTracker.d.ts.map +1 -0
- package/dist/usageTracker.js +89 -0
- package/dist/usageTracker.js.map +1 -0
- package/dist/writer.d.ts +1 -7
- package/dist/writer.d.ts.map +1 -1
- package/dist/writer.js +1 -11
- package/dist/writer.js.map +1 -1
- package/node_modules/@aspectcode/core/dist/analysis/repo.d.ts.map +1 -1
- package/node_modules/@aspectcode/core/dist/analysis/repo.js +13 -2
- package/node_modules/@aspectcode/core/dist/analysis/repo.js.map +1 -1
- package/node_modules/@aspectcode/core/dist/index.d.ts +1 -3
- package/node_modules/@aspectcode/core/dist/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/core/dist/index.js +1 -3
- package/node_modules/@aspectcode/core/dist/index.js.map +1 -1
- package/node_modules/@aspectcode/core/dist/parsers/genericExtractors.d.ts +14 -0
- package/node_modules/@aspectcode/core/dist/parsers/genericExtractors.d.ts.map +1 -0
- package/node_modules/@aspectcode/core/dist/parsers/genericExtractors.js +191 -0
- package/node_modules/@aspectcode/core/dist/parsers/genericExtractors.js.map +1 -0
- package/node_modules/@aspectcode/core/dist/parsers/index.d.ts +1 -0
- package/node_modules/@aspectcode/core/dist/parsers/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/core/dist/parsers/index.js +6 -1
- package/node_modules/@aspectcode/core/dist/parsers/index.js.map +1 -1
- package/node_modules/@aspectcode/core/dist/parsers/languages.d.ts +20 -0
- package/node_modules/@aspectcode/core/dist/parsers/languages.d.ts.map +1 -1
- package/node_modules/@aspectcode/core/dist/parsers/languages.js +25 -0
- package/node_modules/@aspectcode/core/dist/parsers/languages.js.map +1 -1
- package/node_modules/@aspectcode/core/dist/parsers/tsJsExtractors.d.ts.map +1 -1
- package/node_modules/@aspectcode/core/dist/parsers/tsJsExtractors.js +4 -1
- package/node_modules/@aspectcode/core/dist/parsers/tsJsExtractors.js.map +1 -1
- package/node_modules/@aspectcode/core/package.json +2 -2
- package/node_modules/@aspectcode/core/parsers/cpp.wasm +0 -0
- package/node_modules/@aspectcode/core/parsers/go.wasm +0 -0
- package/node_modules/@aspectcode/core/parsers/php.wasm +0 -0
- package/node_modules/@aspectcode/core/parsers/ruby.wasm +0 -0
- package/node_modules/@aspectcode/core/parsers/rust.wasm +0 -0
- package/node_modules/@aspectcode/emitters/dist/index.d.ts +1 -17
- package/node_modules/@aspectcode/emitters/dist/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/index.js +2 -89
- package/node_modules/@aspectcode/emitters/dist/index.js.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/index.d.ts +0 -2
- package/node_modules/@aspectcode/emitters/dist/instructions/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/index.js +1 -7
- package/node_modules/@aspectcode/emitters/dist/instructions/index.js.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/kb/analyzers.d.ts +0 -18
- package/node_modules/@aspectcode/emitters/dist/kb/analyzers.d.ts.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/kb/analyzers.js +0 -57
- package/node_modules/@aspectcode/emitters/dist/kb/analyzers.js.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/kb/conventions.d.ts +0 -18
- package/node_modules/@aspectcode/emitters/dist/kb/conventions.d.ts.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/kb/conventions.js +0 -130
- package/node_modules/@aspectcode/emitters/dist/kb/conventions.js.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/kb/index.d.ts +2 -4
- package/node_modules/@aspectcode/emitters/dist/kb/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/kb/index.js +1 -11
- package/node_modules/@aspectcode/emitters/dist/kb/index.js.map +1 -1
- package/node_modules/@aspectcode/emitters/package.json +3 -3
- package/node_modules/@aspectcode/evaluator/dist/apply.d.ts +55 -0
- package/node_modules/@aspectcode/evaluator/dist/apply.d.ts.map +1 -0
- package/node_modules/@aspectcode/evaluator/dist/apply.js +368 -0
- package/node_modules/@aspectcode/evaluator/dist/apply.js.map +1 -0
- package/node_modules/@aspectcode/evaluator/dist/diagnosis.d.ts +16 -25
- package/node_modules/@aspectcode/evaluator/dist/diagnosis.d.ts.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/diagnosis.js +115 -138
- package/node_modules/@aspectcode/evaluator/dist/diagnosis.js.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/index.d.ts +8 -43
- package/node_modules/@aspectcode/evaluator/dist/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/index.js +15 -61
- package/node_modules/@aspectcode/evaluator/dist/index.js.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/judge.d.ts +32 -0
- package/node_modules/@aspectcode/evaluator/dist/judge.d.ts.map +1 -0
- package/node_modules/@aspectcode/evaluator/dist/judge.js +165 -0
- package/node_modules/@aspectcode/evaluator/dist/judge.js.map +1 -0
- package/node_modules/@aspectcode/evaluator/dist/llmUtil.d.ts +15 -0
- package/node_modules/@aspectcode/evaluator/dist/llmUtil.d.ts.map +1 -0
- package/node_modules/@aspectcode/evaluator/dist/llmUtil.js +41 -0
- package/node_modules/@aspectcode/evaluator/dist/llmUtil.js.map +1 -0
- package/node_modules/@aspectcode/evaluator/dist/probes.d.ts +20 -29
- package/node_modules/@aspectcode/evaluator/dist/probes.d.ts.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/probes.js +188 -204
- package/node_modules/@aspectcode/evaluator/dist/probes.js.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/runner.d.ts +7 -32
- package/node_modules/@aspectcode/evaluator/dist/runner.d.ts.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/runner.js +21 -146
- package/node_modules/@aspectcode/evaluator/dist/runner.js.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/types.d.ts +141 -99
- package/node_modules/@aspectcode/evaluator/dist/types.d.ts.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/types.js +10 -2
- package/node_modules/@aspectcode/evaluator/dist/types.js.map +1 -1
- package/node_modules/@aspectcode/evaluator/package.json +4 -4
- package/node_modules/@aspectcode/optimizer/dist/index.d.ts +3 -10
- package/node_modules/@aspectcode/optimizer/dist/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/index.js +1 -19
- package/node_modules/@aspectcode/optimizer/dist/index.js.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/anthropic.d.ts.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/anthropic.js +40 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/anthropic.js.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/aspectcode.d.ts +9 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/aspectcode.d.ts.map +1 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/aspectcode.js +83 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/aspectcode.js.map +1 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/index.d.ts +4 -3
- package/node_modules/@aspectcode/optimizer/dist/providers/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/index.js +24 -10
- package/node_modules/@aspectcode/optimizer/dist/providers/index.js.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/openai.d.ts.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/openai.js +22 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/openai.js.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/retry.d.ts +14 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/retry.d.ts.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/retry.js +1 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/retry.js.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/types.d.ts +14 -0
- package/node_modules/@aspectcode/optimizer/dist/types.d.ts.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/types.js.map +1 -1
- package/node_modules/@aspectcode/optimizer/package.json +2 -2
- package/package.json +6 -7
- package/dist/complaintProcessor.d.ts +0 -16
- package/dist/complaintProcessor.d.ts.map +0 -1
- package/dist/complaintProcessor.js +0 -134
- package/dist/complaintProcessor.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/emitter.d.ts +0 -72
- package/node_modules/@aspectcode/emitters/dist/emitter.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/emitter.js +0 -10
- package/node_modules/@aspectcode/emitters/dist/emitter.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/content.d.ts +0 -15
- package/node_modules/@aspectcode/emitters/dist/instructions/content.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/content.js +0 -289
- package/node_modules/@aspectcode/emitters/dist/instructions/content.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/detection.d.ts +0 -13
- package/node_modules/@aspectcode/emitters/dist/instructions/detection.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/detection.js +0 -55
- package/node_modules/@aspectcode/emitters/dist/instructions/detection.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/instructionsEmitter.d.ts +0 -9
- package/node_modules/@aspectcode/emitters/dist/instructions/instructionsEmitter.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/instructionsEmitter.js +0 -30
- package/node_modules/@aspectcode/emitters/dist/instructions/instructionsEmitter.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/kb/kbEmitter.d.ts +0 -21
- package/node_modules/@aspectcode/emitters/dist/kb/kbEmitter.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/kb/kbEmitter.js +0 -125
- package/node_modules/@aspectcode/emitters/dist/kb/kbEmitter.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/manifest.d.ts +0 -37
- package/node_modules/@aspectcode/emitters/dist/manifest.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/manifest.js +0 -50
- package/node_modules/@aspectcode/emitters/dist/manifest.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/report.d.ts +0 -22
- package/node_modules/@aspectcode/emitters/dist/report.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/report.js +0 -3
- package/node_modules/@aspectcode/emitters/dist/report.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/stableJson.d.ts +0 -14
- package/node_modules/@aspectcode/emitters/dist/stableJson.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/stableJson.js +0 -40
- package/node_modules/@aspectcode/emitters/dist/stableJson.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/transaction.d.ts +0 -29
- package/node_modules/@aspectcode/emitters/dist/transaction.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/transaction.js +0 -104
- package/node_modules/@aspectcode/emitters/dist/transaction.js.map +0 -1
|
@@ -3,183 +3,58 @@
|
|
|
3
3
|
* Probe runner — simulates AI responses to probes using AGENTS.md as context.
|
|
4
4
|
*
|
|
5
5
|
* For each probe, constructs a chat where:
|
|
6
|
-
* - System prompt = current AGENTS.md
|
|
6
|
+
* - System prompt = current AGENTS.md
|
|
7
7
|
* - User prompt = the probe's task
|
|
8
|
-
* Then sends it to the LLM and
|
|
8
|
+
* Then sends it to the LLM (temperature 0.0) and returns the raw response.
|
|
9
|
+
*
|
|
10
|
+
* Judging/evaluation is handled separately by the judge module.
|
|
9
11
|
*/
|
|
10
12
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
11
13
|
exports.runProbes = runProbes;
|
|
12
|
-
|
|
13
|
-
exports.buildBehaviorEvalPrompt = buildBehaviorEvalPrompt;
|
|
14
|
-
exports.parseBehaviorEval = parseBehaviorEval;
|
|
15
|
-
/** Maximum file content characters to include per probe. */
|
|
16
|
-
const MAX_CONTEXT_CHARS = 20000;
|
|
17
|
-
/**
|
|
18
|
-
* Build the system prompt for a probe run.
|
|
19
|
-
* Includes the AGENTS.md instructions and relevant file contents.
|
|
20
|
-
*/
|
|
21
|
-
function buildProbeSystemPrompt(agentsContent, probe, fileContents) {
|
|
22
|
-
let prompt = `You are an AI coding assistant. Follow these project instructions:\n\n${agentsContent}`;
|
|
23
|
-
if (fileContents && probe.contextFiles.length > 0) {
|
|
24
|
-
let contextChars = 0;
|
|
25
|
-
const fileSections = [];
|
|
26
|
-
for (const filePath of probe.contextFiles) {
|
|
27
|
-
const content = fileContents.get(filePath);
|
|
28
|
-
if (!content)
|
|
29
|
-
continue;
|
|
30
|
-
if (contextChars + content.length > MAX_CONTEXT_CHARS)
|
|
31
|
-
break;
|
|
32
|
-
fileSections.push(`### ${filePath}\n\`\`\`\n${content}\n\`\`\``);
|
|
33
|
-
contextChars += content.length;
|
|
34
|
-
}
|
|
35
|
-
if (fileSections.length > 0) {
|
|
36
|
-
prompt += `\n\n## Relevant Files\n\n${fileSections.join('\n\n')}`;
|
|
37
|
-
}
|
|
38
|
-
}
|
|
39
|
-
return prompt;
|
|
40
|
-
}
|
|
14
|
+
const llmUtil_1 = require("./llmUtil");
|
|
41
15
|
/**
|
|
42
|
-
*
|
|
43
|
-
*
|
|
16
|
+
* Run a single probe simulation.
|
|
17
|
+
* Returns the raw AI response without evaluation.
|
|
44
18
|
*/
|
|
45
|
-
function
|
|
46
|
-
const behaviors = probe.expectedBehaviors
|
|
47
|
-
.map((b, i) => `${i + 1}. ${b}`)
|
|
48
|
-
.join('\n');
|
|
49
|
-
return `You are evaluating an AI coding assistant's response to a specific task.
|
|
50
|
-
|
|
51
|
-
## Task Given
|
|
52
|
-
${probe.task}
|
|
53
|
-
|
|
54
|
-
## Expected Behaviours
|
|
55
|
-
The response should exhibit ALL of these behaviours:
|
|
56
|
-
${behaviors}
|
|
57
|
-
|
|
58
|
-
## AI Response
|
|
59
|
-
${response}
|
|
60
|
-
|
|
61
|
-
## Instructions
|
|
62
|
-
For EACH expected behaviour, determine if the response exhibits it.
|
|
63
|
-
Respond in EXACTLY this format (one line per behaviour):
|
|
64
|
-
|
|
65
|
-
BEHAVIOR_1: PASS|FAIL — <brief explanation>
|
|
66
|
-
BEHAVIOR_2: PASS|FAIL — <brief explanation>
|
|
67
|
-
...
|
|
68
|
-
|
|
69
|
-
Then a final line:
|
|
70
|
-
OVERALL: PASS|FAIL`;
|
|
71
|
-
}
|
|
72
|
-
/** Parse the structured behaviour evaluation response. */
|
|
73
|
-
function parseBehaviorEval(response, expectedBehaviors) {
|
|
74
|
-
const results = [];
|
|
75
|
-
const lines = response.split('\n');
|
|
76
|
-
for (let i = 0; i < expectedBehaviors.length; i++) {
|
|
77
|
-
const pattern = new RegExp(`BEHAVIOR_${i + 1}:\\s*(PASS|FAIL)\\s*[—-]\\s*(.*)`, 'i');
|
|
78
|
-
const match = lines.find((l) => pattern.test(l));
|
|
79
|
-
const parsed = match ? pattern.exec(match) : null;
|
|
80
|
-
results.push({
|
|
81
|
-
behavior: expectedBehaviors[i],
|
|
82
|
-
passed: parsed ? parsed[1].toUpperCase() === 'PASS' : false,
|
|
83
|
-
explanation: parsed ? parsed[2].trim() : 'Could not parse evaluation result',
|
|
84
|
-
});
|
|
85
|
-
}
|
|
86
|
-
const allPassed = results.every((r) => r.passed);
|
|
87
|
-
return { results, allPassed };
|
|
88
|
-
}
|
|
89
|
-
/**
|
|
90
|
-
* Run a single probe: simulate the AI response, then evaluate it.
|
|
91
|
-
*/
|
|
92
|
-
async function runSingleProbe(probe, agentsContent, provider, fileContents, log, signal) {
|
|
19
|
+
async function simulateProbe(probe, agentsContent, provider, log, signal) {
|
|
93
20
|
if (signal?.aborted) {
|
|
94
|
-
return {
|
|
95
|
-
probeId: probe.id,
|
|
96
|
-
passed: false,
|
|
97
|
-
response: '',
|
|
98
|
-
shortcomings: ['Cancelled'],
|
|
99
|
-
behaviorResults: [],
|
|
100
|
-
};
|
|
21
|
+
return { probeId: probe.id, task: probe.task, response: '' };
|
|
101
22
|
}
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
const
|
|
105
|
-
const simMessages = [
|
|
23
|
+
log?.debug(`Simulating probe: ${probe.id}`);
|
|
24
|
+
const systemPrompt = `You are an AI coding assistant. Follow these project instructions:\n\n${agentsContent}`;
|
|
25
|
+
const messages = [
|
|
106
26
|
{ role: 'system', content: systemPrompt },
|
|
107
27
|
{ role: 'user', content: probe.task },
|
|
108
28
|
];
|
|
109
29
|
let response;
|
|
110
30
|
try {
|
|
111
|
-
response = await provider.
|
|
31
|
+
response = await (0, llmUtil_1.chatWithTemp)(provider, messages, 0.0, signal);
|
|
112
32
|
}
|
|
113
33
|
catch (err) {
|
|
114
34
|
const msg = err instanceof Error ? err.message : String(err);
|
|
115
35
|
log?.warn(`Probe ${probe.id} simulation failed: ${msg}`);
|
|
116
|
-
return {
|
|
117
|
-
probeId: probe.id,
|
|
118
|
-
passed: false,
|
|
119
|
-
response: '',
|
|
120
|
-
shortcomings: [`LLM error during simulation: ${msg}`],
|
|
121
|
-
behaviorResults: [],
|
|
122
|
-
};
|
|
123
|
-
}
|
|
124
|
-
if (signal?.aborted) {
|
|
125
|
-
return {
|
|
126
|
-
probeId: probe.id,
|
|
127
|
-
passed: false,
|
|
128
|
-
response,
|
|
129
|
-
shortcomings: ['Cancelled during evaluation'],
|
|
130
|
-
behaviorResults: [],
|
|
131
|
-
};
|
|
132
|
-
}
|
|
133
|
-
// Step 2: Evaluate the response against expected behaviours
|
|
134
|
-
log?.debug(`Evaluating probe: ${probe.id}`);
|
|
135
|
-
const evalPrompt = buildBehaviorEvalPrompt(probe, response);
|
|
136
|
-
const evalMessages = [
|
|
137
|
-
{ role: 'user', content: evalPrompt },
|
|
138
|
-
];
|
|
139
|
-
let evalResponse;
|
|
140
|
-
try {
|
|
141
|
-
evalResponse = await provider.chat(evalMessages);
|
|
142
|
-
}
|
|
143
|
-
catch (err) {
|
|
144
|
-
const msg = err instanceof Error ? err.message : String(err);
|
|
145
|
-
log?.warn(`Probe ${probe.id} evaluation failed: ${msg}`);
|
|
146
|
-
return {
|
|
147
|
-
probeId: probe.id,
|
|
148
|
-
passed: false,
|
|
149
|
-
response,
|
|
150
|
-
shortcomings: [`LLM error during evaluation: ${msg}`],
|
|
151
|
-
behaviorResults: [],
|
|
152
|
-
};
|
|
36
|
+
return { probeId: probe.id, task: probe.task, response: '' };
|
|
153
37
|
}
|
|
154
|
-
|
|
155
|
-
const shortcomings = behaviorResults
|
|
156
|
-
.filter((r) => !r.passed)
|
|
157
|
-
.map((r) => `${r.behavior}: ${r.explanation}`);
|
|
158
|
-
return {
|
|
159
|
-
probeId: probe.id,
|
|
160
|
-
passed: allPassed,
|
|
161
|
-
response,
|
|
162
|
-
shortcomings,
|
|
163
|
-
behaviorResults,
|
|
164
|
-
};
|
|
38
|
+
return { probeId: probe.id, task: probe.task, response };
|
|
165
39
|
}
|
|
166
40
|
/**
|
|
167
41
|
* Run all probes against the current AGENTS.md.
|
|
168
42
|
*
|
|
169
43
|
* Each probe is run sequentially (to respect rate limits).
|
|
170
|
-
* Returns results
|
|
44
|
+
* Returns simulation results (raw responses, no evaluation).
|
|
171
45
|
*/
|
|
172
|
-
async function runProbes(agentsContent, probes, provider,
|
|
46
|
+
async function runProbes(agentsContent, probes, provider, log, signal, onProbeProgress) {
|
|
173
47
|
const results = [];
|
|
174
48
|
for (let idx = 0; idx < probes.length; idx++) {
|
|
175
49
|
const probe = probes[idx];
|
|
176
50
|
if (signal?.aborted)
|
|
177
51
|
break;
|
|
178
52
|
onProbeProgress?.({ probeIndex: idx, total: probes.length, probeId: probe.id, phase: 'starting' });
|
|
179
|
-
const result = await
|
|
53
|
+
const result = await simulateProbe(probe, agentsContent, provider, log, signal);
|
|
180
54
|
results.push(result);
|
|
181
|
-
|
|
182
|
-
|
|
55
|
+
const hasResponse = result.response.length > 0;
|
|
56
|
+
onProbeProgress?.({ probeIndex: idx, total: probes.length, probeId: probe.id, phase: 'done', passed: hasResponse });
|
|
57
|
+
log?.info(` ${hasResponse ? '✔' : '✖'} ${probe.id}`);
|
|
183
58
|
}
|
|
184
59
|
return results;
|
|
185
60
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"runner.js","sourceRoot":"","sources":["../src/runner.ts"],"names":[],"mappings":";AAAA
|
|
1
|
+
{"version":3,"file":"runner.js","sourceRoot":"","sources":["../src/runner.ts"],"names":[],"mappings":";AAAA;;;;;;;;;GASG;;AAqDH,8BAyBC;AApED,uCAAyC;AAEzC;;;GAGG;AACH,KAAK,UAAU,aAAa,CAC1B,KAAY,EACZ,aAAqB,EACrB,QAAqB,EACrB,GAAe,EACf,MAAoB;IAEpB,IAAI,MAAM,EAAE,OAAO,EAAE,CAAC;QACpB,OAAO,EAAE,OAAO,EAAE,KAAK,CAAC,EAAE,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IAC/D,CAAC;IAED,GAAG,EAAE,KAAK,CAAC,qBAAqB,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC;IAE5C,MAAM,YAAY,GAAG,yEAAyE,aAAa,EAAE,CAAC;IAC9G,MAAM,QAAQ,GAAkB;QAC9B,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,YAAY,EAAE;QACzC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,CAAC,IAAI,EAAE;KACtC,CAAC;IAEF,IAAI,QAAgB,CAAC;IACrB,IAAI,CAAC;QACH,QAAQ,GAAG,MAAM,IAAA,sBAAY,EAAC,QAAQ,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;IACjE,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QAC7D,GAAG,EAAE,IAAI,CAAC,SAAS,KAAK,CAAC,EAAE,uBAAuB,GAAG,EAAE,CAAC,CAAC;QACzD,OAAO,EAAE,OAAO,EAAE,KAAK,CAAC,EAAE,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IAC/D,CAAC;IAED,OAAO,EAAE,OAAO,EAAE,KAAK,CAAC,EAAE,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,QAAQ,EAAE,CAAC;AAC3D,CAAC;AAED;;;;;GAKG;AACI,KAAK,UAAU,SAAS,CAC7B,aAAqB,EACrB,MAAe,EACf,QAAqB,EACrB,GAAe,EACf,MAAoB,EACpB,eAAuC;IAEvC,MAAM,OAAO,GAAuB,EAAE,CAAC;IAEvC,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;QAC7C,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC;QAC1B,IAAI,MAAM,EAAE,OAAO;YAAE,MAAM;QAE3B,eAAe,EAAE,CAAC,EAAE,UAAU,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,EAAE,KAAK,CAAC,EAAE,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC,CAAC;QAEnG,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,KAAK,EAAE,aAAa,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QAChF,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAErB,MAAM,WAAW,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC;QAC/C,eAAe,EAAE,CAAC,EAAE,UAAU,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,EAAE,KAAK,CAAC,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC,CAAC;QACpH,GAAG,EAAE,IAAI,CAAC,KAAK,WAAW,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC;IACxD,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
@@ -1,145 +1,166 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @aspectcode/evaluator — core types.
|
|
3
3
|
*
|
|
4
|
-
* Types for probe-based evaluation,
|
|
5
|
-
* evidence-based diagnosis of AGENTS.md quality.
|
|
4
|
+
* Types for probe-based evaluation, probe-and-refine tuning,
|
|
5
|
+
* and evidence-based diagnosis of AGENTS.md quality.
|
|
6
6
|
*/
|
|
7
7
|
import type { LlmProvider, OptLogger } from '@aspectcode/optimizer';
|
|
8
|
-
export type { LlmProvider, OptLogger } from '@aspectcode/optimizer';
|
|
8
|
+
export type { LlmProvider, ChatOptions, OptLogger } from '@aspectcode/optimizer';
|
|
9
9
|
/**
|
|
10
|
-
* A single
|
|
10
|
+
* A single synthetic task that evaluates whether AGENTS.md guides
|
|
11
11
|
* the AI correctly for a specific scenario scoped to the codebase.
|
|
12
12
|
*/
|
|
13
13
|
export interface Probe {
|
|
14
|
-
/** Unique identifier
|
|
14
|
+
/** Unique identifier. */
|
|
15
15
|
id: string;
|
|
16
|
-
/** Human-readable description of what this probe tests. */
|
|
17
|
-
description: string;
|
|
18
|
-
/** Category for grouping (e.g. "hub-safety", "naming", "architecture"). */
|
|
19
|
-
category: ProbeCategory;
|
|
20
|
-
/**
|
|
21
|
-
* Workspace-relative paths of files relevant to this probe.
|
|
22
|
-
* These are included as context when running the probe.
|
|
23
|
-
*/
|
|
24
|
-
contextFiles: string[];
|
|
25
16
|
/** The task/question posed to the AI in this probe. */
|
|
26
17
|
task: string;
|
|
27
18
|
/**
|
|
28
19
|
* Specific behaviours the AI's response should exhibit.
|
|
29
|
-
* Used by the
|
|
20
|
+
* Used by the judge to score the response.
|
|
30
21
|
*/
|
|
31
22
|
expectedBehaviors: string[];
|
|
23
|
+
/** Why this probe is useful (optional rationale from the generator). */
|
|
24
|
+
rationale?: string;
|
|
32
25
|
}
|
|
33
|
-
/**
|
|
34
|
-
export
|
|
35
|
-
/** Result of running a single probe against the current AGENTS.md. */
|
|
36
|
-
export interface ProbeResult {
|
|
37
|
-
/** The probe that was run. */
|
|
26
|
+
/** Raw result of simulating a single probe (before judging). */
|
|
27
|
+
export interface SimulationResult {
|
|
38
28
|
probeId: string;
|
|
39
|
-
|
|
40
|
-
passed: boolean;
|
|
41
|
-
/** The AI's simulated response to the probe task. */
|
|
29
|
+
task: string;
|
|
42
30
|
response: string;
|
|
43
|
-
/**
|
|
44
|
-
* Specific shortcomings identified by the evaluator.
|
|
45
|
-
* Empty if `passed` is true.
|
|
46
|
-
*/
|
|
47
|
-
shortcomings: string[];
|
|
48
|
-
/** Per-behaviour pass/fail breakdown. */
|
|
49
|
-
behaviorResults: BehaviorResult[];
|
|
50
31
|
}
|
|
51
|
-
/**
|
|
52
|
-
export interface
|
|
32
|
+
/** Per-behavior assessment from the judge (strong/partial/missing). */
|
|
33
|
+
export interface BehaviorReview {
|
|
53
34
|
/** The expected behaviour description. */
|
|
54
35
|
behavior: string;
|
|
55
|
-
/**
|
|
56
|
-
|
|
57
|
-
/**
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
/**
|
|
65
|
-
|
|
66
|
-
/**
|
|
67
|
-
|
|
36
|
+
/** How well the response exhibited this behaviour. */
|
|
37
|
+
assessment: 'strong' | 'partial' | 'missing';
|
|
38
|
+
/** Short excerpt from response supporting the assessment. */
|
|
39
|
+
evidence: string;
|
|
40
|
+
/** What AGENTS.md should add/change to improve this behaviour. */
|
|
41
|
+
improvement: string;
|
|
42
|
+
}
|
|
43
|
+
/** Result of judging a single probe's response. */
|
|
44
|
+
export interface JudgedProbeResult {
|
|
45
|
+
/** The probe that was judged. */
|
|
46
|
+
probeId: string;
|
|
47
|
+
/** The original task. */
|
|
48
|
+
task: string;
|
|
49
|
+
/** The AI's simulated response. */
|
|
50
|
+
response: string;
|
|
51
|
+
/** Per-behaviour assessments. */
|
|
52
|
+
behaviorReviews: BehaviorReview[];
|
|
53
|
+
/** Per-probe edit suggestions from the judge (up to 3). */
|
|
54
|
+
proposedEdits: AgentsEdit[];
|
|
55
|
+
/** Summary notes from the judge. */
|
|
56
|
+
overallNotes: string;
|
|
68
57
|
}
|
|
69
58
|
/** A specific proposed edit to AGENTS.md. */
|
|
70
59
|
export interface AgentsEdit {
|
|
71
|
-
/** What section/area
|
|
60
|
+
/** What section/area to modify. AGENTS.md section name, or "scoped:slug" / "scoped:CREATE:slug" / "scoped:DELETE:slug". */
|
|
72
61
|
section: string;
|
|
73
62
|
/** The type of edit. */
|
|
74
63
|
action: 'add' | 'modify' | 'strengthen' | 'remove';
|
|
75
64
|
/** The proposed rule or content change. */
|
|
76
65
|
content: string;
|
|
77
|
-
/** Which probe failures motivated this edit. */
|
|
78
|
-
motivatedBy
|
|
66
|
+
/** Which probe failures motivated this edit (optional). */
|
|
67
|
+
motivatedBy?: string[];
|
|
68
|
+
/** Glob patterns (only for scoped:CREATE). */
|
|
69
|
+
globs?: string[];
|
|
70
|
+
/** Description (only for scoped:CREATE). */
|
|
71
|
+
description?: string;
|
|
79
72
|
}
|
|
80
|
-
/**
|
|
81
|
-
export interface
|
|
82
|
-
/**
|
|
83
|
-
|
|
84
|
-
/**
|
|
85
|
-
|
|
86
|
-
/**
|
|
87
|
-
|
|
88
|
-
/**
|
|
89
|
-
|
|
90
|
-
/** Workspace-relative file paths referenced in the conversation. */
|
|
91
|
-
filesReferenced: string[];
|
|
73
|
+
/** Configuration for the multi-iteration probe-and-refine loop. */
|
|
74
|
+
export interface ProbeRefineConfig {
|
|
75
|
+
/** Maximum iterations before stopping. Default: 3. */
|
|
76
|
+
maxIterations: number;
|
|
77
|
+
/** Target probes per iteration. Default: 10. */
|
|
78
|
+
targetProbesPerIteration: number;
|
|
79
|
+
/** Max edits applied per iteration. Default: 5. */
|
|
80
|
+
maxEditsPerIteration: number;
|
|
81
|
+
/** Character budget for the AGENTS.md artifact. Default: 8000. */
|
|
82
|
+
charBudget: number;
|
|
92
83
|
}
|
|
93
|
-
/**
|
|
94
|
-
export
|
|
95
|
-
/**
|
|
84
|
+
/** Default probe-and-refine configuration. */
|
|
85
|
+
export declare const DEFAULT_PROBE_REFINE_CONFIG: ProbeRefineConfig;
|
|
86
|
+
/** Summary of a single iteration in the probe-and-refine loop. */
|
|
87
|
+
export interface IterationSummary {
|
|
88
|
+
iteration: number;
|
|
89
|
+
probesGenerated: number;
|
|
90
|
+
probesEvaluated: number;
|
|
91
|
+
editsApplied: number;
|
|
92
|
+
guidanceChanged: boolean;
|
|
93
|
+
charsBefore: number;
|
|
94
|
+
charsAfter: number;
|
|
95
|
+
}
|
|
96
|
+
/** Result of the full probe-and-refine loop. */
|
|
97
|
+
export interface ProbeRefineResult {
|
|
98
|
+
/** The final refined AGENTS.md content. */
|
|
99
|
+
finalContent: string;
|
|
100
|
+
/** Per-iteration summaries. */
|
|
101
|
+
iterations: IterationSummary[];
|
|
102
|
+
/** Why the loop stopped (if before maxIterations). */
|
|
103
|
+
convergedReason?: string;
|
|
104
|
+
}
|
|
105
|
+
/** Options for LLM-powered probe generation. */
|
|
96
106
|
export interface ProbeGeneratorOptions {
|
|
97
|
-
/** Full KB content for
|
|
107
|
+
/** Full KB content for context. */
|
|
98
108
|
kb: string;
|
|
99
|
-
/**
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
kbDiff?: string;
|
|
104
|
-
/** Harvested prompts to generate additional probes from. */
|
|
105
|
-
harvestedPrompts?: HarvestedPrompt[];
|
|
109
|
+
/** Current AGENTS.md content being tuned. */
|
|
110
|
+
currentAgentsMd: string;
|
|
111
|
+
/** Prior probe tasks (across iterations) for deduplication. */
|
|
112
|
+
priorProbeTasks: string[];
|
|
106
113
|
/** Maximum number of probes to generate. Default: 10. */
|
|
107
114
|
maxProbes?: number;
|
|
108
|
-
/**
|
|
109
|
-
|
|
115
|
+
/** LLM provider for generating probes. */
|
|
116
|
+
provider: LlmProvider;
|
|
117
|
+
/** Project name (derived from workspace root). */
|
|
118
|
+
projectName?: string;
|
|
119
|
+
/** AbortSignal for cooperative cancellation. */
|
|
120
|
+
signal?: AbortSignal;
|
|
121
|
+
/** Optional logger. */
|
|
122
|
+
log?: OptLogger;
|
|
110
123
|
}
|
|
111
|
-
/** Options for running
|
|
124
|
+
/** Options for running probe simulations. */
|
|
112
125
|
export interface ProbeRunnerOptions {
|
|
113
|
-
/** Current AGENTS.md content (used as system prompt
|
|
126
|
+
/** Current AGENTS.md content (used as system prompt). */
|
|
114
127
|
agentsContent: string;
|
|
115
|
-
/** Probes to
|
|
128
|
+
/** Probes to simulate. */
|
|
116
129
|
probes: Probe[];
|
|
117
130
|
/** LLM provider for simulating AI responses. */
|
|
118
131
|
provider: LlmProvider;
|
|
119
|
-
/** File contents map for including context files. */
|
|
120
|
-
fileContents?: ReadonlyMap<string, string>;
|
|
121
132
|
/** Optional logger. */
|
|
122
133
|
log?: OptLogger;
|
|
123
134
|
/** AbortSignal for cooperative cancellation. */
|
|
124
135
|
signal?: AbortSignal;
|
|
125
136
|
}
|
|
126
|
-
/** Options for
|
|
127
|
-
export interface
|
|
128
|
-
/**
|
|
129
|
-
|
|
130
|
-
/**
|
|
137
|
+
/** Options for judging a probe's response. */
|
|
138
|
+
export interface JudgeOptions {
|
|
139
|
+
/** The probe task. */
|
|
140
|
+
task: string;
|
|
141
|
+
/** The AI's simulated response. */
|
|
142
|
+
response: string;
|
|
143
|
+
/** Expected behaviours to judge against. */
|
|
144
|
+
expectedBehaviors: string[];
|
|
145
|
+
/** Probe ID for tracking. */
|
|
146
|
+
probeId: string;
|
|
147
|
+
/** LLM provider for judging. */
|
|
131
148
|
provider: LlmProvider;
|
|
132
149
|
/** Optional logger. */
|
|
133
150
|
log?: OptLogger;
|
|
134
151
|
/** AbortSignal for cooperative cancellation. */
|
|
135
152
|
signal?: AbortSignal;
|
|
136
153
|
}
|
|
137
|
-
/** Options for diagnosing AGENTS.md issues from
|
|
154
|
+
/** Options for diagnosing AGENTS.md issues from judged probes. */
|
|
138
155
|
export interface DiagnosisOptions {
|
|
139
|
-
/**
|
|
140
|
-
|
|
156
|
+
/** All judged probe results (including strong ones). */
|
|
157
|
+
judgedResults: JudgedProbeResult[];
|
|
141
158
|
/** Current AGENTS.md content. */
|
|
142
159
|
agentsContent: string;
|
|
160
|
+
/** Current scoped rules context (slug → content map). Optional. */
|
|
161
|
+
scopedRulesContext?: string;
|
|
162
|
+
/** Raw static analysis data for scoped rule decisions. Optional. */
|
|
163
|
+
staticAnalysisData?: string;
|
|
143
164
|
/** LLM provider for diagnosis. */
|
|
144
165
|
provider: LlmProvider;
|
|
145
166
|
/** Optional logger. */
|
|
@@ -147,6 +168,40 @@ export interface DiagnosisOptions {
|
|
|
147
168
|
/** AbortSignal for cooperative cancellation. */
|
|
148
169
|
signal?: AbortSignal;
|
|
149
170
|
}
|
|
171
|
+
/** Callback invoked before/after each probe for live progress updates. */
|
|
172
|
+
export interface ProbeProgressCallback {
|
|
173
|
+
(info: {
|
|
174
|
+
probeIndex: number;
|
|
175
|
+
total: number;
|
|
176
|
+
probeId: string;
|
|
177
|
+
phase: 'starting' | 'done';
|
|
178
|
+
passed?: boolean;
|
|
179
|
+
}): void;
|
|
180
|
+
}
|
|
181
|
+
/** Result of deterministic edit application. */
|
|
182
|
+
export interface ApplyResult {
|
|
183
|
+
/** The updated AGENTS.md content. */
|
|
184
|
+
content: string;
|
|
185
|
+
/** Number of edits successfully applied. */
|
|
186
|
+
applied: number;
|
|
187
|
+
/** Number of bullets trimmed to fit budget. */
|
|
188
|
+
trimmed: number;
|
|
189
|
+
}
|
|
190
|
+
/** A conversation turn harvested from an AI tool's history. */
|
|
191
|
+
export interface HarvestedPrompt {
|
|
192
|
+
/** Which tool this came from. */
|
|
193
|
+
source: PromptSource;
|
|
194
|
+
/** When this conversation happened (ISO-8601, if available). */
|
|
195
|
+
timestamp?: string;
|
|
196
|
+
/** The user's prompt/question. */
|
|
197
|
+
userPrompt: string;
|
|
198
|
+
/** The AI's response. */
|
|
199
|
+
assistantResponse: string;
|
|
200
|
+
/** Workspace-relative file paths referenced in the conversation. */
|
|
201
|
+
filesReferenced: string[];
|
|
202
|
+
}
|
|
203
|
+
/** Supported prompt history sources. */
|
|
204
|
+
export type PromptSource = 'aider' | 'claude-code' | 'cline' | 'copilot-chat' | 'cursor' | 'windsurf' | 'export';
|
|
150
205
|
/** Options for prompt harvesting. */
|
|
151
206
|
export interface HarvestOptions {
|
|
152
207
|
/** Workspace root directory. */
|
|
@@ -160,17 +215,4 @@ export interface HarvestOptions {
|
|
|
160
215
|
/** Optional logger. */
|
|
161
216
|
log?: OptLogger;
|
|
162
217
|
}
|
|
163
|
-
/** Full result of the evaluation pipeline. */
|
|
164
|
-
export interface EvaluationResult {
|
|
165
|
-
/** All probe results (passed + failed). */
|
|
166
|
-
probeResults: ProbeResult[];
|
|
167
|
-
/** Diagnosis based on failures (undefined if all probes passed). */
|
|
168
|
-
diagnosis?: Diagnosis;
|
|
169
|
-
/** Number of probes that passed. */
|
|
170
|
-
passCount: number;
|
|
171
|
-
/** Number of probes that failed. */
|
|
172
|
-
failCount: number;
|
|
173
|
-
/** Total probes run. */
|
|
174
|
-
totalProbes: number;
|
|
175
|
-
}
|
|
176
218
|
//# sourceMappingURL=types.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AAGpE,YAAY,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AAGpE,YAAY,EAAE,WAAW,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AAIjF;;;GAGG;AACH,MAAM,WAAW,KAAK;IACpB,yBAAyB;IACzB,EAAE,EAAE,MAAM,CAAC;IAEX,uDAAuD;IACvD,IAAI,EAAE,MAAM,CAAC;IAEb;;;OAGG;IACH,iBAAiB,EAAE,MAAM,EAAE,CAAC;IAE5B,wEAAwE;IACxE,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAID,gEAAgE;AAChE,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,uEAAuE;AACvE,MAAM,WAAW,cAAc;IAC7B,0CAA0C;IAC1C,QAAQ,EAAE,MAAM,CAAC;IACjB,sDAAsD;IACtD,UAAU,EAAE,QAAQ,GAAG,SAAS,GAAG,SAAS,CAAC;IAC7C,6DAA6D;IAC7D,QAAQ,EAAE,MAAM,CAAC;IACjB,kEAAkE;IAClE,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,mDAAmD;AACnD,MAAM,WAAW,iBAAiB;IAChC,iCAAiC;IACjC,OAAO,EAAE,MAAM,CAAC;IAChB,yBAAyB;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,mCAAmC;IACnC,QAAQ,EAAE,MAAM,CAAC;IACjB,iCAAiC;IACjC,eAAe,EAAE,cAAc,EAAE,CAAC;IAClC,2DAA2D;IAC3D,aAAa,EAAE,UAAU,EAAE,CAAC;IAC5B,oCAAoC;IACpC,YAAY,EAAE,MAAM,CAAC;CACtB;AAID,6CAA6C;AAC7C,MAAM,WAAW,UAAU;IACzB,2HAA2H;IAC3H,OAAO,EAAE,MAAM,CAAC;IAEhB,wBAAwB;IACxB,MAAM,EAAE,KAAK,GAAG,QAAQ,GAAG,YAAY,GAAG,QAAQ,CAAC;IAEnD,2CAA2C;IAC3C,OAAO,EAAE,MAAM,CAAC;IAEhB,2DAA2D;IAC3D,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IAEvB,8CAA8C;IAC9C,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;IAEjB,4CAA4C;IAC5C,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAID,mEAAmE;AACnE,MAAM,WAAW,iBAAiB;IAChC,sDAAsD;IACtD,aAAa,EAAE,MAAM,CAAC;IACtB,gDAAgD;IAChD,wBAAwB,EAAE,MAAM,CAAC;IACjC,mDAAmD;IACnD,oBAAoB,EAAE,MAAM,CAAC;IAC7B,kEAAkE;IAClE,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,8CAA8C;AAC9C,eAAO,MAAM,2BAA2B,EAAE,iBAKzC,CAAC;AAEF,kEAAkE;AAClE,MAAM,WAAW,gBAAgB;IAC/B,SAAS,EAAE,MAAM,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;IACxB,eAAe,EAAE,MAAM,CAAC;IACxB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,OAAO,CAAC;IACzB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,gDAAgD;AAChD,MAAM,WAAW,iBAAiB;IAChC,2CAA2C;IAC3C,YAAY,EAAE,MAAM,CAAC;IACrB,+BAA+B;IAC/B,UAAU,EAAE,gBAAgB,EAAE,CAAC;IAC/B,sDAAsD;IACtD,eAAe,CAAC,EAAE,MAAM,CAAC;CAC1B;AAID,gDAAgD;AAChD,MAAM,WAAW,qBAAqB;IACpC,mCAAmC;IACnC,EAAE,EAAE,MAAM,CAAC;IAEX,6CAA6C;IAC7C,eAAe,EAAE,MAAM,CAAC;IAExB,+DAA+D;IAC/D,eAAe,EAAE,MAAM,EAAE,CAAC;IAE1B,yDAAyD;IACzD,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,0CAA0C;IAC1C,QAAQ,EAAE,WAAW,CAAC;IAEtB,kDAAkD;IAClD,WAAW,CAAC,EAAE,MAAM,CAAC;IAErB,gDAAgD;IAChD,MAAM,CAAC,EAAE,WAAW,CAAC;IAErB,uBAAuB;IACvB,GAAG,CAAC,EAAE,SAAS,CAAC;CACjB;AAED,6CAA6C;AAC7C,MAAM,WAAW,kBAAkB;IACjC,yDAAyD;IACzD,aAAa,EAAE,MAAM,CAAC;IAEtB,0BAA0B;IAC1B,MAAM,EAAE,KAAK,EAAE,CAAC;IAEhB,gDAAgD;IAChD,QAAQ,EAAE,WAAW,CAAC;IAEtB,uBAAuB;IACvB,GAAG,CAAC,EAAE,SAAS,CAAC;IAEhB,gDAAgD;IAChD,MAAM,CAAC,EAAE,WAAW,CAAC;CACtB;AAED,8CAA8C;AAC9C,MAAM,WAAW,YAAY;IAC3B,sBAAsB;IACtB,IAAI,EAAE,MAAM,CAAC;IAEb,mCAAmC;IACnC,QAAQ,EAAE,MAAM,CAAC;IAEjB,4CAA4C;IAC5C,iBAAiB,EAAE,MAAM,EAAE,CAAC;IAE5B,6BAA6B;IAC7B,OAAO,EAAE,MAAM,CAAC;IAEhB,gCAAgC;IAChC,QAAQ,EAAE,WAAW,CAAC;IAEtB,uBAAuB;IACvB,GAAG,CAAC,EAAE,SAAS,CAAC;IAEhB,gDAAgD;IAChD,MAAM,CAAC,EAAE,WAAW,CAAC;CACtB;AAED,kEAAkE;AAClE,MAAM,WAAW,gBAAgB;IAC/B,wDAAwD;IACxD,aAAa,EAAE,iBAAiB,EAAE,CAAC;IAEnC,iCAAiC;IACjC,aAAa,EAAE,MAAM,CAAC;IAEtB,mEAAmE;IACnE,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAE5B,oEAAoE;IACpE,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAE5B,kCAAkC;IAClC,QAAQ,EAAE,WAAW,CAAC;IAEtB,uBAAuB;IACvB,GAAG,CAAC,EAAE,SAAS,CAAC;IAEhB,gDAAgD;IAChD,MAAM,CAAC,EAAE,WAAW,CAAC;CACtB;AAED,0EAA0E;AAC1E,MAAM,WAAW,qBAAqB;IACpC,CAAC,IAAI,EAAE;QACL,UAAU,EAAE,MAAM,CAAC;QACnB,KAAK,EAAE,MAAM,CAAC;QACd,OAAO,EAAE,MAAM,CAAC;QAChB,KAAK,EAAE,UAAU,GAAG,MAAM,CAAC;QAC3B,MAAM,CAAC,EAAE,OAAO,CAAC;KAClB,GAAG,IAAI,CAAC;CACV;AAID,gDAAgD;AAChD,MAAM,WAAW,WAAW;IAC1B,qCAAqC;IACrC,OAAO,EAAE,MAAM,CAAC;IAChB,4CAA4C;IAC5C,OAAO,EAAE,MAAM,CAAC;IAChB,+CAA+C;IAC/C,OAAO,EAAE,MAAM,CAAC;CACjB;AAID,+DAA+D;AAC/D,MAAM,WAAW,eAAe;IAC9B,iCAAiC;IACjC,MAAM,EAAE,YAAY,CAAC;IACrB,gEAAgE;IAChE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kCAAkC;IAClC,UAAU,EAAE,MAAM,CAAC;IACnB,yBAAyB;IACzB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,oEAAoE;IACpE,eAAe,EAAE,MAAM,EAAE,CAAC;CAC3B;AAED,wCAAwC;AACxC,MAAM,MAAM,YAAY,GACpB,OAAO,GACP,aAAa,GACb,OAAO,GACP,cAAc,GACd,QAAQ,GACR,UAAU,GACV,QAAQ,CAAC;AAEb,qCAAqC;AACrC,MAAM,WAAW,cAAc;IAC7B,gCAAgC;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,gEAAgE;IAChE,OAAO,CAAC,EAAE,YAAY,EAAE,CAAC;IACzB,0DAA0D;IAC1D,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,iDAAiD;IACjD,KAAK,CAAC,EAAE,IAAI,CAAC;IACb,uBAAuB;IACvB,GAAG,CAAC,EAAE,SAAS,CAAC;CACjB"}
|
|
@@ -2,8 +2,16 @@
|
|
|
2
2
|
/**
|
|
3
3
|
* @aspectcode/evaluator — core types.
|
|
4
4
|
*
|
|
5
|
-
* Types for probe-based evaluation,
|
|
6
|
-
* evidence-based diagnosis of AGENTS.md quality.
|
|
5
|
+
* Types for probe-based evaluation, probe-and-refine tuning,
|
|
6
|
+
* and evidence-based diagnosis of AGENTS.md quality.
|
|
7
7
|
*/
|
|
8
8
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
+
exports.DEFAULT_PROBE_REFINE_CONFIG = void 0;
|
|
10
|
+
/** Default probe-and-refine configuration. */
|
|
11
|
+
exports.DEFAULT_PROBE_REFINE_CONFIG = {
|
|
12
|
+
maxIterations: 1,
|
|
13
|
+
targetProbesPerIteration: 5,
|
|
14
|
+
maxEditsPerIteration: 5,
|
|
15
|
+
charBudget: 8000,
|
|
16
|
+
};
|
|
9
17
|
//# sourceMappingURL=types.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":";AAAA;;;;;GAKG"}
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":";AAAA;;;;;GAKG;;;AAwGH,8CAA8C;AACjC,QAAA,2BAA2B,GAAsB;IAC5D,aAAa,EAAE,CAAC;IAChB,wBAAwB,EAAE,CAAC;IAC3B,oBAAoB,EAAE,CAAC;IACvB,UAAU,EAAE,IAAI;CACjB,CAAC"}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aspectcode/evaluator",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "1.0.0",
|
|
4
4
|
"private": true,
|
|
5
5
|
"description": "Evidence-based evaluation for Aspect Code",
|
|
6
6
|
"license": "MIT",
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
},
|
|
15
15
|
"repository": {
|
|
16
16
|
"type": "git",
|
|
17
|
-
"url": "https://github.com/
|
|
17
|
+
"url": "https://github.com/Aspect-Code-Labs/aspectcode.git",
|
|
18
18
|
"directory": "packages/evaluator"
|
|
19
19
|
},
|
|
20
20
|
"scripts": {
|
|
@@ -24,8 +24,8 @@
|
|
|
24
24
|
"test": "mocha --require ts-node/register test/**/*.test.ts"
|
|
25
25
|
},
|
|
26
26
|
"dependencies": {
|
|
27
|
-
"@aspectcode/core": "0.
|
|
28
|
-
"@aspectcode/optimizer": "0.
|
|
27
|
+
"@aspectcode/core": "1.0.0",
|
|
28
|
+
"@aspectcode/optimizer": "1.0.0"
|
|
29
29
|
},
|
|
30
30
|
"optionalDependencies": {
|
|
31
31
|
"better-sqlite3": "^11.0.0"
|
|
@@ -1,15 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @aspectcode/optimizer — public API.
|
|
3
|
-
*
|
|
4
|
-
* Re-exports types and provides the top-level `optimizeInstructions` entry point.
|
|
5
3
|
*/
|
|
6
|
-
export type { LlmProvider, ChatMessage, ChatUsage, ChatResult, ProviderOptions, OptimizeOptions, OptimizeResult, OptimizeStep, OptLogger,
|
|
7
|
-
export {
|
|
8
|
-
export { resolveProvider, loadEnvFile, parseDotenv } from './providers/index';
|
|
9
|
-
export { createOpenAiProvider } from './providers/openai';
|
|
10
|
-
export { createAnthropicProvider } from './providers/anthropic';
|
|
4
|
+
export type { LlmProvider, ChatMessage, ChatOptions, ChatUsage, ChatResult, ProviderOptions, OptimizeOptions, OptimizeResult, OptimizeStep, OptLogger, } from './types';
|
|
5
|
+
export { resolveProvider, loadEnvFile } from './providers/index';
|
|
11
6
|
export { withRetry } from './providers/retry';
|
|
12
|
-
export
|
|
13
|
-
export { runGenerateAgent, runComplaintAgent } from './agent';
|
|
14
|
-
export { buildSystemPrompt, buildGeneratePrompt, truncateKb, buildComplaintPrompt, parseComplaintResponse, } from './prompts';
|
|
7
|
+
export { runGenerateAgent } from './agent';
|
|
15
8
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,YAAY,EACV,WAAW,EACX,WAAW,EACX,WAAW,EACX,SAAS,EACT,UAAU,EACV,eAAe,EACf,eAAe,EACf,cAAc,EACd,YAAY,EACZ,SAAS,GACV,MAAM,SAAS,CAAC;AAEjB,OAAO,EAAE,eAAe,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACjE,OAAO,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAC9C,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC"}
|