aspectcode 0.4.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/agentsMdRenderer.d.ts +16 -0
- package/dist/agentsMdRenderer.d.ts.map +1 -0
- package/dist/agentsMdRenderer.js +137 -0
- package/dist/agentsMdRenderer.js.map +1 -0
- package/dist/auth.d.ts +31 -0
- package/dist/auth.d.ts.map +1 -0
- package/dist/auth.js +385 -0
- package/dist/auth.js.map +1 -0
- package/dist/autoResolve.d.ts +41 -0
- package/dist/autoResolve.d.ts.map +1 -0
- package/dist/autoResolve.js +196 -0
- package/dist/autoResolve.js.map +1 -0
- package/dist/changeEvaluator.d.ts +56 -0
- package/dist/changeEvaluator.d.ts.map +1 -0
- package/dist/changeEvaluator.js +674 -0
- package/dist/changeEvaluator.js.map +1 -0
- package/dist/cli.d.ts +3 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +1 -1
- package/dist/cli.js.map +1 -1
- package/dist/config.d.ts +37 -17
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +50 -2
- package/dist/config.js.map +1 -1
- package/dist/dreamCycle.d.ts +57 -0
- package/dist/dreamCycle.d.ts.map +1 -0
- package/dist/dreamCycle.js +334 -0
- package/dist/dreamCycle.js.map +1 -0
- package/dist/kbBuilder.d.ts +1 -2
- package/dist/kbBuilder.d.ts.map +1 -1
- package/dist/kbBuilder.js +1 -2
- package/dist/kbBuilder.js.map +1 -1
- package/dist/main.d.ts +2 -1
- package/dist/main.d.ts.map +1 -1
- package/dist/main.js +148 -7
- package/dist/main.js.map +1 -1
- package/dist/optimize.d.ts +13 -6
- package/dist/optimize.d.ts.map +1 -1
- package/dist/optimize.js +433 -142
- package/dist/optimize.js.map +1 -1
- package/dist/pipeline.d.ts +19 -21
- package/dist/pipeline.d.ts.map +1 -1
- package/dist/pipeline.js +1093 -160
- package/dist/pipeline.js.map +1 -1
- package/dist/preferences.d.ts +80 -0
- package/dist/preferences.d.ts.map +1 -0
- package/dist/preferences.js +238 -0
- package/dist/preferences.js.map +1 -0
- package/dist/runtimeState.d.ts +30 -0
- package/dist/runtimeState.d.ts.map +1 -0
- package/dist/runtimeState.js +39 -0
- package/dist/runtimeState.js.map +1 -0
- package/dist/scopedRules.d.ts +84 -0
- package/dist/scopedRules.d.ts.map +1 -0
- package/dist/scopedRules.js +449 -0
- package/dist/scopedRules.js.map +1 -0
- package/dist/ui/Dashboard.d.ts +4 -16
- package/dist/ui/Dashboard.d.ts.map +1 -1
- package/dist/ui/Dashboard.js +339 -140
- package/dist/ui/Dashboard.js.map +1 -1
- package/dist/ui/MemoryMap.d.ts +16 -0
- package/dist/ui/MemoryMap.d.ts.map +1 -0
- package/dist/ui/MemoryMap.js +266 -0
- package/dist/ui/MemoryMap.js.map +1 -0
- package/dist/ui/SettingsPanel.d.ts +18 -0
- package/dist/ui/SettingsPanel.d.ts.map +1 -0
- package/dist/ui/SettingsPanel.js +241 -0
- package/dist/ui/SettingsPanel.js.map +1 -0
- package/dist/ui/prompts.d.ts +7 -0
- package/dist/ui/prompts.d.ts.map +1 -1
- package/dist/ui/prompts.js +63 -0
- package/dist/ui/prompts.js.map +1 -1
- package/dist/ui/store.d.ts +154 -18
- package/dist/ui/store.d.ts.map +1 -1
- package/dist/ui/store.js +154 -24
- package/dist/ui/store.js.map +1 -1
- package/dist/ui/theme.d.ts +1 -8
- package/dist/ui/theme.d.ts.map +1 -1
- package/dist/ui/theme.js +2 -21
- package/dist/ui/theme.js.map +1 -1
- package/dist/updateChecker.d.ts +13 -0
- package/dist/updateChecker.d.ts.map +1 -0
- package/dist/updateChecker.js +66 -0
- package/dist/updateChecker.js.map +1 -0
- package/dist/usageTracker.d.ts +12 -0
- package/dist/usageTracker.d.ts.map +1 -0
- package/dist/usageTracker.js +89 -0
- package/dist/usageTracker.js.map +1 -0
- package/dist/writer.d.ts +1 -7
- package/dist/writer.d.ts.map +1 -1
- package/dist/writer.js +1 -11
- package/dist/writer.js.map +1 -1
- package/node_modules/@aspectcode/core/dist/analysis/repo.d.ts.map +1 -1
- package/node_modules/@aspectcode/core/dist/analysis/repo.js +13 -2
- package/node_modules/@aspectcode/core/dist/analysis/repo.js.map +1 -1
- package/node_modules/@aspectcode/core/dist/index.d.ts +1 -3
- package/node_modules/@aspectcode/core/dist/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/core/dist/index.js +1 -3
- package/node_modules/@aspectcode/core/dist/index.js.map +1 -1
- package/node_modules/@aspectcode/core/dist/parsers/genericExtractors.d.ts +14 -0
- package/node_modules/@aspectcode/core/dist/parsers/genericExtractors.d.ts.map +1 -0
- package/node_modules/@aspectcode/core/dist/parsers/genericExtractors.js +191 -0
- package/node_modules/@aspectcode/core/dist/parsers/genericExtractors.js.map +1 -0
- package/node_modules/@aspectcode/core/dist/parsers/index.d.ts +1 -0
- package/node_modules/@aspectcode/core/dist/parsers/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/core/dist/parsers/index.js +6 -1
- package/node_modules/@aspectcode/core/dist/parsers/index.js.map +1 -1
- package/node_modules/@aspectcode/core/dist/parsers/languages.d.ts +20 -0
- package/node_modules/@aspectcode/core/dist/parsers/languages.d.ts.map +1 -1
- package/node_modules/@aspectcode/core/dist/parsers/languages.js +25 -0
- package/node_modules/@aspectcode/core/dist/parsers/languages.js.map +1 -1
- package/node_modules/@aspectcode/core/dist/parsers/tsJsExtractors.d.ts.map +1 -1
- package/node_modules/@aspectcode/core/dist/parsers/tsJsExtractors.js +4 -1
- package/node_modules/@aspectcode/core/dist/parsers/tsJsExtractors.js.map +1 -1
- package/node_modules/@aspectcode/core/package.json +2 -2
- package/node_modules/@aspectcode/core/parsers/cpp.wasm +0 -0
- package/node_modules/@aspectcode/core/parsers/go.wasm +0 -0
- package/node_modules/@aspectcode/core/parsers/php.wasm +0 -0
- package/node_modules/@aspectcode/core/parsers/ruby.wasm +0 -0
- package/node_modules/@aspectcode/core/parsers/rust.wasm +0 -0
- package/node_modules/@aspectcode/emitters/dist/index.d.ts +1 -17
- package/node_modules/@aspectcode/emitters/dist/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/index.js +2 -90
- package/node_modules/@aspectcode/emitters/dist/index.js.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/index.d.ts +0 -2
- package/node_modules/@aspectcode/emitters/dist/instructions/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/index.js +1 -7
- package/node_modules/@aspectcode/emitters/dist/instructions/index.js.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/kb/analyzers.d.ts +0 -18
- package/node_modules/@aspectcode/emitters/dist/kb/analyzers.d.ts.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/kb/analyzers.js +0 -57
- package/node_modules/@aspectcode/emitters/dist/kb/analyzers.js.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/kb/conventions.d.ts +0 -18
- package/node_modules/@aspectcode/emitters/dist/kb/conventions.d.ts.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/kb/conventions.js +0 -130
- package/node_modules/@aspectcode/emitters/dist/kb/conventions.js.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/kb/index.d.ts +2 -4
- package/node_modules/@aspectcode/emitters/dist/kb/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/kb/index.js +1 -11
- package/node_modules/@aspectcode/emitters/dist/kb/index.js.map +1 -1
- package/node_modules/@aspectcode/emitters/package.json +3 -3
- package/node_modules/@aspectcode/evaluator/dist/apply.d.ts +55 -0
- package/node_modules/@aspectcode/evaluator/dist/apply.d.ts.map +1 -0
- package/node_modules/@aspectcode/evaluator/dist/apply.js +368 -0
- package/node_modules/@aspectcode/evaluator/dist/apply.js.map +1 -0
- package/node_modules/@aspectcode/evaluator/dist/diagnosis.d.ts +16 -25
- package/node_modules/@aspectcode/evaluator/dist/diagnosis.d.ts.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/diagnosis.js +115 -138
- package/node_modules/@aspectcode/evaluator/dist/diagnosis.js.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/index.d.ts +8 -43
- package/node_modules/@aspectcode/evaluator/dist/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/index.js +15 -61
- package/node_modules/@aspectcode/evaluator/dist/index.js.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/judge.d.ts +32 -0
- package/node_modules/@aspectcode/evaluator/dist/judge.d.ts.map +1 -0
- package/node_modules/@aspectcode/evaluator/dist/judge.js +165 -0
- package/node_modules/@aspectcode/evaluator/dist/judge.js.map +1 -0
- package/node_modules/@aspectcode/evaluator/dist/llmUtil.d.ts +15 -0
- package/node_modules/@aspectcode/evaluator/dist/llmUtil.d.ts.map +1 -0
- package/node_modules/@aspectcode/evaluator/dist/llmUtil.js +41 -0
- package/node_modules/@aspectcode/evaluator/dist/llmUtil.js.map +1 -0
- package/node_modules/@aspectcode/evaluator/dist/probes.d.ts +20 -47
- package/node_modules/@aspectcode/evaluator/dist/probes.d.ts.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/probes.js +188 -278
- package/node_modules/@aspectcode/evaluator/dist/probes.js.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/runner.d.ts +7 -32
- package/node_modules/@aspectcode/evaluator/dist/runner.d.ts.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/runner.js +21 -146
- package/node_modules/@aspectcode/evaluator/dist/runner.js.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/types.d.ts +141 -99
- package/node_modules/@aspectcode/evaluator/dist/types.d.ts.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/types.js +10 -2
- package/node_modules/@aspectcode/evaluator/dist/types.js.map +1 -1
- package/node_modules/@aspectcode/evaluator/package.json +4 -4
- package/node_modules/@aspectcode/optimizer/dist/index.d.ts +3 -10
- package/node_modules/@aspectcode/optimizer/dist/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/index.js +1 -19
- package/node_modules/@aspectcode/optimizer/dist/index.js.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/anthropic.d.ts.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/anthropic.js +40 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/anthropic.js.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/aspectcode.d.ts +9 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/aspectcode.d.ts.map +1 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/aspectcode.js +83 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/aspectcode.js.map +1 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/index.d.ts +4 -3
- package/node_modules/@aspectcode/optimizer/dist/providers/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/index.js +24 -10
- package/node_modules/@aspectcode/optimizer/dist/providers/index.js.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/openai.d.ts.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/openai.js +22 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/openai.js.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/retry.d.ts +14 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/retry.d.ts.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/retry.js +1 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/retry.js.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/types.d.ts +14 -0
- package/node_modules/@aspectcode/optimizer/dist/types.d.ts.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/types.js.map +1 -1
- package/node_modules/@aspectcode/optimizer/package.json +2 -2
- package/package.json +6 -7
- package/dist/complaintProcessor.d.ts +0 -16
- package/dist/complaintProcessor.d.ts.map +0 -1
- package/dist/complaintProcessor.js +0 -134
- package/dist/complaintProcessor.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/emitter.d.ts +0 -72
- package/node_modules/@aspectcode/emitters/dist/emitter.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/emitter.js +0 -10
- package/node_modules/@aspectcode/emitters/dist/emitter.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/content.d.ts +0 -26
- package/node_modules/@aspectcode/emitters/dist/instructions/content.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/content.js +0 -501
- package/node_modules/@aspectcode/emitters/dist/instructions/content.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/detection.d.ts +0 -13
- package/node_modules/@aspectcode/emitters/dist/instructions/detection.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/detection.js +0 -55
- package/node_modules/@aspectcode/emitters/dist/instructions/detection.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/instructionsEmitter.d.ts +0 -9
- package/node_modules/@aspectcode/emitters/dist/instructions/instructionsEmitter.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/instructionsEmitter.js +0 -30
- package/node_modules/@aspectcode/emitters/dist/instructions/instructionsEmitter.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/kb/kbEmitter.d.ts +0 -21
- package/node_modules/@aspectcode/emitters/dist/kb/kbEmitter.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/kb/kbEmitter.js +0 -125
- package/node_modules/@aspectcode/emitters/dist/kb/kbEmitter.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/manifest.d.ts +0 -37
- package/node_modules/@aspectcode/emitters/dist/manifest.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/manifest.js +0 -50
- package/node_modules/@aspectcode/emitters/dist/manifest.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/report.d.ts +0 -22
- package/node_modules/@aspectcode/emitters/dist/report.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/report.js +0 -3
- package/node_modules/@aspectcode/emitters/dist/report.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/stableJson.d.ts +0 -14
- package/node_modules/@aspectcode/emitters/dist/stableJson.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/stableJson.js +0 -40
- package/node_modules/@aspectcode/emitters/dist/stableJson.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/transaction.d.ts +0 -29
- package/node_modules/@aspectcode/emitters/dist/transaction.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/transaction.js +0 -104
- package/node_modules/@aspectcode/emitters/dist/transaction.js.map +0 -1
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Per-probe judge — evaluates AI responses with strong/partial/missing assessments.
|
|
4
|
+
*
|
|
5
|
+
* For each probe, the judge reviews the simulated response against expected
|
|
6
|
+
* behaviours and proposes targeted AGENTS.md edits.
|
|
7
|
+
*
|
|
8
|
+
* Ported from sweagent_bench oracle/judge.py.
|
|
9
|
+
*/
|
|
10
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
11
|
+
exports.parseJudgeResponse = parseJudgeResponse;
|
|
12
|
+
exports.judgeProbe = judgeProbe;
|
|
13
|
+
const llmUtil_1 = require("./llmUtil");
|
|
14
|
+
// ── Prompts ─────────────────────────────────────────────────
|
|
15
|
+
const JUDGE_SYSTEM = `You are an evaluator/editor for AGENTS.md quality.
|
|
16
|
+
You will be given a TASK, the assistant RESPONSE, and EXPECTED BEHAVIORS.
|
|
17
|
+
|
|
18
|
+
Assess each behavior with one of: "strong", "partial", "missing".
|
|
19
|
+
|
|
20
|
+
Return a JSON object with this exact shape:
|
|
21
|
+
{
|
|
22
|
+
"behavior_reviews": [
|
|
23
|
+
{
|
|
24
|
+
"behavior": "...",
|
|
25
|
+
"assessment": "strong|partial|missing",
|
|
26
|
+
"evidence": "short evidence from response",
|
|
27
|
+
"improvement": "what AGENTS.md should add/change"
|
|
28
|
+
}
|
|
29
|
+
],
|
|
30
|
+
"proposed_edits": [
|
|
31
|
+
{"section": "Operating Mode|Procedural Standards|High-Impact Hubs|Entry Points|Import Chains|Validation|Integration Risk|Conventions|Guardrails", "action": "add|modify|strengthen|remove", "content": "..."}
|
|
32
|
+
],
|
|
33
|
+
"overall_notes": "short summary"
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
Rules:
|
|
37
|
+
- Judge whether the response produced a focused, plausible fix grounded in repo evidence.
|
|
38
|
+
- Prefer edits that improve repo-specific guidance, not generic checklists.
|
|
39
|
+
- The "content" field must be the ACTUAL guideline text to appear in AGENTS.md as a bullet point.
|
|
40
|
+
Write it as a direct imperative (e.g. "Verify component exists before importing").
|
|
41
|
+
NEVER write meta-instructions like "Add a step to..." or "Include an example of...".
|
|
42
|
+
- Content must be general enough to help across the repo, not tied to one probe scenario.
|
|
43
|
+
- Edits are optional; return [] if behavior is already strong.
|
|
44
|
+
- Return at most 3 proposed edits.
|
|
45
|
+
- Output ONLY valid JSON.`;
|
|
46
|
+
/**
 * Assemble the user message handed to the judge.
 *
 * The message has three labelled sections (TASK, RESPONSE, EXPECTED BEHAVIORS)
 * separated by blank lines, followed by a closing instruction. Expected
 * behaviours are rendered as a 1-based numbered list, one per line.
 *
 * @param task Task text given to the assistant under evaluation.
 * @param response The assistant's reply to that task.
 * @param expectedBehaviors Behaviours the reply is expected to show.
 * @returns The complete user prompt string.
 */
function buildJudgeUserPrompt(task, response, expectedBehaviors) {
    const numberLine = (text, position) => `${position + 1}. ${text}`;
    const numberedLines = expectedBehaviors.map(numberLine);
    const behaviorList = numberedLines.join('\n');
    return `TASK:\n${task}\n\nRESPONSE:\n${response}\n\nEXPECTED BEHAVIORS:\n${behaviorList}\n\nProduce behavior_reviews and proposed_edits JSON.`;
}
|
|
50
|
+
/**
 * Recover the judge's JSON verdict from a raw LLM reply.
 *
 * `<think>…</think>` spans and a Markdown code fence are stripped first. The
 * remaining text is parsed as JSON; if that does not produce an object, the
 * span from the first `{` to the last `}` is tried as well, which copes with
 * prose around the JSON or an object wrapped in an array.
 *
 * Only plain objects are accepted. Valid JSON that is a number, string,
 * boolean, `null` or array is not a judge verdict and yields `null`, so callers
 * can treat every non-null result as an object.
 *
 * @param raw Raw text returned by the LLM.
 * @returns The parsed object, or `null` when no JSON object can be recovered
 *   (including when `raw` is not a string).
 */
function parseJudgeResponse(raw) {
    if (typeof raw !== 'string') {
        return null;
    }
    // Strip thinking tags if present
    let cleaned = raw.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
    // Strip code fences
    cleaned = cleaned.replace(/^```(?:json)?\s*\n?/m, '').replace(/\n?```\s*$/m, '').trim();
    // Candidates in priority order: the whole cleaned text, then the widest {...} span inside it.
    const candidates = [cleaned];
    const embedded = cleaned.match(/\{[\s\S]*\}/);
    if (embedded && embedded[0] !== cleaned) {
        candidates.push(embedded[0]);
    }
    for (const text of candidates) {
        try {
            const value = JSON.parse(text);
            if (value !== null && typeof value === 'object' && !Array.isArray(value)) {
                return value;
            }
        }
        catch {
            // Not valid JSON — fall through to the next candidate.
        }
    }
    return null;
}
|
|
72
|
+
// ── Public API ──────────────────────────────────────────────
|
|
73
|
+
/**
 * Judge a single probe's response against expected behaviours.
 *
 * Sends the task, the response and the expected behaviours to the LLM at
 * temperature 0 and converts its JSON verdict into structured assessments
 * (strong/partial/missing) and up to 3 proposed AGENTS.md edits.
 *
 * A failed LLM call or an unparseable reply is reported in the result rather
 * than thrown: every expected behaviour is marked `missing`, no edits are
 * proposed, and the reason is recorded in `improvement` / `overallNotes`.
 * If `signal` is already aborted on entry, no call is made and an empty
 * result with `overallNotes: 'Cancelled'` is returned.
 *
 * @param options Probe data (`task`, `response`, `expectedBehaviors`,
 *   `probeId`), the LLM `provider`, and an optional `log` and abort `signal`.
 * @returns An object with `probeId`, `task`, `response`, `behaviorReviews`,
 *   `proposedEdits` and `overallNotes`.
 */
async function judgeProbe(options) {
    const { task, response, expectedBehaviors, probeId, provider, log, signal } = options;
    const ASSESSMENTS = ['strong', 'partial', 'missing'];
    const ACTIONS = ['add', 'modify', 'strengthen', 'remove'];
    const MAX_EDITS = 3;
    // The model's JSON is untrusted: a list field may be absent, not an array,
    // or contain null/primitive entries. Keep only object entries.
    const objectEntries = (value) => Array.isArray(value)
        ? value.filter((entry) => entry !== null && typeof entry === 'object')
        : [];
    // Result used when the judge could not produce a verdict.
    const failedResult = (improvement, overallNotes) => ({
        probeId,
        task,
        response,
        behaviorReviews: expectedBehaviors.map((b) => ({
            behavior: b,
            assessment: 'missing',
            evidence: '',
            improvement,
        })),
        proposedEdits: [],
        overallNotes,
    });
    if (signal?.aborted) {
        return {
            probeId,
            task,
            response,
            behaviorReviews: [],
            proposedEdits: [],
            overallNotes: 'Cancelled',
        };
    }
    log?.debug(`Judging probe: ${probeId}`);
    const userPrompt = buildJudgeUserPrompt(task, response, expectedBehaviors);
    const messages = [
        { role: 'system', content: JUDGE_SYSTEM },
        { role: 'user', content: userPrompt },
    ];
    let llmResponse;
    try {
        llmResponse = await (0, llmUtil_1.chatWithTemp)(provider, messages, 0.0, signal);
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        log?.warn(`Judge call failed for ${probeId}: ${msg}`);
        return failedResult(`Judge call failed: ${msg}`, `Judge call failed: ${msg}`);
    }
    const parsed = parseJudgeResponse(llmResponse);
    if (!parsed) {
        log?.warn(`Could not parse judge response for ${probeId}`);
        return failedResult('Could not parse judge response', 'Failed to parse judge response');
    }
    // Map behavior reviews; an unrecognised assessment is treated as 'missing'.
    const behaviorReviews = objectEntries(parsed.behavior_reviews).map((br) => ({
        behavior: br.behavior,
        assessment: ASSESSMENTS.includes(br.assessment) ? br.assessment : 'missing',
        evidence: br.evidence || '',
        improvement: br.improvement || '',
    }));
    // Map proposed edits. Incomplete edits are discarded BEFORE applying the cap
    // so malformed entries cannot crowd out valid ones; an unrecognised action
    // falls back to 'add'.
    const proposedEdits = objectEntries(parsed.proposed_edits)
        .filter((e) => e.section && e.action && e.content)
        .slice(0, MAX_EDITS)
        .map((e) => ({
        section: e.section,
        action: ACTIONS.includes(e.action) ? e.action : 'add',
        content: e.content,
        motivatedBy: [probeId],
    }));
    return {
        probeId,
        task,
        response,
        behaviorReviews,
        proposedEdits,
        overallNotes: parsed.overall_notes || '',
    };
}
|
|
165
|
+
//# sourceMappingURL=judge.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"judge.js","sourceRoot":"","sources":["../src/judge.ts"],"names":[],"mappings":";AAAA;;;;;;;GAOG;;AAuEH,gDAqBC;AAUD,gCA6FC;AA1LD,uCAAyC;AAEzC,+DAA+D;AAE/D,MAAM,YAAY,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;0BA8BK,CAAC;AAE3B,SAAS,oBAAoB,CAC3B,IAAY,EACZ,QAAgB,EAChB,iBAA2B;IAE3B,MAAM,SAAS,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC/E,OAAO,UAAU,IAAI,kBAAkB,QAAQ,4BAA4B,SAAS,uDAAuD,CAAC;AAC9I,CAAC;AAmBD,SAAgB,kBAAkB,CAAC,GAAW;IAC5C,iCAAiC;IACjC,IAAI,OAAO,GAAG,GAAG,CAAC,OAAO,CAAC,2BAA2B,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IAElE,oBAAoB;IACpB,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,sBAAsB,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IAExF,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAkB,CAAC;IAC9C,CAAC;IAAC,MAAM,CAAC;QACP,0CAA0C;QAC1C,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;QAC3C,IAAI,KAAK,EAAE,CAAC;YACV,IAAI,CAAC;gBACH,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAkB,CAAC;YAC/C,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,+DAA+D;AAE/D;;;;;GAKG;AACI,KAAK,UAAU,UAAU,CAAC,OAAqB;IACpD,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,iBAAiB,EAAE,OAAO,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC;IAEtF,IAAI,MAAM,EAAE,OAAO,EAAE,CAAC;QACpB,OAAO;YACL,OAAO;YACP,IAAI;YACJ,QAAQ;YACR,eAAe,EAAE,EAAE;YACnB,aAAa,EAAE,EAAE;YACjB,YAAY,EAAE,WAAW;SAC1B,CAAC;IACJ,CAAC;IAED,GAAG,EAAE,KAAK,CAAC,kBAAkB,OAAO,EAAE,CAAC,CAAC;IAExC,MAAM,UAAU,GAAG,oBAAoB,CAAC,IAAI,EAAE,QAAQ,EAAE,iBAAiB,CAAC,CAAC;IAC3E,MAAM,QAAQ,GAAkB;QAC9B,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,YAAY,EAAE;QACzC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE;KACtC,CAAC;IAEF,IAAI,WAAmB,CAAC;IACxB,IAAI,CAAC;QACH,WAAW,GAAG,MAAM,IAAA,sBAAY,EAAC,QAAQ,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;IACpE,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QAC7D,GAAG,EAAE,IAAI,CAAC,yBAAyB,OAAO,KAAK,GAA
G,EAAE,CAAC,CAAC;QACtD,OAAO;YACL,OAAO;YACP,IAAI;YACJ,QAAQ;YACR,eAAe,EAAE,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBAC7C,QAAQ,EAAE,CAAC;gBACX,UAAU,EAAE,SAAkB;gBAC9B,QAAQ,EAAE,EAAE;gBACZ,WAAW,EAAE,sBAAsB,GAAG,EAAE;aACzC,CAAC,CAAC;YACH,aAAa,EAAE,EAAE;YACjB,YAAY,EAAE,sBAAsB,GAAG,EAAE;SAC1C,CAAC;IACJ,CAAC;IAED,MAAM,MAAM,GAAG,kBAAkB,CAAC,WAAW,CAAC,CAAC;IAE/C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,GAAG,EAAE,IAAI,CAAC,sCAAsC,OAAO,EAAE,CAAC,CAAC;QAC3D,OAAO;YACL,OAAO;YACP,IAAI;YACJ,QAAQ;YACR,eAAe,EAAE,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBAC7C,QAAQ,EAAE,CAAC;gBACX,UAAU,EAAE,SAAkB;gBAC9B,QAAQ,EAAE,EAAE;gBACZ,WAAW,EAAE,gCAAgC;aAC9C,CAAC,CAAC;YACH,aAAa,EAAE,EAAE;YACjB,YAAY,EAAE,gCAAgC;SAC/C,CAAC;IACJ,CAAC;IAED,uBAAuB;IACvB,MAAM,eAAe,GAAqB,CAAC,MAAM,CAAC,gBAAgB,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC;QACrF,QAAQ,EAAE,EAAE,CAAC,QAAQ;QACrB,UAAU,EAAE,CAAC,CAAC,QAAQ,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,UAAU,CAAC;YACnE,CAAC,CAAC,EAAE,CAAC,UAAU;YACf,CAAC,CAAC,SAAS,CAAiC;QAC9C,QAAQ,EAAE,EAAE,CAAC,QAAQ,IAAI,EAAE;QAC3B,WAAW,EAAE,EAAE,CAAC,WAAW,IAAI,EAAE;KAClC,CAAC,CAAC,CAAC;IAEJ,qBAAqB;IACrB,MAAM,aAAa,GAAiB,CAAC,MAAM,CAAC,cAAc,IAAI,EAAE,CAAC;SAC9D,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;SACX,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,IAAI,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,OAAO,CAAC;SACjD,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACX,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,MAAM,EAAE,CAAC,CAAC,KAAK,EAAE,QAAQ,EAAE,YAAY,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC;YACnE,CAAC,CAAC,CAAC,CAAC,MAAM;YACV,CAAC,CAAC,KAAK,CAAyB;QAClC,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,WAAW,EAAE,CAAC,OAAO,CAAC;KACvB,CAAC,CAAC,CAAC;IAEN,OAAO;QACL,OAAO;QACP,IAAI;QACJ,QAAQ;QACR,eAAe;QACf,aAAa;QACb,YAAY,EAAE,MAAM,CAAC,aAAa,IAAI,EAAE;KACzC,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM utility — helpers for calling providers with per-call options.
|
|
3
|
+
*/
|
|
4
|
+
import type { ChatMessage } from '@aspectcode/optimizer';
|
|
5
|
+
import type { LlmProvider } from './types';
|
|
6
|
+
/**
|
|
7
|
+
* Call the LLM with a specific temperature.
|
|
8
|
+
* Uses `chatWithOptions` if available, falls back to `chat()`.
|
|
9
|
+
*
|
|
10
|
+
* When an AbortSignal is provided and fires, the returned promise
|
|
11
|
+
* rejects immediately (the underlying HTTP call may still finish
|
|
12
|
+
* in the background, but the caller stops waiting).
|
|
13
|
+
*/
|
|
14
|
+
// NOTE: `temperature` is forwarded only through `provider.chatWithOptions`; when the provider lacks it, `chat()` is called with the messages alone.
export declare function chatWithTemp(provider: LlmProvider, messages: ChatMessage[], temperature: number, signal?: AbortSignal): Promise<string>;
|
|
15
|
+
//# sourceMappingURL=llmUtil.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llmUtil.d.ts","sourceRoot":"","sources":["../src/llmUtil.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AACzD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AAE3C;;;;;;;GAOG;AACH,wBAAsB,YAAY,CAChC,QAAQ,EAAE,WAAW,EACrB,QAAQ,EAAE,WAAW,EAAE,EACvB,WAAW,EAAE,MAAM,EACnB,MAAM,CAAC,EAAE,WAAW,GACnB,OAAO,CAAC,MAAM,CAAC,CA0BjB"}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* LLM utility — helpers for calling providers with per-call options.
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.chatWithTemp = chatWithTemp;
|
|
7
|
+
/**
 * Ask the provider for a completion at the requested temperature.
 *
 * `provider.chatWithOptions(messages, { temperature })` is used when the
 * provider implements it; otherwise `provider.chat(messages)` is called and
 * the temperature is not forwarded.
 *
 * With an AbortSignal, the provider call is raced against the signal: when the
 * signal fires, the returned promise rejects with an `AbortError`
 * DOMException right away. The provider call itself is not cancelled and may
 * still complete in the background. The abort listener is removed once the
 * race settles.
 *
 * @param provider LLM provider exposing `chat` and optionally `chatWithOptions`.
 * @param messages Chat messages to send.
 * @param temperature Sampling temperature for this call.
 * @param signal Optional abort signal.
 * @returns The provider's reply.
 * @throws DOMException named `AbortError` when the signal is or becomes aborted.
 */
async function chatWithTemp(provider, messages, temperature, signal) {
    if (signal?.aborted) {
        throw new DOMException('Aborted', 'AbortError');
    }
    let pendingReply;
    if (provider.chatWithOptions) {
        pendingReply = provider.chatWithOptions(messages, { temperature });
    }
    else {
        pendingReply = provider.chat(messages);
    }
    if (!signal) {
        return pendingReply;
    }
    // Promise that only ever rejects, and only if the signal fires.
    let detachListener;
    const abortedFirst = new Promise((_resolve, reject) => {
        if (signal.aborted) {
            reject(new DOMException('Aborted', 'AbortError'));
            return;
        }
        const onAbort = () => reject(new DOMException('Aborted', 'AbortError'));
        signal.addEventListener('abort', onAbort, { once: true });
        detachListener = () => signal.removeEventListener('abort', onAbort);
    });
    try {
        return await Promise.race([pendingReply, abortedFirst]);
    }
    finally {
        // Avoid leaving a listener on a long-lived signal.
        if (detachListener) {
            detachListener();
        }
    }
}
|
|
41
|
+
//# sourceMappingURL=llmUtil.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llmUtil.js","sourceRoot":"","sources":["../src/llmUtil.ts"],"names":[],"mappings":";AAAA;;GAEG;;AAaH,oCA+BC;AAvCD;;;;;;;GAOG;AACI,KAAK,UAAU,YAAY,CAChC,QAAqB,EACrB,QAAuB,EACvB,WAAmB,EACnB,MAAoB;IAEpB,IAAI,MAAM,EAAE,OAAO;QAAE,MAAM,IAAI,YAAY,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC;IAErE,MAAM,WAAW,GAAG,QAAQ,CAAC,eAAe;QAC1C,CAAC,CAAC,QAAQ,CAAC,eAAe,CAAC,QAAQ,EAAE,EAAE,WAAW,EAAE,CAAC;QACrD,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAE5B,IAAI,CAAC,MAAM;QAAE,OAAO,WAAW,CAAC;IAEhC,6EAA6E;IAC7E,IAAI,OAAiC,CAAC;IACtC,MAAM,YAAY,GAAG,IAAI,OAAO,CAAQ,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE;QACpD,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YACnB,MAAM,CAAC,IAAI,YAAY,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC,CAAC;YAClD,OAAO;QACT,CAAC;QACD,MAAM,OAAO,GAAG,GAAG,EAAE,CAAC,MAAM,CAAC,IAAI,YAAY,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC,CAAC;QACxE,MAAM,CAAC,gBAAgB,CAAC,OAAO,EAAE,OAAO,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC;QAC1D,OAAO,GAAG,GAAG,EAAE,CAAC,MAAM,CAAC,mBAAmB,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;IAC/D,CAAC,CAAC,CAAC;IAEH,IAAI,CAAC;QACH,OAAO,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC,WAAW,EAAE,YAAY,CAAC,CAAC,CAAC;IACzD,CAAC;YAAS,CAAC;QACT,OAAO,EAAE,EAAE,CAAC;IACd,CAAC;AACH,CAAC"}
|
|
@@ -1,56 +1,29 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* LLM-powered probe generator — creates synthetic bug-fix tasks.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
4
|
+
* Each probe is a realistic coding-assistant request with expected
|
|
5
|
+
* behaviours. Probes are generated via LLM at temperature 0.9 for
|
|
6
|
+
* diversity, with deduplication across iterations and a fallback
|
|
7
|
+
* pool of hardcoded templates.
|
|
8
8
|
*
|
|
9
|
-
*
|
|
10
|
-
* to an LLM with AGENTS.md as context and evaluating the response.
|
|
9
|
+
* Ported from sweagent_bench oracle/probes.py.
|
|
11
10
|
*/
|
|
12
11
|
import type { Probe, ProbeGeneratorOptions } from './types';
|
|
13
|
-
|
|
14
|
-
declare function
|
|
12
|
+
export declare function normalizeProbeText(text: string): string;
|
|
13
|
+
export declare function isDuplicate(task: string, existing: string[]): boolean;
|
|
14
|
+
interface RawProbe {
|
|
15
|
+
task: string;
|
|
16
|
+
expected_behaviors: string[];
|
|
17
|
+
rationale?: string;
|
|
18
|
+
}
|
|
19
|
+
export declare function parseProbeResponse(raw: string): RawProbe[];
|
|
15
20
|
/**
|
|
16
|
-
*
|
|
21
|
+
* Generate probes via LLM with fallback to hardcoded templates.
|
|
17
22
|
*
|
|
18
|
-
*
|
|
19
|
-
*
|
|
20
|
-
*
|
|
23
|
+
* Uses temperature 0.9 for diverse probe generation. Deduplicates
|
|
24
|
+
* against prior tasks across iterations. Falls back to a pool of
|
|
25
|
+
* hardcoded templates when LLM generation fails.
|
|
21
26
|
*/
|
|
22
|
-
declare function
|
|
23
|
-
|
|
24
|
-
inDegree: number;
|
|
25
|
-
outDegree: number;
|
|
26
|
-
}>;
|
|
27
|
-
/**
|
|
28
|
-
* Parse "Entry Points" from architecture section.
|
|
29
|
-
*
|
|
30
|
-
* Handles two formats:
|
|
31
|
-
* - Legacy table: | File | Kind |
|
|
32
|
-
* - Emitter bullet: - 🟢 `path`: reason (under ### Runtime / Tooling sub-headings)
|
|
33
|
-
*/
|
|
34
|
-
declare function parseEntryPoints(architecture: string): Array<{
|
|
35
|
-
file: string;
|
|
36
|
-
kind: string;
|
|
37
|
-
}>;
|
|
38
|
-
/**
|
|
39
|
-
* Parse naming conventions from the map section.
|
|
40
|
-
*
|
|
41
|
-
* Handles both simple bullet lists and the structured emitter output
|
|
42
|
-
* (tables for file naming, bullets for function naming / framework patterns).
|
|
43
|
-
* Extracts a flat list of convention strings for probe generation.
|
|
44
|
-
*/
|
|
45
|
-
declare function parseConventions(mapSection: string): string[];
|
|
46
|
-
/** Parse file paths mentioned in a diff string. */
|
|
47
|
-
declare function parseDiffFiles(diff: string): string[];
|
|
48
|
-
/**
|
|
49
|
-
* Generate probes scoped to the KB content and optional diff.
|
|
50
|
-
*
|
|
51
|
-
* When a diff is provided, probes focus on changed areas.
|
|
52
|
-
* Otherwise, probes cover the full KB (hubs, entry points, conventions).
|
|
53
|
-
*/
|
|
54
|
-
export declare function generateProbes(options: ProbeGeneratorOptions): Probe[];
|
|
55
|
-
export { extractSection, parseHubs, parseEntryPoints, parseConventions, parseDiffFiles };
|
|
27
|
+
export declare function generateProbes(options: ProbeGeneratorOptions): Promise<Probe[]>;
|
|
28
|
+
export {};
|
|
56
29
|
//# sourceMappingURL=probes.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"probes.d.ts","sourceRoot":"","sources":["../src/probes.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"probes.d.ts","sourceRoot":"","sources":["../src/probes.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAGH,OAAO,KAAK,EACV,KAAK,EACL,qBAAqB,EACtB,MAAM,SAAS,CAAC;AA8FjB,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAMvD;AAED,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,OAAO,CAMrE;AAID,UAAU,QAAQ;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,kBAAkB,EAAE,MAAM,EAAE,CAAC;IAC7B,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,GAAG,QAAQ,EAAE,CAuB1D;AAID;;;;;;GAMG;AACH,wBAAsB,cAAc,CAAC,OAAO,EAAE,qBAAqB,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC,CA0FrF"}
|