aspectcode 0.4.1 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/agentsMdRenderer.d.ts +16 -0
- package/dist/agentsMdRenderer.d.ts.map +1 -0
- package/dist/agentsMdRenderer.js +137 -0
- package/dist/agentsMdRenderer.js.map +1 -0
- package/dist/auth.d.ts +31 -0
- package/dist/auth.d.ts.map +1 -0
- package/dist/auth.js +386 -0
- package/dist/auth.js.map +1 -0
- package/dist/autoResolve.d.ts +41 -0
- package/dist/autoResolve.d.ts.map +1 -0
- package/dist/autoResolve.js +196 -0
- package/dist/autoResolve.js.map +1 -0
- package/dist/changeEvaluator.d.ts +56 -0
- package/dist/changeEvaluator.d.ts.map +1 -0
- package/dist/changeEvaluator.js +674 -0
- package/dist/changeEvaluator.js.map +1 -0
- package/dist/cli.d.ts +3 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +1 -1
- package/dist/cli.js.map +1 -1
- package/dist/config.d.ts +37 -17
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +50 -2
- package/dist/config.js.map +1 -1
- package/dist/dreamCycle.d.ts +57 -0
- package/dist/dreamCycle.d.ts.map +1 -0
- package/dist/dreamCycle.js +334 -0
- package/dist/dreamCycle.js.map +1 -0
- package/dist/kbBuilder.d.ts +1 -2
- package/dist/kbBuilder.d.ts.map +1 -1
- package/dist/kbBuilder.js +1 -2
- package/dist/kbBuilder.js.map +1 -1
- package/dist/main.d.ts +2 -1
- package/dist/main.d.ts.map +1 -1
- package/dist/main.js +148 -7
- package/dist/main.js.map +1 -1
- package/dist/optimize.d.ts +13 -6
- package/dist/optimize.d.ts.map +1 -1
- package/dist/optimize.js +433 -142
- package/dist/optimize.js.map +1 -1
- package/dist/pipeline.d.ts +19 -21
- package/dist/pipeline.d.ts.map +1 -1
- package/dist/pipeline.js +1093 -160
- package/dist/pipeline.js.map +1 -1
- package/dist/preferences.d.ts +80 -0
- package/dist/preferences.d.ts.map +1 -0
- package/dist/preferences.js +238 -0
- package/dist/preferences.js.map +1 -0
- package/dist/runtimeState.d.ts +30 -0
- package/dist/runtimeState.d.ts.map +1 -0
- package/dist/runtimeState.js +39 -0
- package/dist/runtimeState.js.map +1 -0
- package/dist/scopedRules.d.ts +84 -0
- package/dist/scopedRules.d.ts.map +1 -0
- package/dist/scopedRules.js +449 -0
- package/dist/scopedRules.js.map +1 -0
- package/dist/ui/Dashboard.d.ts +4 -16
- package/dist/ui/Dashboard.d.ts.map +1 -1
- package/dist/ui/Dashboard.js +339 -140
- package/dist/ui/Dashboard.js.map +1 -1
- package/dist/ui/MemoryMap.d.ts +16 -0
- package/dist/ui/MemoryMap.d.ts.map +1 -0
- package/dist/ui/MemoryMap.js +266 -0
- package/dist/ui/MemoryMap.js.map +1 -0
- package/dist/ui/SettingsPanel.d.ts +18 -0
- package/dist/ui/SettingsPanel.d.ts.map +1 -0
- package/dist/ui/SettingsPanel.js +241 -0
- package/dist/ui/SettingsPanel.js.map +1 -0
- package/dist/ui/prompts.d.ts +7 -0
- package/dist/ui/prompts.d.ts.map +1 -1
- package/dist/ui/prompts.js +63 -0
- package/dist/ui/prompts.js.map +1 -1
- package/dist/ui/store.d.ts +154 -18
- package/dist/ui/store.d.ts.map +1 -1
- package/dist/ui/store.js +154 -24
- package/dist/ui/store.js.map +1 -1
- package/dist/ui/theme.d.ts +1 -8
- package/dist/ui/theme.d.ts.map +1 -1
- package/dist/ui/theme.js +2 -21
- package/dist/ui/theme.js.map +1 -1
- package/dist/updateChecker.d.ts +13 -0
- package/dist/updateChecker.d.ts.map +1 -0
- package/dist/updateChecker.js +66 -0
- package/dist/updateChecker.js.map +1 -0
- package/dist/usageTracker.d.ts +12 -0
- package/dist/usageTracker.d.ts.map +1 -0
- package/dist/usageTracker.js +89 -0
- package/dist/usageTracker.js.map +1 -0
- package/dist/writer.d.ts +1 -7
- package/dist/writer.d.ts.map +1 -1
- package/dist/writer.js +1 -11
- package/dist/writer.js.map +1 -1
- package/node_modules/@aspectcode/core/dist/analysis/repo.d.ts.map +1 -1
- package/node_modules/@aspectcode/core/dist/analysis/repo.js +13 -2
- package/node_modules/@aspectcode/core/dist/analysis/repo.js.map +1 -1
- package/node_modules/@aspectcode/core/dist/index.d.ts +1 -3
- package/node_modules/@aspectcode/core/dist/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/core/dist/index.js +1 -3
- package/node_modules/@aspectcode/core/dist/index.js.map +1 -1
- package/node_modules/@aspectcode/core/dist/parsers/genericExtractors.d.ts +14 -0
- package/node_modules/@aspectcode/core/dist/parsers/genericExtractors.d.ts.map +1 -0
- package/node_modules/@aspectcode/core/dist/parsers/genericExtractors.js +191 -0
- package/node_modules/@aspectcode/core/dist/parsers/genericExtractors.js.map +1 -0
- package/node_modules/@aspectcode/core/dist/parsers/index.d.ts +1 -0
- package/node_modules/@aspectcode/core/dist/parsers/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/core/dist/parsers/index.js +6 -1
- package/node_modules/@aspectcode/core/dist/parsers/index.js.map +1 -1
- package/node_modules/@aspectcode/core/dist/parsers/languages.d.ts +20 -0
- package/node_modules/@aspectcode/core/dist/parsers/languages.d.ts.map +1 -1
- package/node_modules/@aspectcode/core/dist/parsers/languages.js +25 -0
- package/node_modules/@aspectcode/core/dist/parsers/languages.js.map +1 -1
- package/node_modules/@aspectcode/core/dist/parsers/tsJsExtractors.d.ts.map +1 -1
- package/node_modules/@aspectcode/core/dist/parsers/tsJsExtractors.js +4 -1
- package/node_modules/@aspectcode/core/dist/parsers/tsJsExtractors.js.map +1 -1
- package/node_modules/@aspectcode/core/package.json +2 -2
- package/node_modules/@aspectcode/core/parsers/cpp.wasm +0 -0
- package/node_modules/@aspectcode/core/parsers/go.wasm +0 -0
- package/node_modules/@aspectcode/core/parsers/php.wasm +0 -0
- package/node_modules/@aspectcode/core/parsers/ruby.wasm +0 -0
- package/node_modules/@aspectcode/core/parsers/rust.wasm +0 -0
- package/node_modules/@aspectcode/emitters/dist/index.d.ts +1 -17
- package/node_modules/@aspectcode/emitters/dist/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/index.js +2 -90
- package/node_modules/@aspectcode/emitters/dist/index.js.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/index.d.ts +0 -2
- package/node_modules/@aspectcode/emitters/dist/instructions/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/index.js +1 -7
- package/node_modules/@aspectcode/emitters/dist/instructions/index.js.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/kb/analyzers.d.ts +0 -18
- package/node_modules/@aspectcode/emitters/dist/kb/analyzers.d.ts.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/kb/analyzers.js +0 -57
- package/node_modules/@aspectcode/emitters/dist/kb/analyzers.js.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/kb/conventions.d.ts +0 -18
- package/node_modules/@aspectcode/emitters/dist/kb/conventions.d.ts.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/kb/conventions.js +0 -130
- package/node_modules/@aspectcode/emitters/dist/kb/conventions.js.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/kb/index.d.ts +2 -4
- package/node_modules/@aspectcode/emitters/dist/kb/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/kb/index.js +1 -11
- package/node_modules/@aspectcode/emitters/dist/kb/index.js.map +1 -1
- package/node_modules/@aspectcode/emitters/package.json +3 -3
- package/node_modules/@aspectcode/evaluator/dist/apply.d.ts +55 -0
- package/node_modules/@aspectcode/evaluator/dist/apply.d.ts.map +1 -0
- package/node_modules/@aspectcode/evaluator/dist/apply.js +368 -0
- package/node_modules/@aspectcode/evaluator/dist/apply.js.map +1 -0
- package/node_modules/@aspectcode/evaluator/dist/diagnosis.d.ts +16 -25
- package/node_modules/@aspectcode/evaluator/dist/diagnosis.d.ts.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/diagnosis.js +115 -138
- package/node_modules/@aspectcode/evaluator/dist/diagnosis.js.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/index.d.ts +8 -43
- package/node_modules/@aspectcode/evaluator/dist/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/index.js +15 -61
- package/node_modules/@aspectcode/evaluator/dist/index.js.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/judge.d.ts +32 -0
- package/node_modules/@aspectcode/evaluator/dist/judge.d.ts.map +1 -0
- package/node_modules/@aspectcode/evaluator/dist/judge.js +165 -0
- package/node_modules/@aspectcode/evaluator/dist/judge.js.map +1 -0
- package/node_modules/@aspectcode/evaluator/dist/llmUtil.d.ts +15 -0
- package/node_modules/@aspectcode/evaluator/dist/llmUtil.d.ts.map +1 -0
- package/node_modules/@aspectcode/evaluator/dist/llmUtil.js +41 -0
- package/node_modules/@aspectcode/evaluator/dist/llmUtil.js.map +1 -0
- package/node_modules/@aspectcode/evaluator/dist/probes.d.ts +20 -47
- package/node_modules/@aspectcode/evaluator/dist/probes.d.ts.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/probes.js +188 -278
- package/node_modules/@aspectcode/evaluator/dist/probes.js.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/runner.d.ts +7 -32
- package/node_modules/@aspectcode/evaluator/dist/runner.d.ts.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/runner.js +21 -146
- package/node_modules/@aspectcode/evaluator/dist/runner.js.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/types.d.ts +141 -99
- package/node_modules/@aspectcode/evaluator/dist/types.d.ts.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/types.js +10 -2
- package/node_modules/@aspectcode/evaluator/dist/types.js.map +1 -1
- package/node_modules/@aspectcode/evaluator/package.json +4 -4
- package/node_modules/@aspectcode/optimizer/dist/index.d.ts +3 -10
- package/node_modules/@aspectcode/optimizer/dist/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/index.js +1 -19
- package/node_modules/@aspectcode/optimizer/dist/index.js.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/anthropic.d.ts.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/anthropic.js +40 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/anthropic.js.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/aspectcode.d.ts +9 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/aspectcode.d.ts.map +1 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/aspectcode.js +83 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/aspectcode.js.map +1 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/index.d.ts +4 -3
- package/node_modules/@aspectcode/optimizer/dist/providers/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/index.js +24 -10
- package/node_modules/@aspectcode/optimizer/dist/providers/index.js.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/openai.d.ts.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/openai.js +22 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/openai.js.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/retry.d.ts +14 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/retry.d.ts.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/retry.js +1 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/retry.js.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/types.d.ts +14 -0
- package/node_modules/@aspectcode/optimizer/dist/types.d.ts.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/types.js.map +1 -1
- package/node_modules/@aspectcode/optimizer/package.json +2 -2
- package/node_modules/web-tree-sitter/LICENSE +21 -0
- package/node_modules/web-tree-sitter/README.md +198 -0
- package/node_modules/web-tree-sitter/package.json +36 -0
- package/node_modules/web-tree-sitter/tree-sitter-web.d.ts +204 -0
- package/node_modules/web-tree-sitter/tree-sitter.js +1 -0
- package/node_modules/web-tree-sitter/tree-sitter.wasm +0 -0
- package/package.json +8 -8
- package/dist/complaintProcessor.d.ts +0 -16
- package/dist/complaintProcessor.d.ts.map +0 -1
- package/dist/complaintProcessor.js +0 -134
- package/dist/complaintProcessor.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/emitter.d.ts +0 -72
- package/node_modules/@aspectcode/emitters/dist/emitter.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/emitter.js +0 -10
- package/node_modules/@aspectcode/emitters/dist/emitter.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/content.d.ts +0 -26
- package/node_modules/@aspectcode/emitters/dist/instructions/content.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/content.js +0 -501
- package/node_modules/@aspectcode/emitters/dist/instructions/content.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/detection.d.ts +0 -13
- package/node_modules/@aspectcode/emitters/dist/instructions/detection.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/detection.js +0 -55
- package/node_modules/@aspectcode/emitters/dist/instructions/detection.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/instructionsEmitter.d.ts +0 -9
- package/node_modules/@aspectcode/emitters/dist/instructions/instructionsEmitter.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/instructionsEmitter.js +0 -30
- package/node_modules/@aspectcode/emitters/dist/instructions/instructionsEmitter.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/kb/kbEmitter.d.ts +0 -21
- package/node_modules/@aspectcode/emitters/dist/kb/kbEmitter.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/kb/kbEmitter.js +0 -125
- package/node_modules/@aspectcode/emitters/dist/kb/kbEmitter.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/manifest.d.ts +0 -37
- package/node_modules/@aspectcode/emitters/dist/manifest.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/manifest.js +0 -50
- package/node_modules/@aspectcode/emitters/dist/manifest.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/report.d.ts +0 -22
- package/node_modules/@aspectcode/emitters/dist/report.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/report.js +0 -3
- package/node_modules/@aspectcode/emitters/dist/report.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/stableJson.d.ts +0 -14
- package/node_modules/@aspectcode/emitters/dist/stableJson.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/stableJson.js +0 -40
- package/node_modules/@aspectcode/emitters/dist/stableJson.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/transaction.d.ts +0 -29
- package/node_modules/@aspectcode/emitters/dist/transaction.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/transaction.js +0 -104
- package/node_modules/@aspectcode/emitters/dist/transaction.js.map +0 -1
|
@@ -1,172 +1,149 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
/**
|
|
3
|
-
*
|
|
3
|
+
* Aggregate diagnosis — analyzes judged probe results and proposes AGENTS.md edits.
|
|
4
4
|
*
|
|
5
|
-
* Takes
|
|
6
|
-
*
|
|
5
|
+
* Takes all judged probe results (including behaviour reviews and per-probe edits),
|
|
6
|
+
* aggregates the failures, and proposes up to 8 targeted edits (AGENTS.md sections or scoped rules).
|
|
7
|
+
*
|
|
8
|
+
* Ported from sweagent_bench oracle/diagnose.py.
|
|
7
9
|
*/
|
|
8
10
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
11
|
+
exports.parseDiagnoseResponse = parseDiagnoseResponse;
|
|
9
12
|
exports.diagnose = diagnose;
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
/**
|
|
14
|
-
* Build the diagnosis prompt from failed probe results.
|
|
15
|
-
*/
|
|
16
|
-
function buildDiagnosisPrompt(failures, agentsContent) {
|
|
17
|
-
const failureSummaries = failures.map((f, i) => {
|
|
18
|
-
const shortcomings = f.shortcomings.map((s) => ` - ${s}`).join('\n');
|
|
19
|
-
return `${i + 1}. Probe: ${f.probeId}\n Shortcomings:\n${shortcomings}`;
|
|
20
|
-
}).join('\n\n');
|
|
21
|
-
return `You are diagnosing why an AI coding assistant's AGENTS.md instructions
|
|
22
|
-
are failing to guide it correctly. Below are probe test results showing
|
|
23
|
-
specific scenarios where the AI fell short.
|
|
13
|
+
const llmUtil_1 = require("./llmUtil");
|
|
14
|
+
// ── Prompts ─────────────────────────────────────────────────
|
|
15
|
+
const DIAGNOSE_SYSTEM = `You are an expert context editor for AI coding assistants. You manage AGENTS.md (general guidance) and scoped rules (directory-specific guidance).
|
|
24
16
|
|
|
25
|
-
|
|
26
|
-
|
|
17
|
+
Output a JSON array of edit objects, each with:
|
|
18
|
+
- "section": an AGENTS.md section OR "scoped:slug" to edit a scoped rule OR "scoped:CREATE:slug" to create one OR "scoped:DELETE:slug" to remove one
|
|
19
|
+
- "action": one of "add", "modify", "strengthen", "remove"
|
|
20
|
+
- "content": the specific text (for AGENTS.md: a bullet point; for scoped rules: full markdown body)
|
|
21
|
+
- "globs": (only for scoped:CREATE) array of glob patterns, e.g. ["src/core/**"]
|
|
22
|
+
- "description": (only for scoped:CREATE) short description of the rule
|
|
27
23
|
|
|
28
|
-
|
|
29
|
-
${agentsContent}
|
|
24
|
+
AGENTS.md sections: "Operating Mode", "Procedural Standards", "High-Impact Hubs", "Entry Points", "Import Chains", "Validation", "Integration Risk", "Conventions", "Guardrails", "Setup"
|
|
30
25
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
26
|
+
Rules:
|
|
27
|
+
- STRONGLY prefer editing AGENTS.md over creating scoped rules. Scoped rules are only for content that is truly directory-specific and would be misleading if applied globally.
|
|
28
|
+
- Do NOT create scoped rules for naming conventions alone — that belongs in AGENTS.md.
|
|
29
|
+
- Keep edits specific and actionable. Write direct imperatives.
|
|
30
|
+
- Content must be general enough to apply broadly, not tied to one probe scenario.
|
|
31
|
+
- Use "modify"/"strengthen" to refine existing guidance before adding new rules.
|
|
32
|
+
- You may delete scoped rules that are redundant, trivial, or already covered by AGENTS.md.
|
|
33
|
+
- Edits are optional: return [] when guidance is already strong.
|
|
34
|
+
- Return at most 8 edits total.
|
|
35
|
+
- Keep AGENTS.md under 8,000 characters.
|
|
36
|
+
- Output ONLY the JSON array.`;
|
|
37
|
+
function buildDiagnoseUserPrompt(agentsMd, results, scopedRulesContext, staticAnalysisData) {
|
|
38
|
+
const diagnostics = results.map((r, i) => {
|
|
39
|
+
const reviews = r.behaviorReviews
|
|
40
|
+
.map((br) => ` * Behavior: ${br.behavior} | Assessment: ${br.assessment} | Evidence: ${br.evidence} | Improvement: ${br.improvement}`)
|
|
41
|
+
.join('\n');
|
|
42
|
+
const edits = r.proposedEdits
|
|
43
|
+
.map((e) => ` * ProposedEdit: ${e.action}@${e.section}: ${e.content}`)
|
|
44
|
+
.join('\n');
|
|
45
|
+
return `- Probe ${i + 1}: ${r.task}\n${reviews}\n * Overall: ${r.overallNotes}\n${edits}`;
|
|
46
|
+
}).join('\n\n');
|
|
47
|
+
let prompt = `CURRENT AGENTS.MD:
|
|
48
|
+
---
|
|
49
|
+
${agentsMd}
|
|
50
|
+
---`;
|
|
51
|
+
if (scopedRulesContext) {
|
|
52
|
+
prompt += `
|
|
36
53
|
|
|
37
|
-
|
|
54
|
+
CURRENT SCOPED RULES:
|
|
55
|
+
---
|
|
56
|
+
${scopedRulesContext}
|
|
57
|
+
---`;
|
|
58
|
+
}
|
|
59
|
+
if (staticAnalysisData) {
|
|
60
|
+
prompt += `
|
|
38
61
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
MOTIVATED_BY: <comma-separated probe IDs>
|
|
62
|
+
STATIC ANALYSIS DATA:
|
|
63
|
+
${staticAnalysisData}`;
|
|
64
|
+
}
|
|
65
|
+
prompt += `
|
|
44
66
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
ACTION: ...
|
|
48
|
-
CONTENT: ...
|
|
49
|
-
MOTIVATED_BY: ...
|
|
67
|
+
PROBE DIAGNOSTICS:
|
|
68
|
+
${diagnostics}
|
|
50
69
|
|
|
51
|
-
|
|
70
|
+
Propose edits to improve the guidance. You may edit AGENTS.md sections, create/update/delete scoped rules, or return [] if no changes needed.`;
|
|
71
|
+
return prompt;
|
|
52
72
|
}
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
while ((match = editRegex.exec(response)) !== null) {
|
|
63
|
-
edits.push({
|
|
64
|
-
section: match[1].trim(),
|
|
65
|
-
action: match[2].trim().toLowerCase(),
|
|
66
|
-
content: match[3].trim(),
|
|
67
|
-
motivatedBy: match[4].trim().split(/,\s*/).filter(Boolean),
|
|
68
|
-
});
|
|
73
|
+
function parseDiagnoseResponse(raw) {
|
|
74
|
+
// Strip thinking tags
|
|
75
|
+
let cleaned = raw.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
|
|
76
|
+
// Strip code fences
|
|
77
|
+
cleaned = cleaned.replace(/^```(?:json)?\s*\n?/m, '').replace(/\n?```\s*$/m, '').trim();
|
|
78
|
+
try {
|
|
79
|
+
const parsed = JSON.parse(cleaned);
|
|
80
|
+
if (Array.isArray(parsed))
|
|
81
|
+
return parsed;
|
|
69
82
|
}
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
const
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
content: match[3].trim(),
|
|
80
|
-
motivatedBy: match[4].trim().split(/,\s*/).filter(Boolean),
|
|
81
|
-
});
|
|
83
|
+
catch {
|
|
84
|
+
// Try to find JSON array
|
|
85
|
+
const match = cleaned.match(/\[[\s\S]*\]/);
|
|
86
|
+
if (match) {
|
|
87
|
+
try {
|
|
88
|
+
return JSON.parse(match[0]);
|
|
89
|
+
}
|
|
90
|
+
catch {
|
|
91
|
+
// fall through
|
|
82
92
|
}
|
|
83
93
|
}
|
|
84
94
|
}
|
|
85
|
-
return
|
|
95
|
+
return [];
|
|
86
96
|
}
|
|
97
|
+
// ── Public API ──────────────────────────────────────────────
|
|
87
98
|
/**
|
|
88
|
-
* Diagnose AGENTS.md shortcomings from
|
|
99
|
+
* Diagnose AGENTS.md shortcomings from judged probe results.
|
|
89
100
|
*
|
|
90
|
-
*
|
|
91
|
-
*
|
|
101
|
+
* Aggregates all probe behavior reviews and per-probe edits,
|
|
102
|
+
* then proposes up to 8 aggregate edits via a single LLM call.
|
|
92
103
|
*/
|
|
93
|
-
async function diagnose(
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
if (signal?.aborted)
|
|
98
|
-
return
|
|
104
|
+
async function diagnose(options) {
|
|
105
|
+
const { judgedResults, agentsContent, provider, log, signal, scopedRulesContext, staticAnalysisData } = options;
|
|
106
|
+
if (judgedResults.length === 0)
|
|
107
|
+
return [];
|
|
108
|
+
if (signal?.aborted)
|
|
109
|
+
return [];
|
|
110
|
+
// Only send probes that have non-strong behaviours
|
|
111
|
+
const weakResults = judgedResults.filter((r) => r.behaviorReviews.some((br) => br.assessment !== 'strong'));
|
|
112
|
+
if (weakResults.length === 0) {
|
|
113
|
+
log?.info('All probes assessed as strong — no diagnosis needed.');
|
|
114
|
+
return [];
|
|
99
115
|
}
|
|
100
|
-
log?.info(`Diagnosing ${
|
|
101
|
-
const
|
|
116
|
+
log?.info(`Diagnosing ${weakResults.length} probe result${weakResults.length === 1 ? '' : 's'} with weak behaviors…`);
|
|
117
|
+
const userPrompt = buildDiagnoseUserPrompt(agentsContent, weakResults, scopedRulesContext, staticAnalysisData);
|
|
102
118
|
const messages = [
|
|
103
|
-
{ role: '
|
|
119
|
+
{ role: 'system', content: DIAGNOSE_SYSTEM },
|
|
120
|
+
{ role: 'user', content: userPrompt },
|
|
104
121
|
];
|
|
105
122
|
let response;
|
|
106
123
|
try {
|
|
107
|
-
response = await
|
|
124
|
+
response = await (0, llmUtil_1.chatWithTemp)(provider, messages, 0.0, signal);
|
|
108
125
|
}
|
|
109
126
|
catch (err) {
|
|
110
127
|
const msg = err instanceof Error ? err.message : String(err);
|
|
111
128
|
log?.error(`Diagnosis LLM call failed: ${msg}`);
|
|
112
|
-
return
|
|
113
|
-
edits: [],
|
|
114
|
-
summary: `Diagnosis failed: ${msg}`,
|
|
115
|
-
failureCount: failures.length,
|
|
116
|
-
};
|
|
117
|
-
}
|
|
118
|
-
const diagnosis = parseDiagnosisResponse(response, failures.length);
|
|
119
|
-
log?.info(`Diagnosis: ${diagnosis.edits.length} edit${diagnosis.edits.length === 1 ? '' : 's'} proposed`);
|
|
120
|
-
return diagnosis;
|
|
121
|
-
}
|
|
122
|
-
/**
|
|
123
|
-
* Apply diagnosis edits to AGENTS.md content.
|
|
124
|
-
*
|
|
125
|
-
* Uses the LLM to intelligently merge the proposed edits into the
|
|
126
|
-
* existing content, since edits reference sections by name (not line number).
|
|
127
|
-
*/
|
|
128
|
-
async function applyDiagnosisEdits(agentsContent, diagnosis, provider, log, signal) {
|
|
129
|
-
if (diagnosis.edits.length === 0) {
|
|
130
|
-
return { content: agentsContent, appliedEdits: [] };
|
|
131
|
-
}
|
|
132
|
-
if (signal?.aborted) {
|
|
133
|
-
return { content: agentsContent, appliedEdits: [] };
|
|
134
|
-
}
|
|
135
|
-
const editDescriptions = diagnosis.edits
|
|
136
|
-
.map((e, i) => `${i + 1}. [${e.action.toUpperCase()}] Section "${e.section}": ${e.content}`)
|
|
137
|
-
.join('\n');
|
|
138
|
-
const prompt = `Apply the following edits to the AGENTS.md instructions.
|
|
139
|
-
Each edit specifies a section, an action (add/modify/strengthen/remove), and content.
|
|
140
|
-
|
|
141
|
-
## Edits to Apply
|
|
142
|
-
${editDescriptions}
|
|
143
|
-
|
|
144
|
-
## Current AGENTS.md
|
|
145
|
-
${agentsContent}
|
|
146
|
-
|
|
147
|
-
## Rules
|
|
148
|
-
- Apply ALL edits.
|
|
149
|
-
- Keep the same overall structure unless an edit requires restructuring.
|
|
150
|
-
- For "add": insert the new rule in the appropriate section.
|
|
151
|
-
- For "modify": find and replace the relevant rule.
|
|
152
|
-
- For "strengthen": make the existing rule more specific/forceful.
|
|
153
|
-
- For "remove": delete the rule.
|
|
154
|
-
- AGENTS.md must remain fully self-contained — no references to external documents.
|
|
155
|
-
- Output ONLY the full updated AGENTS.md content (no explanations or fences).`;
|
|
156
|
-
const messages = [
|
|
157
|
-
{ role: 'user', content: prompt },
|
|
158
|
-
];
|
|
159
|
-
let response;
|
|
160
|
-
try {
|
|
161
|
-
response = await provider.chat(messages);
|
|
162
|
-
}
|
|
163
|
-
catch (err) {
|
|
164
|
-
const msg = err instanceof Error ? err.message : String(err);
|
|
165
|
-
log?.error(`Edit application failed: ${msg}`);
|
|
166
|
-
return { content: agentsContent, appliedEdits: [] };
|
|
129
|
+
return [];
|
|
167
130
|
}
|
|
168
|
-
const
|
|
169
|
-
|
|
170
|
-
|
|
131
|
+
const rawEdits = parseDiagnoseResponse(response);
|
|
132
|
+
const edits = rawEdits
|
|
133
|
+
.slice(0, 8)
|
|
134
|
+
.filter((e) => e.section && e.action)
|
|
135
|
+
.map((e) => ({
|
|
136
|
+
section: e.section,
|
|
137
|
+
action: (['add', 'modify', 'strengthen', 'remove'].includes(e.action)
|
|
138
|
+
? e.action
|
|
139
|
+
: 'add'),
|
|
140
|
+
content: e.content || '',
|
|
141
|
+
globs: e.globs,
|
|
142
|
+
description: e.description,
|
|
143
|
+
}));
|
|
144
|
+
const agentsEdits = edits.filter((e) => !e.section.startsWith('scoped:'));
|
|
145
|
+
const scopedEdits = edits.filter((e) => e.section.startsWith('scoped:'));
|
|
146
|
+
log?.info(`Diagnosis: ${agentsEdits.length} AGENTS.md edit${agentsEdits.length === 1 ? '' : 's'}, ${scopedEdits.length} scoped rule edit${scopedEdits.length === 1 ? '' : 's'}`);
|
|
147
|
+
return edits;
|
|
171
148
|
}
|
|
172
149
|
//# sourceMappingURL=diagnosis.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"diagnosis.js","sourceRoot":"","sources":["../src/diagnosis.ts"],"names":[],"mappings":";AAAA
|
|
1
|
+
{"version":3,"file":"diagnosis.js","sourceRoot":"","sources":["../src/diagnosis.ts"],"names":[],"mappings":";AAAA;;;;;;;GAOG;;AA4FH,sDAuBC;AAUD,4BAoDC;AAzKD,uCAAyC;AAEzC,+DAA+D;AAE/D,MAAM,eAAe,GAAG;;;;;;;;;;;;;;;;;;;;;8BAqBM,CAAC;AAE/B,SAAS,uBAAuB,CAC9B,QAAgB,EAChB,OAA4B,EAC5B,kBAA2B,EAC3B,kBAA2B;IAE3B,MAAM,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACvC,MAAM,OAAO,GAAG,CAAC,CAAC,eAAe;aAC9B,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CACV,iBAAiB,EAAE,CAAC,QAAQ,kBAAkB,EAAE,CAAC,UAAU,gBAAgB,EAAE,CAAC,QAAQ,mBAAmB,EAAE,CAAC,WAAW,EAAE,CAC1H;aACA,IAAI,CAAC,IAAI,CAAC,CAAC;QACd,MAAM,KAAK,GAAG,CAAC,CAAC,aAAa;aAC1B,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,qBAAqB,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,OAAO,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC;aACtE,IAAI,CAAC,IAAI,CAAC,CAAC;QACd,OAAO,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,IAAI,KAAK,OAAO,kBAAkB,CAAC,CAAC,YAAY,KAAK,KAAK,EAAE,CAAC;IAC7F,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEhB,IAAI,MAAM,GAAG;;EAEb,QAAQ;IACN,CAAC;IAEH,IAAI,kBAAkB,EAAE,CAAC;QACvB,MAAM,IAAI;;;;EAIZ,kBAAkB;IAChB,CAAC;IACH,CAAC;IAED,IAAI,kBAAkB,EAAE,CAAC;QACvB,MAAM,IAAI;;;EAGZ,kBAAkB,EAAE,CAAC;IACrB,CAAC;IAED,MAAM,IAAI;;;EAGV,WAAW;;8IAEiI,CAAC;IAE7I,OAAO,MAAM,CAAC;AAChB,CAAC;AAUD,SAAgB,qBAAqB,CAAC,GAAW;IAC/C,sBAAsB;IACtB,IAAI,OAAO,GAAG,GAAG,CAAC,OAAO,CAAC,2BAA2B,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IAElE,oBAAoB;IACpB,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,sBAAsB,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IAExF,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QACnC,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC;YAAE,OAAO,MAAmB,CAAC;IACxD,CAAC;IAAC,MAAM,CAAC;QACP,yBAAyB;QACzB,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;QAC3C,IAAI,KAAK,EAAE,CAAC;YACV,IAAI,CAAC;gBACH,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAc,CAAC;YAC3C,CAAC;YAAC,MAAM,CAAC;gBACP,eAAe;YACjB,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,+DAA+D;AAE/D;;;;;GAKG;AACI,KAAK,UAAU,QAAQ,CAAC,OAAyB;IACtD,MAAM,EAAE,aAAa,EAAE,aAAa,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,GA
AG,OAAO,CAAC;IAEhH,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAC1C,IAAI,MAAM,EAAE,OAAO;QAAE,OAAO,EAAE,CAAC;IAE/B,mDAAmD;IACnD,MAAM,WAAW,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAC7C,CAAC,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,UAAU,KAAK,QAAQ,CAAC,CAC3D,CAAC;IAEF,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC7B,GAAG,EAAE,IAAI,CAAC,sDAAsD,CAAC,CAAC;QAClE,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,GAAG,EAAE,IAAI,CAAC,cAAc,WAAW,CAAC,MAAM,gBAAgB,WAAW,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,uBAAuB,CAAC,CAAC;IAEtH,MAAM,UAAU,GAAG,uBAAuB,CAAC,aAAa,EAAE,WAAW,EAAE,kBAAkB,EAAE,kBAAkB,CAAC,CAAC;IAC/G,MAAM,QAAQ,GAAkB;QAC9B,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,eAAe,EAAE;QAC5C,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE;KACtC,CAAC;IAEF,IAAI,QAAgB,CAAC;IACrB,IAAI,CAAC;QACH,QAAQ,GAAG,MAAM,IAAA,sBAAY,EAAC,QAAQ,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;IACjE,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QAC7D,GAAG,EAAE,KAAK,CAAC,8BAA8B,GAAG,EAAE,CAAC,CAAC;QAChD,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,QAAQ,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IAEjD,MAAM,KAAK,GAAiB,QAAQ;SACjC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;SACX,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,IAAI,CAAC,CAAC,MAAM,CAAC;SACpC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACX,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,MAAM,EAAE,CAAC,CAAC,KAAK,EAAE,QAAQ,EAAE,YAAY,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC;YACnE,CAAC,CAAC,CAAC,CAAC,MAAM;YACV,CAAC,CAAC,KAAK,CAAyB;QAClC,OAAO,EAAE,CAAC,CAAC,OAAO,IAAI,EAAE;QACxB,KAAK,EAAG,CAAS,CAAC,KAAK;QACvB,WAAW,EAAG,CAAS,CAAC,WAAW;KACpC,CAAC,CAAC,CAAC;IAEN,MAAM,WAAW,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,CAAC;IAC1E,MAAM,WAAW,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,CAAC;IACzE,GAAG,EAAE,IAAI,CAAC,cAAc,WAAW,CAAC,MAAM,kBAAkB,WAAW,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC
,GAAG,KAAK,WAAW,CAAC,MAAM,oBAAoB,WAAW,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;IACjL,OAAO,KAAK,CAAC;AACf,CAAC"}
|
|
@@ -1,49 +1,14 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* @aspectcode/evaluator —
|
|
2
|
+
* @aspectcode/evaluator — Probe-and-refine tuning for AGENTS.md.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
* 2. Prompt history harvesting from AI coding tools
|
|
7
|
-
* 3. Evidence-driven diagnosis and AGENTS.md improvement
|
|
8
|
-
*
|
|
9
|
-
* @example
|
|
10
|
-
* ```ts
|
|
11
|
-
* import { generateProbes, runProbes, diagnose, harvestPrompts } from '@aspectcode/evaluator';
|
|
12
|
-
*
|
|
13
|
-
* const harvested = await harvestPrompts({ root });
|
|
14
|
-
* const probes = generateProbes({ kb, harvestedPrompts: harvested });
|
|
15
|
-
* const results = await runProbes({ agentsContent, probes, provider });
|
|
16
|
-
* const failures = results.filter(r => !r.passed);
|
|
17
|
-
* if (failures.length > 0) {
|
|
18
|
-
* const diagnosis = await diagnose({ failures, agentsContent, provider });
|
|
19
|
-
* }
|
|
20
|
-
* ```
|
|
4
|
+
* Multi-iteration loop: LLM-generated probes, per-probe judging,
|
|
5
|
+
* aggregate diagnosis, and deterministic edit application.
|
|
21
6
|
*/
|
|
22
|
-
export type { Probe,
|
|
7
|
+
export type { Probe, SimulationResult, BehaviorReview, JudgedProbeResult, AgentsEdit, ProbeGeneratorOptions, ProbeRunnerOptions, JudgeOptions, DiagnosisOptions, ProbeRefineConfig, ProbeRefineResult, IterationSummary, ApplyResult, ProbeProgressCallback, LlmProvider, ChatOptions, OptLogger, } from './types';
|
|
8
|
+
export { DEFAULT_PROBE_REFINE_CONFIG } from './types';
|
|
23
9
|
export { generateProbes } from './probes';
|
|
24
10
|
export { runProbes } from './runner';
|
|
25
|
-
export
|
|
26
|
-
export { diagnose
|
|
27
|
-
export {
|
|
28
|
-
import type { EvaluationResult, ProbeRunnerOptions, DiagnosisOptions, ProbeGeneratorOptions } from './types';
|
|
29
|
-
/**
|
|
30
|
-
* Run the full evaluation pipeline: generate probes → run them → diagnose failures.
|
|
31
|
-
*
|
|
32
|
-
* This is a convenience function combining the individual steps.
|
|
33
|
-
* For more control, use the individual functions directly.
|
|
34
|
-
*/
|
|
35
|
-
export declare function evaluate(options: {
|
|
36
|
-
/** Probe generation options. */
|
|
37
|
-
probeOptions: ProbeGeneratorOptions;
|
|
38
|
-
/** Current AGENTS.md content. */
|
|
39
|
-
agentsContent: string;
|
|
40
|
-
/** LLM provider for probe execution and diagnosis. */
|
|
41
|
-
provider: ProbeRunnerOptions['provider'];
|
|
42
|
-
/** File contents for context. */
|
|
43
|
-
fileContents?: ReadonlyMap<string, string>;
|
|
44
|
-
/** Logger. */
|
|
45
|
-
log?: DiagnosisOptions['log'];
|
|
46
|
-
/** Abort signal. */
|
|
47
|
-
signal?: AbortSignal;
|
|
48
|
-
}): Promise<EvaluationResult>;
|
|
11
|
+
export { judgeProbe } from './judge';
|
|
12
|
+
export { diagnose } from './diagnosis';
|
|
13
|
+
export { applyEdits, applyEditsWithLlm, AGENTS_MD_CHAR_BUDGET } from './apply';
|
|
49
14
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,YAAY,EACV,KAAK,EACL,gBAAgB,EAChB,cAAc,EACd,iBAAiB,EACjB,UAAU,EACV,qBAAqB,EACrB,kBAAkB,EAClB,YAAY,EACZ,gBAAgB,EAChB,iBAAiB,EACjB,iBAAiB,EACjB,gBAAgB,EAChB,WAAW,EACX,qBAAqB,EACrB,WAAW,EACX,WAAW,EACX,SAAS,GACV,MAAM,SAAS,CAAC;AAEjB,OAAO,EAAE,2BAA2B,EAAE,MAAM,SAAS,CAAC;AAGtD,OAAO,EAAE,cAAc,EAAE,MAAM,UAAU,CAAC;AAG1C,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AAGrC,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AAGrC,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAGvC,OAAO,EAAE,UAAU,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,SAAS,CAAC"}
|
|
@@ -1,75 +1,29 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
/**
|
|
3
|
-
* @aspectcode/evaluator —
|
|
3
|
+
* @aspectcode/evaluator — Probe-and-refine tuning for AGENTS.md.
|
|
4
4
|
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
* 2. Prompt history harvesting from AI coding tools
|
|
8
|
-
* 3. Evidence-driven diagnosis and AGENTS.md improvement
|
|
9
|
-
*
|
|
10
|
-
* @example
|
|
11
|
-
* ```ts
|
|
12
|
-
* import { generateProbes, runProbes, diagnose, harvestPrompts } from '@aspectcode/evaluator';
|
|
13
|
-
*
|
|
14
|
-
* const harvested = await harvestPrompts({ root });
|
|
15
|
-
* const probes = generateProbes({ kb, harvestedPrompts: harvested });
|
|
16
|
-
* const results = await runProbes({ agentsContent, probes, provider });
|
|
17
|
-
* const failures = results.filter(r => !r.passed);
|
|
18
|
-
* if (failures.length > 0) {
|
|
19
|
-
* const diagnosis = await diagnose({ failures, agentsContent, provider });
|
|
20
|
-
* }
|
|
21
|
-
* ```
|
|
5
|
+
* Multi-iteration loop: LLM-generated probes, per-probe judging,
|
|
6
|
+
* aggregate diagnosis, and deterministic edit application.
|
|
22
7
|
*/
|
|
23
8
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
24
|
-
exports.
|
|
25
|
-
|
|
9
|
+
exports.AGENTS_MD_CHAR_BUDGET = exports.applyEditsWithLlm = exports.applyEdits = exports.diagnose = exports.judgeProbe = exports.runProbes = exports.generateProbes = exports.DEFAULT_PROBE_REFINE_CONFIG = void 0;
|
|
10
|
+
var types_1 = require("./types");
|
|
11
|
+
Object.defineProperty(exports, "DEFAULT_PROBE_REFINE_CONFIG", { enumerable: true, get: function () { return types_1.DEFAULT_PROBE_REFINE_CONFIG; } });
|
|
26
12
|
// ── Probe generation ────────────────────────────────────────
|
|
27
13
|
var probes_1 = require("./probes");
|
|
28
14
|
Object.defineProperty(exports, "generateProbes", { enumerable: true, get: function () { return probes_1.generateProbes; } });
|
|
29
15
|
// ── Probe execution ─────────────────────────────────────────
|
|
30
16
|
var runner_1 = require("./runner");
|
|
31
17
|
Object.defineProperty(exports, "runProbes", { enumerable: true, get: function () { return runner_1.runProbes; } });
|
|
32
|
-
// ──
|
|
18
|
+
// ── Per-probe judging ───────────────────────────────────────
|
|
19
|
+
var judge_1 = require("./judge");
|
|
20
|
+
Object.defineProperty(exports, "judgeProbe", { enumerable: true, get: function () { return judge_1.judgeProbe; } });
|
|
21
|
+
// ── Aggregate diagnosis ─────────────────────────────────────
|
|
33
22
|
var diagnosis_1 = require("./diagnosis");
|
|
34
23
|
Object.defineProperty(exports, "diagnose", { enumerable: true, get: function () { return diagnosis_1.diagnose; } });
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
Object.defineProperty(exports, "
|
|
39
|
-
Object.defineProperty(exports, "
|
|
40
|
-
Object.defineProperty(exports, "harvestClaudeCode", { enumerable: true, get: function () { return index_1.harvestClaudeCode; } });
|
|
41
|
-
Object.defineProperty(exports, "harvestCline", { enumerable: true, get: function () { return index_1.harvestCline; } });
|
|
42
|
-
Object.defineProperty(exports, "harvestCopilotChat", { enumerable: true, get: function () { return index_1.harvestCopilotChat; } });
|
|
43
|
-
Object.defineProperty(exports, "harvestCursor", { enumerable: true, get: function () { return index_1.harvestCursor; } });
|
|
44
|
-
Object.defineProperty(exports, "harvestWindsurf", { enumerable: true, get: function () { return index_1.harvestWindsurf; } });
|
|
45
|
-
Object.defineProperty(exports, "harvestExport", { enumerable: true, get: function () { return index_1.harvestExport; } });
|
|
46
|
-
const probes_2 = require("./probes");
|
|
47
|
-
const runner_2 = require("./runner");
|
|
48
|
-
const diagnosis_2 = require("./diagnosis");
|
|
49
|
-
/**
|
|
50
|
-
* Run the full evaluation pipeline: generate probes → run them → diagnose failures.
|
|
51
|
-
*
|
|
52
|
-
* This is a convenience function combining the individual steps.
|
|
53
|
-
* For more control, use the individual functions directly.
|
|
54
|
-
*/
|
|
55
|
-
async function evaluate(options) {
|
|
56
|
-
const { probeOptions, agentsContent, provider, fileContents, log, signal } = options;
|
|
57
|
-
// Step 1: Generate probes
|
|
58
|
-
const probes = (0, probes_2.generateProbes)(probeOptions);
|
|
59
|
-
// Step 2: Run probes
|
|
60
|
-
const probeResults = await (0, runner_2.runProbes)(agentsContent, probes, provider, fileContents, log, signal);
|
|
61
|
-
const failures = probeResults.filter((r) => !r.passed);
|
|
62
|
-
// Step 3: Diagnose if there are failures
|
|
63
|
-
let diagnosis;
|
|
64
|
-
if (failures.length > 0) {
|
|
65
|
-
diagnosis = await (0, diagnosis_2.diagnose)(failures, agentsContent, provider, log, signal);
|
|
66
|
-
}
|
|
67
|
-
return {
|
|
68
|
-
probeResults,
|
|
69
|
-
diagnosis,
|
|
70
|
-
passCount: probeResults.length - failures.length,
|
|
71
|
-
failCount: failures.length,
|
|
72
|
-
totalProbes: probeResults.length,
|
|
73
|
-
};
|
|
74
|
-
}
|
|
24
|
+
// ── Edit application ───────────────────────────────────────
|
|
25
|
+
var apply_1 = require("./apply");
|
|
26
|
+
Object.defineProperty(exports, "applyEdits", { enumerable: true, get: function () { return apply_1.applyEdits; } });
|
|
27
|
+
Object.defineProperty(exports, "applyEditsWithLlm", { enumerable: true, get: function () { return apply_1.applyEditsWithLlm; } });
|
|
28
|
+
Object.defineProperty(exports, "AGENTS_MD_CHAR_BUDGET", { enumerable: true, get: function () { return apply_1.AGENTS_MD_CHAR_BUDGET; } });
|
|
75
29
|
//# sourceMappingURL=index.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAAA
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAAA;;;;;GAKG;;;AAuBH,iCAAsD;AAA7C,oHAAA,2BAA2B,OAAA;AAEpC,+DAA+D;AAC/D,mCAA0C;AAAjC,wGAAA,cAAc,OAAA;AAEvB,+DAA+D;AAC/D,mCAAqC;AAA5B,mGAAA,SAAS,OAAA;AAElB,+DAA+D;AAC/D,iCAAqC;AAA5B,mGAAA,UAAU,OAAA;AAEnB,+DAA+D;AAC/D,yCAAuC;AAA9B,qGAAA,QAAQ,OAAA;AAEjB,8DAA8D;AAC9D,iCAA+E;AAAtE,mGAAA,UAAU,OAAA;AAAE,0GAAA,iBAAiB,OAAA;AAAE,8GAAA,qBAAqB,OAAA"}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-probe judge — evaluates AI responses with strong/partial/missing assessments.
|
|
3
|
+
*
|
|
4
|
+
* For each probe, the judge reviews the simulated response against expected
|
|
5
|
+
* behaviours and proposes targeted AGENTS.md edits.
|
|
6
|
+
*
|
|
7
|
+
* Ported from sweagent_bench oracle/judge.py.
|
|
8
|
+
*/
|
|
9
|
+
import type { JudgedProbeResult, JudgeOptions } from './types';
|
|
10
|
+
export interface JudgeResponse {
|
|
11
|
+
behavior_reviews: Array<{
|
|
12
|
+
behavior: string;
|
|
13
|
+
assessment: string;
|
|
14
|
+
evidence: string;
|
|
15
|
+
improvement: string;
|
|
16
|
+
}>;
|
|
17
|
+
proposed_edits: Array<{
|
|
18
|
+
section: string;
|
|
19
|
+
action: string;
|
|
20
|
+
content: string;
|
|
21
|
+
}>;
|
|
22
|
+
overall_notes: string;
|
|
23
|
+
}
|
|
24
|
+
export declare function parseJudgeResponse(raw: string): JudgeResponse | null;
|
|
25
|
+
/**
|
|
26
|
+
* Judge a single probe's response against expected behaviours.
|
|
27
|
+
*
|
|
28
|
+
* Returns structured assessments (strong/partial/missing) and
|
|
29
|
+
* up to 3 proposed AGENTS.md edits.
|
|
30
|
+
*/
|
|
31
|
+
export declare function judgeProbe(options: JudgeOptions): Promise<JudgedProbeResult>;
|
|
32
|
+
//# sourceMappingURL=judge.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"judge.d.ts","sourceRoot":"","sources":["../src/judge.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAGH,OAAO,KAAK,EACV,iBAAiB,EAGjB,YAAY,EACb,MAAM,SAAS,CAAC;AAgDjB,MAAM,WAAW,aAAa;IAC5B,gBAAgB,EAAE,KAAK,CAAC;QACtB,QAAQ,EAAE,MAAM,CAAC;QACjB,UAAU,EAAE,MAAM,CAAC;QACnB,QAAQ,EAAE,MAAM,CAAC;QACjB,WAAW,EAAE,MAAM,CAAC;KACrB,CAAC,CAAC;IACH,cAAc,EAAE,KAAK,CAAC;QACpB,OAAO,EAAE,MAAM,CAAC;QAChB,MAAM,EAAE,MAAM,CAAC;QACf,OAAO,EAAE,MAAM,CAAC;KACjB,CAAC,CAAC;IACH,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,GAAG,aAAa,GAAG,IAAI,CAqBpE;AAID;;;;;GAKG;AACH,wBAAsB,UAAU,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,iBAAiB,CAAC,CA6FlF"}
|