aspectcode 0.4.1 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/agentsMdRenderer.d.ts +16 -0
- package/dist/agentsMdRenderer.d.ts.map +1 -0
- package/dist/agentsMdRenderer.js +137 -0
- package/dist/agentsMdRenderer.js.map +1 -0
- package/dist/auth.d.ts +31 -0
- package/dist/auth.d.ts.map +1 -0
- package/dist/auth.js +386 -0
- package/dist/auth.js.map +1 -0
- package/dist/autoResolve.d.ts +41 -0
- package/dist/autoResolve.d.ts.map +1 -0
- package/dist/autoResolve.js +196 -0
- package/dist/autoResolve.js.map +1 -0
- package/dist/changeEvaluator.d.ts +56 -0
- package/dist/changeEvaluator.d.ts.map +1 -0
- package/dist/changeEvaluator.js +674 -0
- package/dist/changeEvaluator.js.map +1 -0
- package/dist/cli.d.ts +3 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +1 -1
- package/dist/cli.js.map +1 -1
- package/dist/config.d.ts +37 -17
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +50 -2
- package/dist/config.js.map +1 -1
- package/dist/dreamCycle.d.ts +57 -0
- package/dist/dreamCycle.d.ts.map +1 -0
- package/dist/dreamCycle.js +334 -0
- package/dist/dreamCycle.js.map +1 -0
- package/dist/kbBuilder.d.ts +1 -2
- package/dist/kbBuilder.d.ts.map +1 -1
- package/dist/kbBuilder.js +1 -2
- package/dist/kbBuilder.js.map +1 -1
- package/dist/main.d.ts +2 -1
- package/dist/main.d.ts.map +1 -1
- package/dist/main.js +148 -7
- package/dist/main.js.map +1 -1
- package/dist/optimize.d.ts +13 -6
- package/dist/optimize.d.ts.map +1 -1
- package/dist/optimize.js +433 -142
- package/dist/optimize.js.map +1 -1
- package/dist/pipeline.d.ts +19 -21
- package/dist/pipeline.d.ts.map +1 -1
- package/dist/pipeline.js +1093 -160
- package/dist/pipeline.js.map +1 -1
- package/dist/preferences.d.ts +80 -0
- package/dist/preferences.d.ts.map +1 -0
- package/dist/preferences.js +238 -0
- package/dist/preferences.js.map +1 -0
- package/dist/runtimeState.d.ts +30 -0
- package/dist/runtimeState.d.ts.map +1 -0
- package/dist/runtimeState.js +39 -0
- package/dist/runtimeState.js.map +1 -0
- package/dist/scopedRules.d.ts +84 -0
- package/dist/scopedRules.d.ts.map +1 -0
- package/dist/scopedRules.js +449 -0
- package/dist/scopedRules.js.map +1 -0
- package/dist/ui/Dashboard.d.ts +4 -16
- package/dist/ui/Dashboard.d.ts.map +1 -1
- package/dist/ui/Dashboard.js +339 -140
- package/dist/ui/Dashboard.js.map +1 -1
- package/dist/ui/MemoryMap.d.ts +16 -0
- package/dist/ui/MemoryMap.d.ts.map +1 -0
- package/dist/ui/MemoryMap.js +266 -0
- package/dist/ui/MemoryMap.js.map +1 -0
- package/dist/ui/SettingsPanel.d.ts +18 -0
- package/dist/ui/SettingsPanel.d.ts.map +1 -0
- package/dist/ui/SettingsPanel.js +241 -0
- package/dist/ui/SettingsPanel.js.map +1 -0
- package/dist/ui/prompts.d.ts +7 -0
- package/dist/ui/prompts.d.ts.map +1 -1
- package/dist/ui/prompts.js +63 -0
- package/dist/ui/prompts.js.map +1 -1
- package/dist/ui/store.d.ts +154 -18
- package/dist/ui/store.d.ts.map +1 -1
- package/dist/ui/store.js +154 -24
- package/dist/ui/store.js.map +1 -1
- package/dist/ui/theme.d.ts +1 -8
- package/dist/ui/theme.d.ts.map +1 -1
- package/dist/ui/theme.js +2 -21
- package/dist/ui/theme.js.map +1 -1
- package/dist/updateChecker.d.ts +13 -0
- package/dist/updateChecker.d.ts.map +1 -0
- package/dist/updateChecker.js +66 -0
- package/dist/updateChecker.js.map +1 -0
- package/dist/usageTracker.d.ts +12 -0
- package/dist/usageTracker.d.ts.map +1 -0
- package/dist/usageTracker.js +89 -0
- package/dist/usageTracker.js.map +1 -0
- package/dist/writer.d.ts +1 -7
- package/dist/writer.d.ts.map +1 -1
- package/dist/writer.js +1 -11
- package/dist/writer.js.map +1 -1
- package/node_modules/@aspectcode/core/dist/analysis/repo.d.ts.map +1 -1
- package/node_modules/@aspectcode/core/dist/analysis/repo.js +13 -2
- package/node_modules/@aspectcode/core/dist/analysis/repo.js.map +1 -1
- package/node_modules/@aspectcode/core/dist/index.d.ts +1 -3
- package/node_modules/@aspectcode/core/dist/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/core/dist/index.js +1 -3
- package/node_modules/@aspectcode/core/dist/index.js.map +1 -1
- package/node_modules/@aspectcode/core/dist/parsers/genericExtractors.d.ts +14 -0
- package/node_modules/@aspectcode/core/dist/parsers/genericExtractors.d.ts.map +1 -0
- package/node_modules/@aspectcode/core/dist/parsers/genericExtractors.js +191 -0
- package/node_modules/@aspectcode/core/dist/parsers/genericExtractors.js.map +1 -0
- package/node_modules/@aspectcode/core/dist/parsers/index.d.ts +1 -0
- package/node_modules/@aspectcode/core/dist/parsers/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/core/dist/parsers/index.js +6 -1
- package/node_modules/@aspectcode/core/dist/parsers/index.js.map +1 -1
- package/node_modules/@aspectcode/core/dist/parsers/languages.d.ts +20 -0
- package/node_modules/@aspectcode/core/dist/parsers/languages.d.ts.map +1 -1
- package/node_modules/@aspectcode/core/dist/parsers/languages.js +25 -0
- package/node_modules/@aspectcode/core/dist/parsers/languages.js.map +1 -1
- package/node_modules/@aspectcode/core/dist/parsers/tsJsExtractors.d.ts.map +1 -1
- package/node_modules/@aspectcode/core/dist/parsers/tsJsExtractors.js +4 -1
- package/node_modules/@aspectcode/core/dist/parsers/tsJsExtractors.js.map +1 -1
- package/node_modules/@aspectcode/core/package.json +2 -2
- package/node_modules/@aspectcode/core/parsers/cpp.wasm +0 -0
- package/node_modules/@aspectcode/core/parsers/go.wasm +0 -0
- package/node_modules/@aspectcode/core/parsers/php.wasm +0 -0
- package/node_modules/@aspectcode/core/parsers/ruby.wasm +0 -0
- package/node_modules/@aspectcode/core/parsers/rust.wasm +0 -0
- package/node_modules/@aspectcode/emitters/dist/index.d.ts +1 -17
- package/node_modules/@aspectcode/emitters/dist/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/index.js +2 -90
- package/node_modules/@aspectcode/emitters/dist/index.js.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/index.d.ts +0 -2
- package/node_modules/@aspectcode/emitters/dist/instructions/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/index.js +1 -7
- package/node_modules/@aspectcode/emitters/dist/instructions/index.js.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/kb/analyzers.d.ts +0 -18
- package/node_modules/@aspectcode/emitters/dist/kb/analyzers.d.ts.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/kb/analyzers.js +0 -57
- package/node_modules/@aspectcode/emitters/dist/kb/analyzers.js.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/kb/conventions.d.ts +0 -18
- package/node_modules/@aspectcode/emitters/dist/kb/conventions.d.ts.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/kb/conventions.js +0 -130
- package/node_modules/@aspectcode/emitters/dist/kb/conventions.js.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/kb/index.d.ts +2 -4
- package/node_modules/@aspectcode/emitters/dist/kb/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/emitters/dist/kb/index.js +1 -11
- package/node_modules/@aspectcode/emitters/dist/kb/index.js.map +1 -1
- package/node_modules/@aspectcode/emitters/package.json +3 -3
- package/node_modules/@aspectcode/evaluator/dist/apply.d.ts +55 -0
- package/node_modules/@aspectcode/evaluator/dist/apply.d.ts.map +1 -0
- package/node_modules/@aspectcode/evaluator/dist/apply.js +368 -0
- package/node_modules/@aspectcode/evaluator/dist/apply.js.map +1 -0
- package/node_modules/@aspectcode/evaluator/dist/diagnosis.d.ts +16 -25
- package/node_modules/@aspectcode/evaluator/dist/diagnosis.d.ts.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/diagnosis.js +115 -138
- package/node_modules/@aspectcode/evaluator/dist/diagnosis.js.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/index.d.ts +8 -43
- package/node_modules/@aspectcode/evaluator/dist/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/index.js +15 -61
- package/node_modules/@aspectcode/evaluator/dist/index.js.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/judge.d.ts +32 -0
- package/node_modules/@aspectcode/evaluator/dist/judge.d.ts.map +1 -0
- package/node_modules/@aspectcode/evaluator/dist/judge.js +165 -0
- package/node_modules/@aspectcode/evaluator/dist/judge.js.map +1 -0
- package/node_modules/@aspectcode/evaluator/dist/llmUtil.d.ts +15 -0
- package/node_modules/@aspectcode/evaluator/dist/llmUtil.d.ts.map +1 -0
- package/node_modules/@aspectcode/evaluator/dist/llmUtil.js +41 -0
- package/node_modules/@aspectcode/evaluator/dist/llmUtil.js.map +1 -0
- package/node_modules/@aspectcode/evaluator/dist/probes.d.ts +20 -47
- package/node_modules/@aspectcode/evaluator/dist/probes.d.ts.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/probes.js +188 -278
- package/node_modules/@aspectcode/evaluator/dist/probes.js.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/runner.d.ts +7 -32
- package/node_modules/@aspectcode/evaluator/dist/runner.d.ts.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/runner.js +21 -146
- package/node_modules/@aspectcode/evaluator/dist/runner.js.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/types.d.ts +141 -99
- package/node_modules/@aspectcode/evaluator/dist/types.d.ts.map +1 -1
- package/node_modules/@aspectcode/evaluator/dist/types.js +10 -2
- package/node_modules/@aspectcode/evaluator/dist/types.js.map +1 -1
- package/node_modules/@aspectcode/evaluator/package.json +4 -4
- package/node_modules/@aspectcode/optimizer/dist/index.d.ts +3 -10
- package/node_modules/@aspectcode/optimizer/dist/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/index.js +1 -19
- package/node_modules/@aspectcode/optimizer/dist/index.js.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/anthropic.d.ts.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/anthropic.js +40 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/anthropic.js.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/aspectcode.d.ts +9 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/aspectcode.d.ts.map +1 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/aspectcode.js +83 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/aspectcode.js.map +1 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/index.d.ts +4 -3
- package/node_modules/@aspectcode/optimizer/dist/providers/index.d.ts.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/index.js +24 -10
- package/node_modules/@aspectcode/optimizer/dist/providers/index.js.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/openai.d.ts.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/openai.js +22 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/openai.js.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/retry.d.ts +14 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/retry.d.ts.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/providers/retry.js +1 -0
- package/node_modules/@aspectcode/optimizer/dist/providers/retry.js.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/types.d.ts +14 -0
- package/node_modules/@aspectcode/optimizer/dist/types.d.ts.map +1 -1
- package/node_modules/@aspectcode/optimizer/dist/types.js.map +1 -1
- package/node_modules/@aspectcode/optimizer/package.json +2 -2
- package/node_modules/web-tree-sitter/LICENSE +21 -0
- package/node_modules/web-tree-sitter/README.md +198 -0
- package/node_modules/web-tree-sitter/package.json +36 -0
- package/node_modules/web-tree-sitter/tree-sitter-web.d.ts +204 -0
- package/node_modules/web-tree-sitter/tree-sitter.js +1 -0
- package/node_modules/web-tree-sitter/tree-sitter.wasm +0 -0
- package/package.json +8 -8
- package/dist/complaintProcessor.d.ts +0 -16
- package/dist/complaintProcessor.d.ts.map +0 -1
- package/dist/complaintProcessor.js +0 -134
- package/dist/complaintProcessor.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/emitter.d.ts +0 -72
- package/node_modules/@aspectcode/emitters/dist/emitter.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/emitter.js +0 -10
- package/node_modules/@aspectcode/emitters/dist/emitter.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/content.d.ts +0 -26
- package/node_modules/@aspectcode/emitters/dist/instructions/content.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/content.js +0 -501
- package/node_modules/@aspectcode/emitters/dist/instructions/content.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/detection.d.ts +0 -13
- package/node_modules/@aspectcode/emitters/dist/instructions/detection.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/detection.js +0 -55
- package/node_modules/@aspectcode/emitters/dist/instructions/detection.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/instructionsEmitter.d.ts +0 -9
- package/node_modules/@aspectcode/emitters/dist/instructions/instructionsEmitter.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/instructions/instructionsEmitter.js +0 -30
- package/node_modules/@aspectcode/emitters/dist/instructions/instructionsEmitter.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/kb/kbEmitter.d.ts +0 -21
- package/node_modules/@aspectcode/emitters/dist/kb/kbEmitter.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/kb/kbEmitter.js +0 -125
- package/node_modules/@aspectcode/emitters/dist/kb/kbEmitter.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/manifest.d.ts +0 -37
- package/node_modules/@aspectcode/emitters/dist/manifest.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/manifest.js +0 -50
- package/node_modules/@aspectcode/emitters/dist/manifest.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/report.d.ts +0 -22
- package/node_modules/@aspectcode/emitters/dist/report.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/report.js +0 -3
- package/node_modules/@aspectcode/emitters/dist/report.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/stableJson.d.ts +0 -14
- package/node_modules/@aspectcode/emitters/dist/stableJson.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/stableJson.js +0 -40
- package/node_modules/@aspectcode/emitters/dist/stableJson.js.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/transaction.d.ts +0 -29
- package/node_modules/@aspectcode/emitters/dist/transaction.d.ts.map +0 -1
- package/node_modules/@aspectcode/emitters/dist/transaction.js +0 -104
- package/node_modules/@aspectcode/emitters/dist/transaction.js.map +0 -1
|
@@ -1,303 +1,213 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
/**
|
|
3
|
-
*
|
|
3
|
+
* LLM-powered probe generator — creates synthetic bug-fix tasks.
|
|
4
4
|
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
5
|
+
* Each probe is a realistic coding-assistant request with expected
|
|
6
|
+
* behaviours. Probes are generated via LLM at temperature 0.9 for
|
|
7
|
+
* diversity, with deduplication across iterations and a fallback
|
|
8
|
+
* pool of hardcoded templates.
|
|
9
9
|
*
|
|
10
|
-
*
|
|
11
|
-
* to an LLM with AGENTS.md as context and evaluating the response.
|
|
10
|
+
* Ported from sweagent_bench oracle/probes.py.
|
|
12
11
|
*/
|
|
13
12
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
13
|
+
exports.normalizeProbeText = normalizeProbeText;
|
|
14
|
+
exports.isDuplicate = isDuplicate;
|
|
15
|
+
exports.parseProbeResponse = parseProbeResponse;
|
|
14
16
|
exports.generateProbes = generateProbes;
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
return sepIdx > 0 ? kb.slice(start, sepIdx).trim() : kb.slice(start).trim();
|
|
30
|
-
}
|
|
31
|
-
/**
|
|
32
|
-
* Parse "High-Risk Architectural Hubs" table rows.
|
|
33
|
-
*
|
|
34
|
-
* Handles two formats:
|
|
35
|
-
* - Legacy 3-col: | File | In | Out |
|
|
36
|
-
* - Emitter 5-col: | Rank | File | Imports | Imported By | Risk |
|
|
37
|
-
*/
|
|
38
|
-
function parseHubs(architecture) {
|
|
39
|
-
const hubs = [];
|
|
40
|
-
const section = extractSubSection(architecture, 'High-Risk Architectural Hubs');
|
|
41
|
-
if (!section)
|
|
42
|
-
return hubs;
|
|
43
|
-
// Try 5-col emitter format first: | Rank | `file` | Imports | Imported By | Risk |
|
|
44
|
-
const fiveCol = /\|\s*\d+\s*\|\s*`([^`]+)`\s*\|\s*(\d+)\s*\|\s*(\d+)\s*\|/g;
|
|
45
|
-
let match;
|
|
46
|
-
while ((match = fiveCol.exec(section)) !== null) {
|
|
47
|
-
hubs.push({
|
|
48
|
-
file: match[1].trim(),
|
|
49
|
-
outDegree: parseInt(match[2], 10), // "Imports"
|
|
50
|
-
inDegree: parseInt(match[3], 10), // "Imported By"
|
|
51
|
-
});
|
|
52
|
-
}
|
|
53
|
-
if (hubs.length > 0)
|
|
54
|
-
return hubs;
|
|
55
|
-
// Fallback: 3-col legacy format | File | In | Out |
|
|
56
|
-
const threeCol = /\|\s*`?([^`|]+?)`?\s*\|\s*(\d+)\s*\|\s*(\d+)\s*\|/g;
|
|
57
|
-
while ((match = threeCol.exec(section)) !== null) {
|
|
58
|
-
hubs.push({
|
|
59
|
-
file: match[1].trim(),
|
|
60
|
-
inDegree: parseInt(match[2], 10),
|
|
61
|
-
outDegree: parseInt(match[3], 10),
|
|
62
|
-
});
|
|
17
|
+
const llmUtil_1 = require("./llmUtil");
|
|
18
|
+
// ── Constants ───────────────────────────────────────────────
|
|
19
|
+
const MAX_KB_CHARS = 12000;
|
|
20
|
+
const MAX_TOPUP_ATTEMPTS = 3;
|
|
21
|
+
// ── Prompts ─────────────────────────────────────────────────
|
|
22
|
+
function buildProbeSystemPrompt(maxProbes) {
|
|
23
|
+
return `Generate probe tasks to evaluate whether AGENTS.md improves repo self-exploration.
|
|
24
|
+
|
|
25
|
+
Return ONLY a JSON array of probe objects with this shape:
|
|
26
|
+
[
|
|
27
|
+
{
|
|
28
|
+
"task": "short realistic coding-assistant user request",
|
|
29
|
+
"expected_behaviors": ["behavior 1", "behavior 2"],
|
|
30
|
+
"rationale": "why this probe is useful"
|
|
63
31
|
}
|
|
64
|
-
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
Rules:
|
|
35
|
+
- Return exactly ${maxProbes} probes, diverse across bug-fix and test-failure tasks.
|
|
36
|
+
- Tasks must be concrete coding requests, not AGENTS/meta questions.
|
|
37
|
+
- Each probe must include 2-4 expected behaviors.
|
|
38
|
+
- Expected behaviors must emphasize: evidence-first localization, dependency tracing, minimal scoped edits, and targeted validation.
|
|
39
|
+
- Every task must be patchable and executable by a tool-using coding runner (inspect files, run commands, propose code diff).
|
|
40
|
+
- Avoid pure advisory/navigation-only tasks that do not naturally end in a code diff.
|
|
41
|
+
- Avoid duplicates with prior tasks.
|
|
42
|
+
- Exactly ${maxProbes} probes.`;
|
|
65
43
|
}
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
if (file && !file.includes('---') && kind !== 'Kind') {
|
|
92
|
-
entries.push({ file, kind });
|
|
93
|
-
}
|
|
94
|
-
}
|
|
95
|
-
return entries;
|
|
44
|
+
function buildProbeUserPrompt(projectName, agentsMd, kbText, priorTasks) {
|
|
45
|
+
const priorSection = priorTasks.length > 0
|
|
46
|
+
? priorTasks.map((t, i) => `${i + 1}. ${t}`).join('\n')
|
|
47
|
+
: '(none)';
|
|
48
|
+
return `Project: ${projectName}
|
|
49
|
+
|
|
50
|
+
CURRENT AGENTS.MD:
|
|
51
|
+
---
|
|
52
|
+
${agentsMd}
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
REPO KB SNIPPET:
|
|
56
|
+
---
|
|
57
|
+
${kbText}
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
PRIOR PROBE TASKS (avoid duplicates):
|
|
61
|
+
${priorSection}
|
|
62
|
+
|
|
63
|
+
Generate probes now.
|
|
64
|
+
|
|
65
|
+
Additional requirements for this batch:
|
|
66
|
+
- Include realistic technical context (module/function/test hints) when possible.
|
|
67
|
+
- Prefer scoped fixes; avoid broad refactors.
|
|
68
|
+
- Phrase each task as a request to fix a concrete failing behavior or test regression.`;
|
|
96
69
|
}
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
function
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
const boldRegex = /\*\*Use:\*\*\s*(.+)/g;
|
|
128
|
-
while ((match = boldRegex.exec(section)) !== null) {
|
|
129
|
-
conventions.push(match[1].trim());
|
|
130
|
-
}
|
|
131
|
-
return conventions.filter(Boolean);
|
|
70
|
+
// ── Fallback probe pool ─────────────────────────────────────
|
|
71
|
+
const FALLBACK_BEHAVIORS = [
|
|
72
|
+
'Localizes likely files/functions before editing',
|
|
73
|
+
'Applies a minimal scoped code change',
|
|
74
|
+
'Runs targeted validation relevant to the change',
|
|
75
|
+
];
|
|
76
|
+
const FALLBACK_PROBES = [
|
|
77
|
+
{ task: 'A recent commit introduced a regression in a high-traffic module. A core function now returns incorrect results for edge-case inputs. Fix it.', rationale: 'Tests regression diagnosis in a core path' },
|
|
78
|
+
{ task: 'A test suite for the main entry point is failing after a dependency update. The test expects old behavior. Update the test or the code to restore correctness.', rationale: 'Tests adaptation to dependency changes' },
|
|
79
|
+
{ task: 'An edge case in input validation causes a crash when empty strings are passed. Add proper validation and a test.', rationale: 'Tests defensive coding and validation' },
|
|
80
|
+
{ task: 'A serialization function drops fields when the input contains nested objects with optional keys. Fix the serialization logic.', rationale: 'Tests careful data handling' },
|
|
81
|
+
{ task: 'A caching layer returns stale data after a config change. The cache invalidation logic does not account for the new config key. Fix it.', rationale: 'Tests understanding of caching dependencies' },
|
|
82
|
+
{ task: 'A version compatibility check is too strict and rejects valid inputs from the latest release. Relax the check while maintaining safety.', rationale: 'Tests careful constraint relaxation' },
|
|
83
|
+
{ task: 'A lifecycle hook fires in the wrong order during initialization, causing a null reference. Fix the ordering.', rationale: 'Tests understanding of initialization order' },
|
|
84
|
+
{ task: 'A deduplication filter is too aggressive and removes valid entries that share a partial key. Fix the matching logic.', rationale: 'Tests precise filtering logic' },
|
|
85
|
+
{ task: 'An API endpoint returns a 500 error when called with a valid but uncommon parameter combination. Trace and fix the handler.', rationale: 'Tests end-to-end debugging' },
|
|
86
|
+
{ task: 'A migration script fails silently when the target schema already exists. Add proper detection and error handling.', rationale: 'Tests robustness in data operations' },
|
|
87
|
+
{ task: 'A search function returns duplicate results when the query matches items in multiple indexes. Fix the result merging.', rationale: 'Tests data aggregation correctness' },
|
|
88
|
+
{ task: 'An event handler leaks resources because it registers listeners but never removes them. Fix the cleanup.', rationale: 'Tests resource management' },
|
|
89
|
+
{ task: 'A formatting function produces incorrect output for locale-specific inputs. Fix the locale handling.', rationale: 'Tests internationalization awareness' },
|
|
90
|
+
{ task: 'A retry mechanism enters an infinite loop when the error type changes between attempts. Add proper loop termination.', rationale: 'Tests error handling robustness' },
|
|
91
|
+
{ task: 'A permissions check incorrectly grants access when multiple roles overlap. Fix the authorization logic.', rationale: 'Tests security-sensitive logic' },
|
|
92
|
+
];
|
|
93
|
+
// ── Deduplication ───────────────────────────────────────────
|
|
94
|
+
function normalizeProbeText(text) {
|
|
95
|
+
return text
|
|
96
|
+
.toLowerCase()
|
|
97
|
+
.replace(/[^a-z0-9\s]/g, '')
|
|
98
|
+
.replace(/\s+/g, ' ')
|
|
99
|
+
.trim();
|
|
132
100
|
}
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
*/
|
|
140
|
-
function extractSubSection(section, heading) {
|
|
141
|
-
const escaped = heading.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
142
|
-
// Allow optional emoji / symbol chars between the # and the heading text
|
|
143
|
-
const regex = new RegExp(`(#{1,6})\\s*(?:[^\\w\\s]*\\s*)?${escaped}`, 'i');
|
|
144
|
-
const match = regex.exec(section);
|
|
145
|
-
if (!match)
|
|
146
|
-
return '';
|
|
147
|
-
const headingLevel = match[1].length; // number of # chars
|
|
148
|
-
const start = match.index + match[0].length;
|
|
149
|
-
// Find next heading of same or higher (shallower) level
|
|
150
|
-
const rest = section.slice(start);
|
|
151
|
-
const endPattern = new RegExp(`\\n#{1,${headingLevel}}\\s`);
|
|
152
|
-
const nextHeading = rest.search(endPattern);
|
|
153
|
-
return nextHeading > 0 ? rest.slice(0, nextHeading).trim() : rest.trim();
|
|
101
|
+
function isDuplicate(task, existing) {
|
|
102
|
+
const normalized = normalizeProbeText(task);
|
|
103
|
+
return existing.some((t) => {
|
|
104
|
+
const n = normalizeProbeText(t);
|
|
105
|
+
return n === normalized || n.includes(normalized) || normalized.includes(n);
|
|
106
|
+
});
|
|
154
107
|
}
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
108
|
+
function parseProbeResponse(raw) {
|
|
109
|
+
// Strip thinking tags
|
|
110
|
+
let cleaned = raw.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
|
|
111
|
+
// Strip code fences
|
|
112
|
+
cleaned = cleaned.replace(/^```(?:json)?\s*\n?/m, '').replace(/\n?```\s*$/m, '').trim();
|
|
113
|
+
try {
|
|
114
|
+
const parsed = JSON.parse(cleaned);
|
|
115
|
+
if (Array.isArray(parsed))
|
|
116
|
+
return parsed;
|
|
162
117
|
}
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
118
|
+
catch {
|
|
119
|
+
// Try to find JSON array
|
|
120
|
+
const match = cleaned.match(/\[[\s\S]*\]/);
|
|
121
|
+
if (match) {
|
|
122
|
+
try {
|
|
123
|
+
return JSON.parse(match[0]);
|
|
124
|
+
}
|
|
125
|
+
catch {
|
|
126
|
+
// fall through
|
|
127
|
+
}
|
|
128
|
+
}
|
|
167
129
|
}
|
|
168
|
-
return [
|
|
169
|
-
}
|
|
170
|
-
// ── Probe generators by category ────────────────────────────
|
|
171
|
-
function generateHubProbes(hubs) {
|
|
172
|
-
return hubs.slice(0, 5).map((hub) => ({
|
|
173
|
-
id: `hub-safety-${sanitizeId(hub.file)}`,
|
|
174
|
-
description: `Test that the AI handles ${hub.file} carefully (${hub.inDegree} dependents)`,
|
|
175
|
-
category: 'hub-safety',
|
|
176
|
-
contextFiles: [hub.file],
|
|
177
|
-
task: `I need to modify \`${hub.file}\` to add a new exported function. This file has ${hub.inDegree} files that depend on it. Write the code change and explain what else needs updating.`,
|
|
178
|
-
expectedBehaviors: [
|
|
179
|
-
`Acknowledges that ${hub.file} is a high-risk hub with many dependents`,
|
|
180
|
-
'Warns about or checks for breaking changes to existing exports',
|
|
181
|
-
'Suggests verifying or updating downstream consumers',
|
|
182
|
-
'Makes a minimal, backwards-compatible change',
|
|
183
|
-
],
|
|
184
|
-
}));
|
|
185
|
-
}
|
|
186
|
-
function generateEntryPointProbes(entries) {
|
|
187
|
-
return entries.slice(0, 3).map((entry) => ({
|
|
188
|
-
id: `entry-point-${sanitizeId(entry.file)}`,
|
|
189
|
-
description: `Test that the AI handles ${entry.kind} entry point ${entry.file} correctly`,
|
|
190
|
-
category: 'entry-point',
|
|
191
|
-
contextFiles: [entry.file],
|
|
192
|
-
task: `I want to add a new ${entry.kind.toLowerCase()} route/command in \`${entry.file}\`. Where should I add it and what patterns should I follow?`,
|
|
193
|
-
expectedBehaviors: [
|
|
194
|
-
`References the existing patterns in ${entry.file}`,
|
|
195
|
-
`Follows the ${entry.kind.toLowerCase()} conventions used in the project`,
|
|
196
|
-
'Suggests appropriate error handling consistent with existing handlers',
|
|
197
|
-
'Places the new code in the correct location within the file',
|
|
198
|
-
],
|
|
199
|
-
}));
|
|
200
|
-
}
|
|
201
|
-
function generateNamingProbes(conventions) {
|
|
202
|
-
if (conventions.length === 0)
|
|
203
|
-
return [];
|
|
204
|
-
const conventionText = conventions.slice(0, 5).join('; ');
|
|
205
|
-
return [{
|
|
206
|
-
id: 'naming-conventions',
|
|
207
|
-
description: 'Test that the AI follows the project\'s naming conventions',
|
|
208
|
-
category: 'naming',
|
|
209
|
-
contextFiles: [],
|
|
210
|
-
task: `I need to create a new utility module with a helper function and a class. What should I name the file, function, and class? The project has these conventions: ${conventionText}`,
|
|
211
|
-
expectedBehaviors: conventions.slice(0, 5).map((c) => `Follows convention: ${c}`),
|
|
212
|
-
}];
|
|
213
|
-
}
|
|
214
|
-
function generateDiffProbes(diffFiles) {
|
|
215
|
-
if (diffFiles.length === 0)
|
|
216
|
-
return [];
|
|
217
|
-
return diffFiles.slice(0, 3).map((file) => ({
|
|
218
|
-
id: `diff-area-${sanitizeId(file)}`,
|
|
219
|
-
description: `Test AI awareness of recently changed file ${file}`,
|
|
220
|
-
category: 'architecture',
|
|
221
|
-
contextFiles: [file],
|
|
222
|
-
task: `I'm working on \`${file}\` which was recently modified. I need to add a related feature. What do I need to know about this file and its dependencies before making changes?`,
|
|
223
|
-
expectedBehaviors: [
|
|
224
|
-
`Identifies the role/purpose of ${file} in the project`,
|
|
225
|
-
'Notes any imports/exports that constrain changes',
|
|
226
|
-
'Suggests checking dependent files',
|
|
227
|
-
'Follows the existing code style in the file',
|
|
228
|
-
],
|
|
229
|
-
}));
|
|
230
|
-
}
|
|
231
|
-
function generateHarvestedProbes(prompts) {
|
|
232
|
-
// Take the most recent prompts that reference specific files
|
|
233
|
-
const withFiles = prompts
|
|
234
|
-
.filter((p) => p.filesReferenced.length > 0)
|
|
235
|
-
.slice(0, 3);
|
|
236
|
-
return withFiles.map((p, i) => ({
|
|
237
|
-
id: `harvested-${i}-${sanitizeId(p.filesReferenced[0] ?? 'general')}`,
|
|
238
|
-
description: `Probe from real ${p.source} interaction involving ${p.filesReferenced.join(', ')}`,
|
|
239
|
-
category: 'harvested',
|
|
240
|
-
contextFiles: p.filesReferenced,
|
|
241
|
-
task: p.userPrompt,
|
|
242
|
-
expectedBehaviors: [
|
|
243
|
-
'Produces a response consistent with the project\'s conventions',
|
|
244
|
-
'References the correct files and their roles',
|
|
245
|
-
'Does not hallucinate non-existent APIs or patterns',
|
|
246
|
-
`Handles the task at least as well as the original ${p.source} response`,
|
|
247
|
-
],
|
|
248
|
-
}));
|
|
249
|
-
}
|
|
250
|
-
// ── Helpers ─────────────────────────────────────────────────
|
|
251
|
-
function sanitizeId(path) {
|
|
252
|
-
return path
|
|
253
|
-
.replace(/[/\\]/g, '-')
|
|
254
|
-
.replace(/\.[^.]+$/, '')
|
|
255
|
-
.replace(/[^a-zA-Z0-9-]/g, '')
|
|
256
|
-
.toLowerCase()
|
|
257
|
-
.slice(0, 40);
|
|
130
|
+
return [];
|
|
258
131
|
}
|
|
259
132
|
// ── Public API ──────────────────────────────────────────────
|
|
260
133
|
/**
|
|
261
|
-
* Generate probes
|
|
134
|
+
* Generate probes via LLM with fallback to hardcoded templates.
|
|
262
135
|
*
|
|
263
|
-
*
|
|
264
|
-
*
|
|
136
|
+
* Uses temperature 0.9 for diverse probe generation. Deduplicates
|
|
137
|
+
* against prior tasks across iterations. Falls back to a pool of
|
|
138
|
+
* hardcoded templates when LLM generation fails.
|
|
265
139
|
*/
|
|
266
|
-
function generateProbes(options) {
|
|
267
|
-
const { kb,
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
140
|
+
async function generateProbes(options) {
|
|
141
|
+
const { kb, currentAgentsMd, priorProbeTasks, maxProbes = 10, provider, projectName = 'project', log, signal, } = options;
|
|
142
|
+
if (signal?.aborted)
|
|
143
|
+
return [];
|
|
144
|
+
// Truncate KB to fit in prompt
|
|
145
|
+
const kbText = kb.length > MAX_KB_CHARS
|
|
146
|
+
? kb.slice(0, MAX_KB_CHARS - 20) + '\n[... truncated]'
|
|
147
|
+
: kb;
|
|
148
|
+
const allPriorTasks = [...priorProbeTasks];
|
|
271
149
|
const probes = [];
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
150
|
+
let attempts = 0;
|
|
151
|
+
while (probes.length < maxProbes && attempts <= MAX_TOPUP_ATTEMPTS) {
|
|
152
|
+
if (signal?.aborted)
|
|
153
|
+
break;
|
|
154
|
+
attempts++;
|
|
155
|
+
const remaining = maxProbes - probes.length;
|
|
156
|
+
const systemPrompt = buildProbeSystemPrompt(remaining);
|
|
157
|
+
const userPrompt = buildProbeUserPrompt(projectName, currentAgentsMd, kbText, allPriorTasks);
|
|
158
|
+
const messages = [
|
|
159
|
+
{ role: 'system', content: systemPrompt },
|
|
160
|
+
{ role: 'user', content: userPrompt },
|
|
161
|
+
];
|
|
162
|
+
let rawProbes = [];
|
|
163
|
+
try {
|
|
164
|
+
const response = await (0, llmUtil_1.chatWithTemp)(provider, messages, 0.9, signal);
|
|
165
|
+
rawProbes = parseProbeResponse(response);
|
|
166
|
+
}
|
|
167
|
+
catch (err) {
|
|
168
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
169
|
+
log?.warn(`Probe generation attempt ${attempts} failed: ${msg}`);
|
|
170
|
+
}
|
|
171
|
+
// Process and deduplicate
|
|
172
|
+
for (const raw of rawProbes) {
|
|
173
|
+
if (probes.length >= maxProbes)
|
|
174
|
+
break;
|
|
175
|
+
if (!raw.task || !raw.expected_behaviors?.length)
|
|
176
|
+
continue;
|
|
177
|
+
if (isDuplicate(raw.task, allPriorTasks))
|
|
178
|
+
continue;
|
|
179
|
+
const id = `probe-${probes.length + 1}-${normalizeProbeText(raw.task).slice(0, 30).replace(/\s/g, '-')}`;
|
|
180
|
+
probes.push({
|
|
181
|
+
id,
|
|
182
|
+
task: raw.task,
|
|
183
|
+
expectedBehaviors: raw.expected_behaviors.slice(0, 4),
|
|
184
|
+
rationale: raw.rationale,
|
|
185
|
+
});
|
|
186
|
+
allPriorTasks.push(raw.task);
|
|
187
|
+
}
|
|
188
|
+
// If first attempt got nothing, try again before falling back
|
|
189
|
+
if (rawProbes.length === 0 && attempts >= 2)
|
|
190
|
+
break;
|
|
291
191
|
}
|
|
292
|
-
//
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
192
|
+
// Fallback to hardcoded templates if still under target
|
|
193
|
+
if (probes.length < maxProbes) {
|
|
194
|
+
log?.debug(`Using fallback probes (generated ${probes.length}/${maxProbes})`);
|
|
195
|
+
for (const fallback of FALLBACK_PROBES) {
|
|
196
|
+
if (probes.length >= maxProbes)
|
|
197
|
+
break;
|
|
198
|
+
if (isDuplicate(fallback.task, allPriorTasks))
|
|
199
|
+
continue;
|
|
200
|
+
const id = `fallback-${probes.length + 1}`;
|
|
201
|
+
probes.push({
|
|
202
|
+
id,
|
|
203
|
+
task: fallback.task,
|
|
204
|
+
expectedBehaviors: FALLBACK_BEHAVIORS,
|
|
205
|
+
rationale: fallback.rationale,
|
|
206
|
+
});
|
|
207
|
+
allPriorTasks.push(fallback.task);
|
|
299
208
|
}
|
|
300
209
|
}
|
|
301
|
-
|
|
210
|
+
log?.info(`Generated ${probes.length} probes (${attempts} LLM attempts)`);
|
|
211
|
+
return probes;
|
|
302
212
|
}
|
|
303
213
|
//# sourceMappingURL=probes.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"probes.js","sourceRoot":"","sources":["../src/probes.ts"],"names":[],"mappings":";AAAA
|
|
1
|
+
{"version":3,"file":"probes.js","sourceRoot":"","sources":["../src/probes.ts"],"names":[],"mappings":";AAAA;;;;;;;;;GASG;;AAoGH,gDAMC;AAED,kCAMC;AAUD,gDAuBC;AAWD,wCA0FC;AAjPD,uCAAyC;AAEzC,+DAA+D;AAE/D,MAAM,YAAY,GAAG,KAAM,CAAC;AAC5B,MAAM,kBAAkB,GAAG,CAAC,CAAC;AAE7B,+DAA+D;AAE/D,SAAS,sBAAsB,CAAC,SAAiB;IAC/C,OAAO;;;;;;;;;;;;mBAYU,SAAS;;;;;;;YAOhB,SAAS,UAAU,CAAC;AAChC,CAAC;AAED,SAAS,oBAAoB,CAC3B,WAAmB,EACnB,QAAgB,EAChB,MAAc,EACd,UAAoB;IAEpB,MAAM,YAAY,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC;QACxC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;QACvD,CAAC,CAAC,QAAQ,CAAC;IAEb,OAAO,YAAY,WAAW;;;;EAI9B,QAAQ;;;;;EAKR,MAAM;;;;EAIN,YAAY;;;;;;;uFAOyE,CAAC;AACxF,CAAC;AAED,+DAA+D;AAE/D,MAAM,kBAAkB,GAAG;IACzB,iDAAiD;IACjD,sCAAsC;IACtC,iDAAiD;CAClD,CAAC;AAEF,MAAM,eAAe,GAA+C;IAClE,EAAE,IAAI,EAAE,+IAA+I,EAAE,SAAS,EAAE,2CAA2C,EAAE;IACjN,EAAE,IAAI,EAAE,gKAAgK,EAAE,SAAS,EAAE,wCAAwC,EAAE;IAC/N,EAAE,IAAI,EAAE,kHAAkH,EAAE,SAAS,EAAE,uCAAuC,EAAE;IAChL,EAAE,IAAI,EAAE,+HAA+H,EAAE,SAAS,EAAE,6BAA6B,EAAE;IACnL,EAAE,IAAI,EAAE,yIAAyI,EAAE,SAAS,EAAE,6CAA6C,EAAE;IAC7M,EAAE,IAAI,EAAE,yIAAyI,EAAE,SAAS,EAAE,qCAAqC,EAAE;IACrM,EAAE,IAAI,EAAE,8GAA8G,EAAE,SAAS,EAAE,6CAA6C,EAAE;IAClL,EAAE,IAAI,EAAE,sHAAsH,EAAE,SAAS,EAAE,+BAA+B,EAAE;IAC5K,EAAE,IAAI,EAAE,6HAA6H,EAAE,SAAS,EAAE,4BAA4B,EAAE;IAChL,EAAE,IAAI,EAAE,mHAAmH,EAAE,SAAS,EAAE,qCAAqC,EAAE;IAC/K,EAAE,IAAI,EAAE,uHAAuH,EAAE,SAAS,EAAE,oCAAoC,EAAE;IAClL,EAAE,IAAI,EAAE,0GAA0G,EAAE,SAAS,EAAE,2BAA2B,EAAE;IAC5J,EAAE,IAAI,EAAE,sGAAsG,EAAE,SAAS,EAAE,sCAAsC,EAAE;IACnK,EAAE,IAAI,EAAE,sHAAsH,EAAE,SAAS,EAAE,iCAAiC,EAAE;IAC9K,EAAE,IAAI,EAAE,yGAAyG,EAAE,SAAS,EAAE,gCAAgC,EAAE;CACjK,CAAC;AAEF,+DAA+D;AAE/D,SAAgB,kBAAkB,CAAC,IAAY;IAC7C,OAAO,IAAI;SACR,WAAW,EAAE;SACb,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC;SAC3B,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,IAAI,EAAE,CAAC;AACZ,CAAC;AAED,SAAgB,WAAW,CAAC,IAAY,EAAE,QAAkB;IAC1D,MAAM,UAAU,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC;IAC5C,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE;QACzB,MAAM,CAAC,GAAG,kBAAkB,CAAC,CAAC,CAAC,CAAC;QAChC,OAAO,CAAC,KAAK,UAAU,IAAI,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;IAC9E,CAAC,CAAC,CAAC;AACL,CAAC;AAUD,SAAgB,kBAAkB,CAAC,GAAW;IAC5C,sBAAsB;IACtB,IAAI,OAAO,GAAG,GAAG,CAAC,OAAO,CAAC,2BAA2B,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IAElE,oBAAoB;IACpB,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,sBAAsB,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IAExF,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QACnC,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC;YAAE,OAAO,MAAoB,CAAC;IACzD,CAAC;IAAC,MAAM,CAAC;QACP,yBAAyB;QACzB,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;QAC3C,IAAI,KAAK,EAAE,CAAC;YACV,IAAI,CAAC;gBACH,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAe,CAAC;YAC5C,CAAC;YAAC,MAAM,CAAC;gBACP,eAAe;YACjB,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,+DAA+D;AAE/D;;;;;;GAMG;AACI,KAAK,UAAU,cAAc,CAAC,OAA8B;IACjE,MAAM,EACJ,EAAE,EACF,eAAe,EACf,eAAe,EACf,SAAS,GAAG,EAAE,EACd,QAAQ,EACR,WAAW,GAAG,SAAS,EACvB,GAAG,EACH,MAAM,GACP,GAAG,OAAO,CAAC;IAEZ,IAAI,MAAM,EAAE,OAAO;QAAE,OAAO,EAAE,CAAC;IAE/B,+BAA+B;IAC/B,MAAM,MAAM,GAAG,EAAE,CAAC,MAAM,GAAG,YAAY;QACrC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,YAAY,GAAG,EAAE,CAAC,GAAG,mBAAmB;QACtD,CAAC,CAAC,EAAE,CAAC;IAEP,MAAM,aAAa,GAAG,CAAC,GAAG,eAAe,CAAC,CAAC;IAC3C,MAAM,MAAM,GAAY,EAAE,CAAC;IAC3B,IAAI,QAAQ,GAAG,CAAC,CAAC;IAEjB,OAAO,MAAM,CAAC,MAAM,GAAG,SAAS,IAAI,QAAQ,IAAI,kBAAkB,EAAE,CAAC;QACnE,IAAI,MAAM,EAAE,OAAO;YAAE,MAAM;QAC3B,QAAQ,EAAE,CAAC;QAEX,MAAM,SAAS,GAAG,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5C,MAAM,YAAY,GAAG,sBAAsB,CAAC,SAAS,CAAC,CAAC;QACvD,MAAM,UAAU,GAAG,oBAAoB,CACrC,WAAW,EACX,eAAe,EACf,MAAM,EACN,aAAa,CACd,CAAC;QAEF,MAAM,QAAQ,GAAkB;YAC9B,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,YAAY,EAAE;YACzC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE;SACtC,CAAC;QAEF,IAAI,SAAS,GAAe,EAAE,CAAC;QAC/B,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,IAAA,sBAAY,EAAC,QAAQ,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;YACrE,SAAS,GAAG,kBAAkB,CAAC,QAAQ,CAAC,CAAC;QAC3C,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC7D,GAAG,EAAE,IAAI,CAAC,4BAA4B,QAAQ,YAAY,GAAG,EAAE,CAAC,CAAC;QACnE,CAAC;QAED,0BAA0B;QAC1B,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;YAC5B,IAAI,MAAM,CAAC,MAAM,IAAI,SAAS;gBAAE,MAAM;YACtC,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,kBAAkB,EAAE,MAAM;gBAAE,SAAS;YAC3D,IAAI,WAAW,CAAC,GAAG,CAAC,IAAI,EAAE,aAAa,CAAC;gBAAE,SAAS;YAEnD,MAAM,EAAE,GAAG,SAAS,MAAM,CAAC,MAAM,GAAG,CAAC,IAAI,kBAAkB,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,EAAE,CAAC;YACzG,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE;gBACF,IAAI,EAAE,GAAG,CAAC,IAAI;gBACd,iBAAiB,EAAE,GAAG,CAAC,kBAAkB,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;gBACrD,SAAS,EAAE,GAAG,CAAC,SAAS;aACzB,CAAC,CAAC;YACH,aAAa,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAC/B,CAAC;QAED,8DAA8D;QAC9D,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,IAAI,QAAQ,IAAI,CAAC;YAAE,MAAM;IACrD,CAAC;IAED,wDAAwD;IACxD,IAAI,MAAM,CAAC,MAAM,GAAG,SAAS,EAAE,CAAC;QAC9B,GAAG,EAAE,KAAK,CAAC,oCAAoC,MAAM,CAAC,MAAM,IAAI,SAAS,GAAG,CAAC,CAAC;QAC9E,KAAK,MAAM,QAAQ,IAAI,eAAe,EAAE,CAAC;YACvC,IAAI,MAAM,CAAC,MAAM,IAAI,SAAS;gBAAE,MAAM;YACtC,IAAI,WAAW,CAAC,QAAQ,CAAC,IAAI,EAAE,aAAa,CAAC;gBAAE,SAAS;YAExD,MAAM,EAAE,GAAG,YAAY,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC3C,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE;gBACF,IAAI,EAAE,QAAQ,CAAC,IAAI;gBACnB,iBAAiB,EAAE,kBAAkB;gBACrC,SAAS,EAAE,QAAQ,CAAC,SAAS;aAC9B,CAAC,CAAC;YACH,aAAa,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QACpC,CAAC;IACH,CAAC;IAED,GAAG,EAAE,IAAI,CAAC,aAAa,MAAM,CAAC,MAAM,YAAY,QAAQ,gBAAgB,CAAC,CAAC;IAC1E,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -2,43 +2,18 @@
|
|
|
2
2
|
* Probe runner — simulates AI responses to probes using AGENTS.md as context.
|
|
3
3
|
*
|
|
4
4
|
* For each probe, constructs a chat where:
|
|
5
|
-
* - System prompt = current AGENTS.md
|
|
5
|
+
* - System prompt = current AGENTS.md
|
|
6
6
|
* - User prompt = the probe's task
|
|
7
|
-
* Then sends it to the LLM and
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
import type { Probe, ProbeResult, BehaviorResult } from './types';
|
|
11
|
-
/**
|
|
12
|
-
* Build the system prompt for a probe run.
|
|
13
|
-
* Includes the AGENTS.md instructions and relevant file contents.
|
|
14
|
-
*/
|
|
15
|
-
declare function buildProbeSystemPrompt(agentsContent: string, probe: Probe, fileContents?: ReadonlyMap<string, string>): string;
|
|
16
|
-
/**
|
|
17
|
-
* Build the evaluation prompt that scores a probe response
|
|
18
|
-
* against expected behaviours.
|
|
7
|
+
* Then sends it to the LLM (temperature 0.0) and returns the raw response.
|
|
8
|
+
*
|
|
9
|
+
* Judging/evaluation is handled separately by the judge module.
|
|
19
10
|
*/
|
|
20
|
-
|
|
21
|
-
/** Parse the structured behaviour evaluation response. */
|
|
22
|
-
declare function parseBehaviorEval(response: string, expectedBehaviors: string[]): {
|
|
23
|
-
results: BehaviorResult[];
|
|
24
|
-
allPassed: boolean;
|
|
25
|
-
};
|
|
26
|
-
/** Callback invoked before/after each probe for live progress updates. */
|
|
27
|
-
export interface ProbeProgressCallback {
|
|
28
|
-
(info: {
|
|
29
|
-
probeIndex: number;
|
|
30
|
-
total: number;
|
|
31
|
-
probeId: string;
|
|
32
|
-
phase: 'starting' | 'done';
|
|
33
|
-
passed?: boolean;
|
|
34
|
-
}): void;
|
|
35
|
-
}
|
|
11
|
+
import type { Probe, SimulationResult, LlmProvider, OptLogger, ProbeProgressCallback } from './types';
|
|
36
12
|
/**
|
|
37
13
|
* Run all probes against the current AGENTS.md.
|
|
38
14
|
*
|
|
39
15
|
* Each probe is run sequentially (to respect rate limits).
|
|
40
|
-
* Returns results
|
|
16
|
+
* Returns simulation results (raw responses, no evaluation).
|
|
41
17
|
*/
|
|
42
|
-
export declare function runProbes(agentsContent: string, probes: Probe[], provider: LlmProvider,
|
|
43
|
-
export { buildProbeSystemPrompt, buildBehaviorEvalPrompt, parseBehaviorEval };
|
|
18
|
+
export declare function runProbes(agentsContent: string, probes: Probe[], provider: LlmProvider, log?: OptLogger, signal?: AbortSignal, onProbeProgress?: ProbeProgressCallback): Promise<SimulationResult[]>;
|
|
44
19
|
//# sourceMappingURL=runner.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../src/runner.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../src/runner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAGH,OAAO,KAAK,EACV,KAAK,EACL,gBAAgB,EAChB,WAAW,EACX,SAAS,EACT,qBAAqB,EACtB,MAAM,SAAS,CAAC;AAsCjB;;;;;GAKG;AACH,wBAAsB,SAAS,CAC7B,aAAa,EAAE,MAAM,EACrB,MAAM,EAAE,KAAK,EAAE,EACf,QAAQ,EAAE,WAAW,EACrB,GAAG,CAAC,EAAE,SAAS,EACf,MAAM,CAAC,EAAE,WAAW,EACpB,eAAe,CAAC,EAAE,qBAAqB,GACtC,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAkB7B"}
|