aspectcode 0.4.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. package/README.md +13 -0
  2. package/dist/agentsMdRenderer.d.ts +16 -0
  3. package/dist/agentsMdRenderer.d.ts.map +1 -0
  4. package/dist/agentsMdRenderer.js +137 -0
  5. package/dist/agentsMdRenderer.js.map +1 -0
  6. package/dist/auth.d.ts +31 -0
  7. package/dist/auth.d.ts.map +1 -0
  8. package/dist/auth.js +385 -0
  9. package/dist/auth.js.map +1 -0
  10. package/dist/autoResolve.d.ts +41 -0
  11. package/dist/autoResolve.d.ts.map +1 -0
  12. package/dist/autoResolve.js +196 -0
  13. package/dist/autoResolve.js.map +1 -0
  14. package/dist/changeEvaluator.d.ts +56 -0
  15. package/dist/changeEvaluator.d.ts.map +1 -0
  16. package/dist/changeEvaluator.js +674 -0
  17. package/dist/changeEvaluator.js.map +1 -0
  18. package/dist/cli.d.ts +12 -1
  19. package/dist/cli.d.ts.map +1 -1
  20. package/dist/cli.js +1 -1
  21. package/dist/cli.js.map +1 -1
  22. package/dist/config.d.ts +37 -17
  23. package/dist/config.d.ts.map +1 -1
  24. package/dist/config.js +50 -2
  25. package/dist/config.js.map +1 -1
  26. package/dist/dreamCycle.d.ts +57 -0
  27. package/dist/dreamCycle.d.ts.map +1 -0
  28. package/dist/dreamCycle.js +334 -0
  29. package/dist/dreamCycle.js.map +1 -0
  30. package/dist/kbBuilder.d.ts +1 -2
  31. package/dist/kbBuilder.d.ts.map +1 -1
  32. package/dist/kbBuilder.js +1 -2
  33. package/dist/kbBuilder.js.map +1 -1
  34. package/dist/main.d.ts +2 -1
  35. package/dist/main.d.ts.map +1 -1
  36. package/dist/main.js +149 -8
  37. package/dist/main.js.map +1 -1
  38. package/dist/optimize.d.ts +13 -6
  39. package/dist/optimize.d.ts.map +1 -1
  40. package/dist/optimize.js +433 -142
  41. package/dist/optimize.js.map +1 -1
  42. package/dist/pipeline.d.ts +21 -18
  43. package/dist/pipeline.d.ts.map +1 -1
  44. package/dist/pipeline.js +1139 -162
  45. package/dist/pipeline.js.map +1 -1
  46. package/dist/preferences.d.ts +80 -0
  47. package/dist/preferences.d.ts.map +1 -0
  48. package/dist/preferences.js +238 -0
  49. package/dist/preferences.js.map +1 -0
  50. package/dist/runtimeState.d.ts +30 -0
  51. package/dist/runtimeState.d.ts.map +1 -0
  52. package/dist/runtimeState.js +39 -0
  53. package/dist/runtimeState.js.map +1 -0
  54. package/dist/scopedRules.d.ts +84 -0
  55. package/dist/scopedRules.d.ts.map +1 -0
  56. package/dist/scopedRules.js +449 -0
  57. package/dist/scopedRules.js.map +1 -0
  58. package/dist/ui/Dashboard.d.ts +4 -16
  59. package/dist/ui/Dashboard.d.ts.map +1 -1
  60. package/dist/ui/Dashboard.js +339 -141
  61. package/dist/ui/Dashboard.js.map +1 -1
  62. package/dist/ui/MemoryMap.d.ts +16 -0
  63. package/dist/ui/MemoryMap.d.ts.map +1 -0
  64. package/dist/ui/MemoryMap.js +266 -0
  65. package/dist/ui/MemoryMap.js.map +1 -0
  66. package/dist/ui/SettingsPanel.d.ts +18 -0
  67. package/dist/ui/SettingsPanel.d.ts.map +1 -0
  68. package/dist/ui/SettingsPanel.js +241 -0
  69. package/dist/ui/SettingsPanel.js.map +1 -0
  70. package/dist/ui/prompts.d.ts +7 -0
  71. package/dist/ui/prompts.d.ts.map +1 -1
  72. package/dist/ui/prompts.js +63 -0
  73. package/dist/ui/prompts.js.map +1 -1
  74. package/dist/ui/store.d.ts +154 -18
  75. package/dist/ui/store.d.ts.map +1 -1
  76. package/dist/ui/store.js +154 -24
  77. package/dist/ui/store.js.map +1 -1
  78. package/dist/ui/theme.d.ts +1 -8
  79. package/dist/ui/theme.d.ts.map +1 -1
  80. package/dist/ui/theme.js +2 -20
  81. package/dist/ui/theme.js.map +1 -1
  82. package/dist/updateChecker.d.ts +13 -0
  83. package/dist/updateChecker.d.ts.map +1 -0
  84. package/dist/updateChecker.js +66 -0
  85. package/dist/updateChecker.js.map +1 -0
  86. package/dist/usageTracker.d.ts +12 -0
  87. package/dist/usageTracker.d.ts.map +1 -0
  88. package/dist/usageTracker.js +89 -0
  89. package/dist/usageTracker.js.map +1 -0
  90. package/dist/writer.d.ts +1 -7
  91. package/dist/writer.d.ts.map +1 -1
  92. package/dist/writer.js +1 -11
  93. package/dist/writer.js.map +1 -1
  94. package/node_modules/@aspectcode/core/dist/analysis/repo.d.ts.map +1 -1
  95. package/node_modules/@aspectcode/core/dist/analysis/repo.js +13 -2
  96. package/node_modules/@aspectcode/core/dist/analysis/repo.js.map +1 -1
  97. package/node_modules/@aspectcode/core/dist/index.d.ts +1 -3
  98. package/node_modules/@aspectcode/core/dist/index.d.ts.map +1 -1
  99. package/node_modules/@aspectcode/core/dist/index.js +1 -3
  100. package/node_modules/@aspectcode/core/dist/index.js.map +1 -1
  101. package/node_modules/@aspectcode/core/dist/parsers/genericExtractors.d.ts +14 -0
  102. package/node_modules/@aspectcode/core/dist/parsers/genericExtractors.d.ts.map +1 -0
  103. package/node_modules/@aspectcode/core/dist/parsers/genericExtractors.js +191 -0
  104. package/node_modules/@aspectcode/core/dist/parsers/genericExtractors.js.map +1 -0
  105. package/node_modules/@aspectcode/core/dist/parsers/index.d.ts +1 -0
  106. package/node_modules/@aspectcode/core/dist/parsers/index.d.ts.map +1 -1
  107. package/node_modules/@aspectcode/core/dist/parsers/index.js +6 -1
  108. package/node_modules/@aspectcode/core/dist/parsers/index.js.map +1 -1
  109. package/node_modules/@aspectcode/core/dist/parsers/languages.d.ts +20 -0
  110. package/node_modules/@aspectcode/core/dist/parsers/languages.d.ts.map +1 -1
  111. package/node_modules/@aspectcode/core/dist/parsers/languages.js +25 -0
  112. package/node_modules/@aspectcode/core/dist/parsers/languages.js.map +1 -1
  113. package/node_modules/@aspectcode/core/dist/parsers/tsJsExtractors.d.ts.map +1 -1
  114. package/node_modules/@aspectcode/core/dist/parsers/tsJsExtractors.js +4 -1
  115. package/node_modules/@aspectcode/core/dist/parsers/tsJsExtractors.js.map +1 -1
  116. package/node_modules/@aspectcode/core/package.json +2 -2
  117. package/node_modules/@aspectcode/core/parsers/cpp.wasm +0 -0
  118. package/node_modules/@aspectcode/core/parsers/go.wasm +0 -0
  119. package/node_modules/@aspectcode/core/parsers/php.wasm +0 -0
  120. package/node_modules/@aspectcode/core/parsers/ruby.wasm +0 -0
  121. package/node_modules/@aspectcode/core/parsers/rust.wasm +0 -0
  122. package/node_modules/@aspectcode/emitters/dist/index.d.ts +1 -17
  123. package/node_modules/@aspectcode/emitters/dist/index.d.ts.map +1 -1
  124. package/node_modules/@aspectcode/emitters/dist/index.js +2 -89
  125. package/node_modules/@aspectcode/emitters/dist/index.js.map +1 -1
  126. package/node_modules/@aspectcode/emitters/dist/instructions/index.d.ts +0 -2
  127. package/node_modules/@aspectcode/emitters/dist/instructions/index.d.ts.map +1 -1
  128. package/node_modules/@aspectcode/emitters/dist/instructions/index.js +1 -7
  129. package/node_modules/@aspectcode/emitters/dist/instructions/index.js.map +1 -1
  130. package/node_modules/@aspectcode/emitters/dist/kb/analyzers.d.ts +0 -18
  131. package/node_modules/@aspectcode/emitters/dist/kb/analyzers.d.ts.map +1 -1
  132. package/node_modules/@aspectcode/emitters/dist/kb/analyzers.js +0 -57
  133. package/node_modules/@aspectcode/emitters/dist/kb/analyzers.js.map +1 -1
  134. package/node_modules/@aspectcode/emitters/dist/kb/conventions.d.ts +0 -18
  135. package/node_modules/@aspectcode/emitters/dist/kb/conventions.d.ts.map +1 -1
  136. package/node_modules/@aspectcode/emitters/dist/kb/conventions.js +0 -130
  137. package/node_modules/@aspectcode/emitters/dist/kb/conventions.js.map +1 -1
  138. package/node_modules/@aspectcode/emitters/dist/kb/index.d.ts +2 -4
  139. package/node_modules/@aspectcode/emitters/dist/kb/index.d.ts.map +1 -1
  140. package/node_modules/@aspectcode/emitters/dist/kb/index.js +1 -11
  141. package/node_modules/@aspectcode/emitters/dist/kb/index.js.map +1 -1
  142. package/node_modules/@aspectcode/emitters/package.json +3 -3
  143. package/node_modules/@aspectcode/evaluator/dist/apply.d.ts +55 -0
  144. package/node_modules/@aspectcode/evaluator/dist/apply.d.ts.map +1 -0
  145. package/node_modules/@aspectcode/evaluator/dist/apply.js +368 -0
  146. package/node_modules/@aspectcode/evaluator/dist/apply.js.map +1 -0
  147. package/node_modules/@aspectcode/evaluator/dist/diagnosis.d.ts +16 -25
  148. package/node_modules/@aspectcode/evaluator/dist/diagnosis.d.ts.map +1 -1
  149. package/node_modules/@aspectcode/evaluator/dist/diagnosis.js +115 -138
  150. package/node_modules/@aspectcode/evaluator/dist/diagnosis.js.map +1 -1
  151. package/node_modules/@aspectcode/evaluator/dist/index.d.ts +8 -43
  152. package/node_modules/@aspectcode/evaluator/dist/index.d.ts.map +1 -1
  153. package/node_modules/@aspectcode/evaluator/dist/index.js +15 -61
  154. package/node_modules/@aspectcode/evaluator/dist/index.js.map +1 -1
  155. package/node_modules/@aspectcode/evaluator/dist/judge.d.ts +32 -0
  156. package/node_modules/@aspectcode/evaluator/dist/judge.d.ts.map +1 -0
  157. package/node_modules/@aspectcode/evaluator/dist/judge.js +165 -0
  158. package/node_modules/@aspectcode/evaluator/dist/judge.js.map +1 -0
  159. package/node_modules/@aspectcode/evaluator/dist/llmUtil.d.ts +15 -0
  160. package/node_modules/@aspectcode/evaluator/dist/llmUtil.d.ts.map +1 -0
  161. package/node_modules/@aspectcode/evaluator/dist/llmUtil.js +41 -0
  162. package/node_modules/@aspectcode/evaluator/dist/llmUtil.js.map +1 -0
  163. package/node_modules/@aspectcode/evaluator/dist/probes.d.ts +20 -29
  164. package/node_modules/@aspectcode/evaluator/dist/probes.d.ts.map +1 -1
  165. package/node_modules/@aspectcode/evaluator/dist/probes.js +188 -204
  166. package/node_modules/@aspectcode/evaluator/dist/probes.js.map +1 -1
  167. package/node_modules/@aspectcode/evaluator/dist/runner.d.ts +7 -32
  168. package/node_modules/@aspectcode/evaluator/dist/runner.d.ts.map +1 -1
  169. package/node_modules/@aspectcode/evaluator/dist/runner.js +21 -146
  170. package/node_modules/@aspectcode/evaluator/dist/runner.js.map +1 -1
  171. package/node_modules/@aspectcode/evaluator/dist/types.d.ts +141 -99
  172. package/node_modules/@aspectcode/evaluator/dist/types.d.ts.map +1 -1
  173. package/node_modules/@aspectcode/evaluator/dist/types.js +10 -2
  174. package/node_modules/@aspectcode/evaluator/dist/types.js.map +1 -1
  175. package/node_modules/@aspectcode/evaluator/package.json +4 -4
  176. package/node_modules/@aspectcode/optimizer/dist/index.d.ts +3 -10
  177. package/node_modules/@aspectcode/optimizer/dist/index.d.ts.map +1 -1
  178. package/node_modules/@aspectcode/optimizer/dist/index.js +1 -19
  179. package/node_modules/@aspectcode/optimizer/dist/index.js.map +1 -1
  180. package/node_modules/@aspectcode/optimizer/dist/providers/anthropic.d.ts.map +1 -1
  181. package/node_modules/@aspectcode/optimizer/dist/providers/anthropic.js +40 -0
  182. package/node_modules/@aspectcode/optimizer/dist/providers/anthropic.js.map +1 -1
  183. package/node_modules/@aspectcode/optimizer/dist/providers/aspectcode.d.ts +9 -0
  184. package/node_modules/@aspectcode/optimizer/dist/providers/aspectcode.d.ts.map +1 -0
  185. package/node_modules/@aspectcode/optimizer/dist/providers/aspectcode.js +83 -0
  186. package/node_modules/@aspectcode/optimizer/dist/providers/aspectcode.js.map +1 -0
  187. package/node_modules/@aspectcode/optimizer/dist/providers/index.d.ts +4 -3
  188. package/node_modules/@aspectcode/optimizer/dist/providers/index.d.ts.map +1 -1
  189. package/node_modules/@aspectcode/optimizer/dist/providers/index.js +24 -10
  190. package/node_modules/@aspectcode/optimizer/dist/providers/index.js.map +1 -1
  191. package/node_modules/@aspectcode/optimizer/dist/providers/openai.d.ts.map +1 -1
  192. package/node_modules/@aspectcode/optimizer/dist/providers/openai.js +22 -0
  193. package/node_modules/@aspectcode/optimizer/dist/providers/openai.js.map +1 -1
  194. package/node_modules/@aspectcode/optimizer/dist/providers/retry.d.ts +14 -0
  195. package/node_modules/@aspectcode/optimizer/dist/providers/retry.d.ts.map +1 -1
  196. package/node_modules/@aspectcode/optimizer/dist/providers/retry.js +1 -0
  197. package/node_modules/@aspectcode/optimizer/dist/providers/retry.js.map +1 -1
  198. package/node_modules/@aspectcode/optimizer/dist/types.d.ts +14 -0
  199. package/node_modules/@aspectcode/optimizer/dist/types.d.ts.map +1 -1
  200. package/node_modules/@aspectcode/optimizer/dist/types.js.map +1 -1
  201. package/node_modules/@aspectcode/optimizer/package.json +2 -2
  202. package/package.json +6 -7
  203. package/dist/complaintProcessor.d.ts +0 -16
  204. package/dist/complaintProcessor.d.ts.map +0 -1
  205. package/dist/complaintProcessor.js +0 -134
  206. package/dist/complaintProcessor.js.map +0 -1
  207. package/node_modules/@aspectcode/emitters/dist/emitter.d.ts +0 -72
  208. package/node_modules/@aspectcode/emitters/dist/emitter.d.ts.map +0 -1
  209. package/node_modules/@aspectcode/emitters/dist/emitter.js +0 -10
  210. package/node_modules/@aspectcode/emitters/dist/emitter.js.map +0 -1
  211. package/node_modules/@aspectcode/emitters/dist/instructions/content.d.ts +0 -15
  212. package/node_modules/@aspectcode/emitters/dist/instructions/content.d.ts.map +0 -1
  213. package/node_modules/@aspectcode/emitters/dist/instructions/content.js +0 -289
  214. package/node_modules/@aspectcode/emitters/dist/instructions/content.js.map +0 -1
  215. package/node_modules/@aspectcode/emitters/dist/instructions/detection.d.ts +0 -13
  216. package/node_modules/@aspectcode/emitters/dist/instructions/detection.d.ts.map +0 -1
  217. package/node_modules/@aspectcode/emitters/dist/instructions/detection.js +0 -55
  218. package/node_modules/@aspectcode/emitters/dist/instructions/detection.js.map +0 -1
  219. package/node_modules/@aspectcode/emitters/dist/instructions/instructionsEmitter.d.ts +0 -9
  220. package/node_modules/@aspectcode/emitters/dist/instructions/instructionsEmitter.d.ts.map +0 -1
  221. package/node_modules/@aspectcode/emitters/dist/instructions/instructionsEmitter.js +0 -30
  222. package/node_modules/@aspectcode/emitters/dist/instructions/instructionsEmitter.js.map +0 -1
  223. package/node_modules/@aspectcode/emitters/dist/kb/kbEmitter.d.ts +0 -21
  224. package/node_modules/@aspectcode/emitters/dist/kb/kbEmitter.d.ts.map +0 -1
  225. package/node_modules/@aspectcode/emitters/dist/kb/kbEmitter.js +0 -125
  226. package/node_modules/@aspectcode/emitters/dist/kb/kbEmitter.js.map +0 -1
  227. package/node_modules/@aspectcode/emitters/dist/manifest.d.ts +0 -37
  228. package/node_modules/@aspectcode/emitters/dist/manifest.d.ts.map +0 -1
  229. package/node_modules/@aspectcode/emitters/dist/manifest.js +0 -50
  230. package/node_modules/@aspectcode/emitters/dist/manifest.js.map +0 -1
  231. package/node_modules/@aspectcode/emitters/dist/report.d.ts +0 -22
  232. package/node_modules/@aspectcode/emitters/dist/report.d.ts.map +0 -1
  233. package/node_modules/@aspectcode/emitters/dist/report.js +0 -3
  234. package/node_modules/@aspectcode/emitters/dist/report.js.map +0 -1
  235. package/node_modules/@aspectcode/emitters/dist/stableJson.d.ts +0 -14
  236. package/node_modules/@aspectcode/emitters/dist/stableJson.d.ts.map +0 -1
  237. package/node_modules/@aspectcode/emitters/dist/stableJson.js +0 -40
  238. package/node_modules/@aspectcode/emitters/dist/stableJson.js.map +0 -1
  239. package/node_modules/@aspectcode/emitters/dist/transaction.d.ts +0 -29
  240. package/node_modules/@aspectcode/emitters/dist/transaction.d.ts.map +0 -1
  241. package/node_modules/@aspectcode/emitters/dist/transaction.js +0 -104
  242. package/node_modules/@aspectcode/emitters/dist/transaction.js.map +0 -1
@@ -0,0 +1,165 @@
1
+ "use strict";
2
+ /**
3
+ * Per-probe judge — evaluates AI responses with strong/partial/missing assessments.
4
+ *
5
+ * For each probe, the judge reviews the simulated response against expected
6
+ * behaviours and proposes targeted AGENTS.md edits.
7
+ *
8
+ * Ported from sweagent_bench oracle/judge.py.
9
+ */
10
+ Object.defineProperty(exports, "__esModule", { value: true });
11
+ exports.parseJudgeResponse = parseJudgeResponse;
12
+ exports.judgeProbe = judgeProbe;
13
+ const llmUtil_1 = require("./llmUtil");
14
+ // ── Prompts: system prompt telling the judge LLM how to grade each behavior and which JSON shape to return ──
15
+ const JUDGE_SYSTEM = `You are an evaluator/editor for AGENTS.md quality.
16
+ You will be given a TASK, the assistant RESPONSE, and EXPECTED BEHAVIORS.
17
+
18
+ Assess each behavior with one of: "strong", "partial", "missing".
19
+
20
+ Return a JSON object with this exact shape:
21
+ {
22
+ "behavior_reviews": [
23
+ {
24
+ "behavior": "...",
25
+ "assessment": "strong|partial|missing",
26
+ "evidence": "short evidence from response",
27
+ "improvement": "what AGENTS.md should add/change"
28
+ }
29
+ ],
30
+ "proposed_edits": [
31
+ {"section": "Operating Mode|Procedural Standards|High-Impact Hubs|Entry Points|Import Chains|Validation|Integration Risk|Conventions|Guardrails", "action": "add|modify|strengthen|remove", "content": "..."}
32
+ ],
33
+ "overall_notes": "short summary"
34
+ }
35
+
36
+ Rules:
37
+ - Judge whether the response produced a focused, plausible fix grounded in repo evidence.
38
+ - Prefer edits that improve repo-specific guidance, not generic checklists.
39
+ - The "content" field must be the ACTUAL guideline text to appear in AGENTS.md as a bullet point.
40
+ Write it as a direct imperative (e.g. "Verify component exists before importing").
41
+ NEVER write meta-instructions like "Add a step to..." or "Include an example of...".
42
+ - Content must be general enough to help across the repo, not tied to one probe scenario.
43
+ - Edits are optional; return [] if behavior is already strong.
44
+ - Return at most 3 proposed edits.
45
+ - Output ONLY valid JSON.`;
46
/**
 * Assemble the user-turn prompt sent to the judge.
 *
 * The prompt has four parts separated by blank lines: the task, the
 * assistant response, the expected behaviours as a 1-based numbered list,
 * and a closing instruction asking for the JSON verdict.
 *
 * @param task - Task text that was given to the assistant.
 * @param response - The assistant's answer to that task.
 * @param expectedBehaviors - Behaviours the answer is expected to show.
 * @returns The complete user prompt as a single string.
 */
function buildJudgeUserPrompt(task, response, expectedBehaviors) {
    const numberedLines = expectedBehaviors.map((behavior, index) => {
        const position = index + 1;
        return `${position}. ${behavior}`;
    });
    const sections = [
        `TASK:\n${task}`,
        `RESPONSE:\n${response}`,
        `EXPECTED BEHAVIORS:\n${numberedLines.join('\n')}`,
        'Produce behavior_reviews and proposed_edits JSON.',
    ];
    return sections.join('\n\n');
}
50
/**
 * Parse the judge LLM's raw reply into a JSON object.
 *
 * The reply is cleaned first: `<think>…</think>` segments are removed and a
 * surrounding Markdown code fence is stripped. The cleaned text is parsed as
 * JSON; if that does not yield an object, the span from the first `{` to the
 * last `}` is tried instead, so a verdict wrapped in chatter is still found.
 *
 * Only a plain JSON object is accepted. Valid JSON that is a primitive or an
 * array (e.g. `42`, `"text"`, `[]`) cannot carry `behavior_reviews` or
 * `proposed_edits`, so it is reported as a parse failure rather than being
 * handed to the caller as if it were a verdict.
 *
 * @param raw - Raw text returned by the judge LLM.
 * @returns The parsed object, or `null` when no JSON object can be recovered
 *          (including when `raw` is not a string).
 */
function parseJudgeResponse(raw) {
    if (typeof raw !== 'string') {
        return null;
    }
    // Parse `text` and keep the result only when it is a non-null, non-array object.
    const parseObject = (text) => {
        try {
            const value = JSON.parse(text);
            const isPlainObject = value !== null && typeof value === 'object' && !Array.isArray(value);
            return isPlainObject ? value : null;
        }
        catch {
            return null;
        }
    };
    // Strip thinking tags if present
    let cleaned = raw.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
    // Strip code fences
    cleaned = cleaned.replace(/^```(?:json)?\s*\n?/m, '').replace(/\n?```\s*$/m, '').trim();
    const direct = parseObject(cleaned);
    if (direct) {
        return direct;
    }
    // Fall back to the outermost {...} span embedded in surrounding text
    const match = cleaned.match(/\{[\s\S]*\}/);
    return match ? parseObject(match[0]) : null;
}
72
+ // ── Public API ──────────────────────────────────────────────
73
/**
 * Judge a single probe's response against expected behaviours.
 *
 * Sends the task, the response and the expected behaviours to the judge LLM
 * at temperature 0 and normalises its JSON verdict into structured
 * assessments (strong/partial/missing) and up to 3 proposed AGENTS.md edits.
 *
 * A failed LLM call or an unparseable reply does not reject: the result then
 * marks every expected behaviour as `missing`, carries no edits, and explains
 * the failure in `improvement` / `overallNotes`. If `signal` is already
 * aborted on entry, an empty result with `overallNotes: 'Cancelled'` is
 * returned without calling the LLM.
 *
 * Normalisation rules:
 * - an unrecognised `assessment` becomes `'missing'`;
 * - an unrecognised `action` becomes `'add'`;
 * - edits lacking `section`, `action` or `content` are discarded, and the
 *   first 3 remaining edits are kept (incomplete edits do not use up slots);
 * - `behavior_reviews` / `proposed_edits` that are not arrays are treated as
 *   empty, and non-object review entries are skipped.
 *
 * @param options - `task`, `response`, `expectedBehaviors`, `probeId` and
 *                  `provider`, plus optional `log` and `signal`.
 * @returns An object with `probeId`, `task`, `response`, `behaviorReviews`,
 *          `proposedEdits` (each tagged `motivatedBy: [probeId]`) and
 *          `overallNotes`.
 */
async function judgeProbe(options) {
    const { task, response, expectedBehaviors, probeId, provider, log, signal } = options;
    const MAX_EDITS = 3;
    const ASSESSMENTS = ['strong', 'partial', 'missing'];
    const ACTIONS = ['add', 'modify', 'strengthen', 'remove'];
    // Result used when no verdict is available: every behaviour counts as missing.
    const failureResult = (improvement, overallNotes) => ({
        probeId,
        task,
        response,
        behaviorReviews: expectedBehaviors.map((b) => ({
            behavior: b,
            assessment: 'missing',
            evidence: '',
            improvement,
        })),
        proposedEdits: [],
        overallNotes,
    });
    if (signal?.aborted) {
        return {
            probeId,
            task,
            response,
            behaviorReviews: [],
            proposedEdits: [],
            overallNotes: 'Cancelled',
        };
    }
    log?.debug(`Judging probe: ${probeId}`);
    const userPrompt = buildJudgeUserPrompt(task, response, expectedBehaviors);
    const messages = [
        { role: 'system', content: JUDGE_SYSTEM },
        { role: 'user', content: userPrompt },
    ];
    let llmResponse;
    try {
        // Temperature 0 is requested for the judge call.
        llmResponse = await (0, llmUtil_1.chatWithTemp)(provider, messages, 0.0, signal);
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        log?.warn(`Judge call failed for ${probeId}: ${msg}`);
        return failureResult(`Judge call failed: ${msg}`, `Judge call failed: ${msg}`);
    }
    const parsed = parseJudgeResponse(llmResponse);
    if (!parsed) {
        log?.warn(`Could not parse judge response for ${probeId}`);
        return failureResult('Could not parse judge response', 'Failed to parse judge response');
    }
    // Map behavior reviews; tolerate a malformed (non-array) field and junk entries.
    const rawReviews = Array.isArray(parsed.behavior_reviews) ? parsed.behavior_reviews : [];
    const behaviorReviews = rawReviews
        .filter((br) => br !== null && typeof br === 'object')
        .map((br) => ({
        behavior: br.behavior,
        assessment: ASSESSMENTS.includes(br.assessment) ? br.assessment : 'missing',
        evidence: br.evidence || '',
        improvement: br.improvement || '',
    }));
    // Map proposed edits; drop incomplete ones BEFORE applying the cap so that
    // invalid entries cannot crowd out valid ones.
    const rawEdits = Array.isArray(parsed.proposed_edits) ? parsed.proposed_edits : [];
    const proposedEdits = rawEdits
        .filter((e) => e && e.section && e.action && e.content)
        .slice(0, MAX_EDITS)
        .map((e) => ({
        section: e.section,
        action: ACTIONS.includes(e.action) ? e.action : 'add',
        content: e.content,
        motivatedBy: [probeId],
    }));
    return {
        probeId,
        task,
        response,
        behaviorReviews,
        proposedEdits,
        overallNotes: parsed.overall_notes || '',
    };
}
165
+ //# sourceMappingURL=judge.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"judge.js","sourceRoot":"","sources":["../src/judge.ts"],"names":[],"mappings":";AAAA;;;;;;;GAOG;;AAuEH,gDAqBC;AAUD,gCA6FC;AA1LD,uCAAyC;AAEzC,+DAA+D;AAE/D,MAAM,YAAY,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;0BA8BK,CAAC;AAE3B,SAAS,oBAAoB,CAC3B,IAAY,EACZ,QAAgB,EAChB,iBAA2B;IAE3B,MAAM,SAAS,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC/E,OAAO,UAAU,IAAI,kBAAkB,QAAQ,4BAA4B,SAAS,uDAAuD,CAAC;AAC9I,CAAC;AAmBD,SAAgB,kBAAkB,CAAC,GAAW;IAC5C,iCAAiC;IACjC,IAAI,OAAO,GAAG,GAAG,CAAC,OAAO,CAAC,2BAA2B,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IAElE,oBAAoB;IACpB,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,sBAAsB,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IAExF,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAkB,CAAC;IAC9C,CAAC;IAAC,MAAM,CAAC;QACP,0CAA0C;QAC1C,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;QAC3C,IAAI,KAAK,EAAE,CAAC;YACV,IAAI,CAAC;gBACH,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAkB,CAAC;YAC/C,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,+DAA+D;AAE/D;;;;;GAKG;AACI,KAAK,UAAU,UAAU,CAAC,OAAqB;IACpD,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,iBAAiB,EAAE,OAAO,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC;IAEtF,IAAI,MAAM,EAAE,OAAO,EAAE,CAAC;QACpB,OAAO;YACL,OAAO;YACP,IAAI;YACJ,QAAQ;YACR,eAAe,EAAE,EAAE;YACnB,aAAa,EAAE,EAAE;YACjB,YAAY,EAAE,WAAW;SAC1B,CAAC;IACJ,CAAC;IAED,GAAG,EAAE,KAAK,CAAC,kBAAkB,OAAO,EAAE,CAAC,CAAC;IAExC,MAAM,UAAU,GAAG,oBAAoB,CAAC,IAAI,EAAE,QAAQ,EAAE,iBAAiB,CAAC,CAAC;IAC3E,MAAM,QAAQ,GAAkB;QAC9B,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,YAAY,EAAE;QACzC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE;KACtC,CAAC;IAEF,IAAI,WAAmB,CAAC;IACxB,IAAI,CAAC;QACH,WAAW,GAAG,MAAM,IAAA,sBAAY,EAAC,QAAQ,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;IACpE,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QAC7D,GAAG,EAAE,IAAI,CAAC,yBAAyB,OAAO,KAAK,G
AAG,EAAE,CAAC,CAAC;QACtD,OAAO;YACL,OAAO;YACP,IAAI;YACJ,QAAQ;YACR,eAAe,EAAE,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBAC7C,QAAQ,EAAE,CAAC;gBACX,UAAU,EAAE,SAAkB;gBAC9B,QAAQ,EAAE,EAAE;gBACZ,WAAW,EAAE,sBAAsB,GAAG,EAAE;aACzC,CAAC,CAAC;YACH,aAAa,EAAE,EAAE;YACjB,YAAY,EAAE,sBAAsB,GAAG,EAAE;SAC1C,CAAC;IACJ,CAAC;IAED,MAAM,MAAM,GAAG,kBAAkB,CAAC,WAAW,CAAC,CAAC;IAE/C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,GAAG,EAAE,IAAI,CAAC,sCAAsC,OAAO,EAAE,CAAC,CAAC;QAC3D,OAAO;YACL,OAAO;YACP,IAAI;YACJ,QAAQ;YACR,eAAe,EAAE,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBAC7C,QAAQ,EAAE,CAAC;gBACX,UAAU,EAAE,SAAkB;gBAC9B,QAAQ,EAAE,EAAE;gBACZ,WAAW,EAAE,gCAAgC;aAC9C,CAAC,CAAC;YACH,aAAa,EAAE,EAAE;YACjB,YAAY,EAAE,gCAAgC;SAC/C,CAAC;IACJ,CAAC;IAED,uBAAuB;IACvB,MAAM,eAAe,GAAqB,CAAC,MAAM,CAAC,gBAAgB,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC;QACrF,QAAQ,EAAE,EAAE,CAAC,QAAQ;QACrB,UAAU,EAAE,CAAC,CAAC,QAAQ,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,UAAU,CAAC;YACnE,CAAC,CAAC,EAAE,CAAC,UAAU;YACf,CAAC,CAAC,SAAS,CAAiC;QAC9C,QAAQ,EAAE,EAAE,CAAC,QAAQ,IAAI,EAAE;QAC3B,WAAW,EAAE,EAAE,CAAC,WAAW,IAAI,EAAE;KAClC,CAAC,CAAC,CAAC;IAEJ,qBAAqB;IACrB,MAAM,aAAa,GAAiB,CAAC,MAAM,CAAC,cAAc,IAAI,EAAE,CAAC;SAC9D,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;SACX,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,IAAI,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,OAAO,CAAC;SACjD,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACX,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,MAAM,EAAE,CAAC,CAAC,KAAK,EAAE,QAAQ,EAAE,YAAY,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC;YACnE,CAAC,CAAC,CAAC,CAAC,MAAM;YACV,CAAC,CAAC,KAAK,CAAyB;QAClC,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,WAAW,EAAE,CAAC,OAAO,CAAC;KACvB,CAAC,CAAC,CAAC;IAEN,OAAO;QACL,OAAO;QACP,IAAI;QACJ,QAAQ;QACR,eAAe;QACf,aAAa;QACb,YAAY,EAAE,MAAM,CAAC,aAAa,IAAI,EAAE;KACzC,CAAC;AACJ,CAAC"}
@@ -0,0 +1,15 @@
1
+ /**
2
+ * LLM utility — helpers for calling providers with per-call options.
3
+ */
4
+ import type { ChatMessage } from '@aspectcode/optimizer';
5
+ import type { LlmProvider } from './types';
6
+ /**
7
+ * Call the LLM with a specific temperature.
8
+ * Uses `chatWithOptions` if available; otherwise falls back to `chat()`, which is called without the temperature.
9
+ *
10
+ * When an AbortSignal is provided and fires, the returned promise
11
+ * rejects immediately with an `AbortError` DOMException (the underlying HTTP call may still finish
12
+ * in the background, but the caller stops waiting).
13
+ */
14
+ export declare function chatWithTemp(provider: LlmProvider, messages: ChatMessage[], temperature: number, signal?: AbortSignal): Promise<string>;
15
+ //# sourceMappingURL=llmUtil.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"llmUtil.d.ts","sourceRoot":"","sources":["../src/llmUtil.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AACzD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AAE3C;;;;;;;GAOG;AACH,wBAAsB,YAAY,CAChC,QAAQ,EAAE,WAAW,EACrB,QAAQ,EAAE,WAAW,EAAE,EACvB,WAAW,EAAE,MAAM,EACnB,MAAM,CAAC,EAAE,WAAW,GACnB,OAAO,CAAC,MAAM,CAAC,CA0BjB"}
@@ -0,0 +1,41 @@
1
+ "use strict";
2
+ /**
3
+ * LLM utility — helpers for calling providers with per-call options.
4
+ */
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.chatWithTemp = chatWithTemp;
7
+ /**
8
+ * Call the LLM with a specific temperature.
9
+ * Uses `chatWithOptions` if available, falls back to `chat()`.
10
+ *
11
+ * When an AbortSignal is provided and fires, the returned promise
12
+ * rejects immediately (the underlying HTTP call may still finish
13
+ * in the background, but the caller stops waiting).
14
+ */
15
+ async function chatWithTemp(provider, messages, temperature, signal) {
16
+ if (signal?.aborted)
17
+ throw new DOMException('Aborted', 'AbortError');
18
+ const chatPromise = provider.chatWithOptions
19
+ ? provider.chatWithOptions(messages, { temperature })
20
+ : provider.chat(messages);
21
+ if (!signal)
22
+ return chatPromise;
23
+ // Race the chat against the abort signal, cleaning up the listener afterward
24
+ let cleanup;
25
+ const abortPromise = new Promise((_, reject) => {
26
+ if (signal.aborted) {
27
+ reject(new DOMException('Aborted', 'AbortError'));
28
+ return;
29
+ }
30
+ const handler = () => reject(new DOMException('Aborted', 'AbortError'));
31
+ signal.addEventListener('abort', handler, { once: true });
32
+ cleanup = () => signal.removeEventListener('abort', handler);
33
+ });
34
+ try {
35
+ return await Promise.race([chatPromise, abortPromise]);
36
+ }
37
+ finally {
38
+ cleanup?.();
39
+ }
40
+ }
41
+ //# sourceMappingURL=llmUtil.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"llmUtil.js","sourceRoot":"","sources":["../src/llmUtil.ts"],"names":[],"mappings":";AAAA;;GAEG;;AAaH,oCA+BC;AAvCD;;;;;;;GAOG;AACI,KAAK,UAAU,YAAY,CAChC,QAAqB,EACrB,QAAuB,EACvB,WAAmB,EACnB,MAAoB;IAEpB,IAAI,MAAM,EAAE,OAAO;QAAE,MAAM,IAAI,YAAY,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC;IAErE,MAAM,WAAW,GAAG,QAAQ,CAAC,eAAe;QAC1C,CAAC,CAAC,QAAQ,CAAC,eAAe,CAAC,QAAQ,EAAE,EAAE,WAAW,EAAE,CAAC;QACrD,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAE5B,IAAI,CAAC,MAAM;QAAE,OAAO,WAAW,CAAC;IAEhC,6EAA6E;IAC7E,IAAI,OAAiC,CAAC;IACtC,MAAM,YAAY,GAAG,IAAI,OAAO,CAAQ,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE;QACpD,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YACnB,MAAM,CAAC,IAAI,YAAY,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC,CAAC;YAClD,OAAO;QACT,CAAC;QACD,MAAM,OAAO,GAAG,GAAG,EAAE,CAAC,MAAM,CAAC,IAAI,YAAY,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC,CAAC;QACxE,MAAM,CAAC,gBAAgB,CAAC,OAAO,EAAE,OAAO,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC;QAC1D,OAAO,GAAG,GAAG,EAAE,CAAC,MAAM,CAAC,mBAAmB,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;IAC/D,CAAC,CAAC,CAAC;IAEH,IAAI,CAAC;QACH,OAAO,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC,WAAW,EAAE,YAAY,CAAC,CAAC,CAAC;IACzD,CAAC;YAAS,CAAC;QACT,OAAO,EAAE,EAAE,CAAC;IACd,CAAC;AACH,CAAC"}
@@ -1,38 +1,29 @@
1
1
  /**
2
- * Probe generator — creates scoped micro-tests from KB content.
2
+ * LLM-powered probe generator — creates synthetic bug-fix tasks.
3
3
  *
4
- * Probes are derived from:
5
- * 1. KB structure (hubs, entry points, naming conventions, integrations)
6
- * 2. KB diff (changed areas and their 1-hop dependents)
7
- * 3. Harvested prompts (real user interactions that reveal problem areas)
4
+ * Each probe is a realistic coding-assistant request with expected
5
+ * behaviours. Probes are generated via LLM at temperature 0.9 for
6
+ * diversity, with deduplication across iterations and a fallback
7
+ * pool of hardcoded templates.
8
8
  *
9
- * Each probe is a self-contained scenario that can be "run" by sending it
10
- * to an LLM with AGENTS.md as context and evaluating the response.
9
+ * Ported from sweagent_bench oracle/probes.py.
11
10
  */
12
11
  import type { Probe, ProbeGeneratorOptions } from './types';
13
- /** Extract a section from KB text by heading prefix. */
14
- declare function extractSection(kb: string, heading: string): string;
15
- /** Parse "High-Risk Architectural Hubs" table rows: | path | in | out | */
16
- declare function parseHubs(architecture: string): Array<{
17
- file: string;
18
- inDegree: number;
19
- outDegree: number;
20
- }>;
21
- /** Parse "Entry Points" from architecture section. */
22
- declare function parseEntryPoints(architecture: string): Array<{
23
- file: string;
24
- kind: string;
25
- }>;
26
- /** Parse naming conventions from the map section. */
27
- declare function parseConventions(mapSection: string): string[];
28
- /** Parse file paths mentioned in a diff string. */
29
- declare function parseDiffFiles(diff: string): string[];
12
+ export declare function normalizeProbeText(text: string): string;
13
+ export declare function isDuplicate(task: string, existing: string[]): boolean;
14
+ interface RawProbe {
15
+ task: string;
16
+ expected_behaviors: string[];
17
+ rationale?: string;
18
+ }
19
+ export declare function parseProbeResponse(raw: string): RawProbe[];
30
20
  /**
31
- * Generate probes scoped to the KB content and optional diff.
21
+ * Generate probes via LLM with fallback to hardcoded templates.
32
22
  *
33
- * When a diff is provided, probes focus on changed areas.
34
- * Otherwise, probes cover the full KB (hubs, entry points, conventions).
23
+ * Uses temperature 0.9 for diverse probe generation. Deduplicates
24
+ * against prior tasks across iterations. Falls back to a pool of
25
+ * hardcoded templates when LLM generation fails.
35
26
  */
36
- export declare function generateProbes(options: ProbeGeneratorOptions): Probe[];
37
- export { extractSection, parseHubs, parseEntryPoints, parseConventions, parseDiffFiles };
27
+ export declare function generateProbes(options: ProbeGeneratorOptions): Promise<Probe[]>;
28
+ export {};
38
29
  //# sourceMappingURL=probes.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"probes.d.ts","sourceRoot":"","sources":["../src/probes.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,KAAK,EACV,KAAK,EAEL,qBAAqB,EAEtB,MAAM,SAAS,CAAC;AAIjB,wDAAwD;AACxD,iBAAS,cAAc,CAAC,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAO3D;AAED,2EAA2E;AAC3E,iBAAS,SAAS,CAAC,YAAY,EAAE,MAAM,GAAG,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,CAAA;CAAE,CAAC,CAarG;AAED,sDAAsD;AACtD,iBAAS,gBAAgB,CAAC,YAAY,EAAE,MAAM,GAAG,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC,CAarF;AAED,qDAAqD;AACrD,iBAAS,gBAAgB,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,EAAE,CAOtD;AAaD,mDAAmD;AACnD,iBAAS,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAa9C;AAsGD;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,OAAO,EAAE,qBAAqB,GAAG,KAAK,EAAE,CA4CtE;AAGD,OAAO,EAAE,cAAc,EAAE,SAAS,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,cAAc,EAAE,CAAC"}
1
+ {"version":3,"file":"probes.d.ts","sourceRoot":"","sources":["../src/probes.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAGH,OAAO,KAAK,EACV,KAAK,EACL,qBAAqB,EACtB,MAAM,SAAS,CAAC;AA8FjB,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAMvD;AAED,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,OAAO,CAMrE;AAID,UAAU,QAAQ;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,kBAAkB,EAAE,MAAM,EAAE,CAAC;IAC7B,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,GAAG,QAAQ,EAAE,CAuB1D;AAID;;;;;;GAMG;AACH,wBAAsB,cAAc,CAAC,OAAO,EAAE,qBAAqB,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC,CA0FrF"}