@zhixuan92/multi-model-agent-core 4.0.5 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/intake/brief-compiler-slots/delegate.d.ts +18 -0
- package/dist/intake/brief-compiler-slots/delegate.d.ts.map +1 -1
- package/dist/intake/brief-compiler-slots/delegate.js +36 -3
- package/dist/intake/brief-compiler-slots/delegate.js.map +1 -1
- package/dist/lifecycle/task-executor.d.ts.map +1 -1
- package/dist/lifecycle/task-executor.js +66 -77
- package/dist/lifecycle/task-executor.js.map +1 -1
- package/dist/reporting/report-parser-slots/investigate-report.d.ts.map +1 -1
- package/dist/reporting/report-parser-slots/investigate-report.js +41 -2
- package/dist/reporting/report-parser-slots/investigate-report.js.map +1 -1
- package/dist/review/templates/annotator-audit.d.ts.map +1 -1
- package/dist/review/templates/annotator-audit.js +5 -3
- package/dist/review/templates/annotator-audit.js.map +1 -1
- package/dist/review/templates/annotator-debug.d.ts.map +1 -1
- package/dist/review/templates/annotator-debug.js +11 -7
- package/dist/review/templates/annotator-debug.js.map +1 -1
- package/dist/review/templates/annotator-investigate.d.ts.map +1 -1
- package/dist/review/templates/annotator-investigate.js +6 -3
- package/dist/review/templates/annotator-investigate.js.map +1 -1
- package/dist/review/templates/annotator-review.d.ts.map +1 -1
- package/dist/review/templates/annotator-review.js +13 -6
- package/dist/review/templates/annotator-review.js.map +1 -1
- package/dist/review/templates/annotator-verify.d.ts.map +1 -1
- package/dist/review/templates/annotator-verify.js +11 -5
- package/dist/review/templates/annotator-verify.js.map +1 -1
- package/dist/tools/audit/implementer-criteria.d.ts +55 -4
- package/dist/tools/audit/implementer-criteria.d.ts.map +1 -1
- package/dist/tools/audit/implementer-criteria.js +113 -11
- package/dist/tools/audit/implementer-criteria.js.map +1 -1
- package/dist/tools/audit/schema.d.ts +3 -10
- package/dist/tools/audit/schema.d.ts.map +1 -1
- package/dist/tools/audit/schema.js +3 -4
- package/dist/tools/audit/schema.js.map +1 -1
- package/dist/tools/audit/tool-config.d.ts.map +1 -1
- package/dist/tools/audit/tool-config.js +42 -21
- package/dist/tools/audit/tool-config.js.map +1 -1
- package/dist/tools/debug/implementer-criteria.d.ts +44 -4
- package/dist/tools/debug/implementer-criteria.d.ts.map +1 -1
- package/dist/tools/debug/implementer-criteria.js +105 -13
- package/dist/tools/debug/implementer-criteria.js.map +1 -1
- package/dist/tools/debug/tool-config.d.ts.map +1 -1
- package/dist/tools/debug/tool-config.js +22 -4
- package/dist/tools/debug/tool-config.js.map +1 -1
- package/dist/tools/delegate/implementer-criteria.d.ts +62 -0
- package/dist/tools/delegate/implementer-criteria.d.ts.map +1 -0
- package/dist/tools/delegate/implementer-criteria.js +114 -0
- package/dist/tools/delegate/implementer-criteria.js.map +1 -0
- package/dist/tools/execute-plan/implementer-criteria.d.ts +52 -0
- package/dist/tools/execute-plan/implementer-criteria.d.ts.map +1 -0
- package/dist/tools/execute-plan/implementer-criteria.js +104 -0
- package/dist/tools/execute-plan/implementer-criteria.js.map +1 -0
- package/dist/tools/execute-plan/tool-config.d.ts.map +1 -1
- package/dist/tools/execute-plan/tool-config.js +17 -3
- package/dist/tools/execute-plan/tool-config.js.map +1 -1
- package/dist/tools/investigate/implementer-criteria.d.ts +48 -5
- package/dist/tools/investigate/implementer-criteria.d.ts.map +1 -1
- package/dist/tools/investigate/implementer-criteria.js +103 -13
- package/dist/tools/investigate/implementer-criteria.js.map +1 -1
- package/dist/tools/investigate/tool-config.d.ts.map +1 -1
- package/dist/tools/investigate/tool-config.js +15 -8
- package/dist/tools/investigate/tool-config.js.map +1 -1
- package/dist/tools/review/implementer-criteria.d.ts +47 -1
- package/dist/tools/review/implementer-criteria.d.ts.map +1 -1
- package/dist/tools/review/implementer-criteria.js +110 -9
- package/dist/tools/review/implementer-criteria.js.map +1 -1
- package/dist/tools/review/tool-config.d.ts.map +1 -1
- package/dist/tools/review/tool-config.js +39 -7
- package/dist/tools/review/tool-config.js.map +1 -1
- package/dist/tools/verify/implementer-criteria.d.ts +46 -0
- package/dist/tools/verify/implementer-criteria.d.ts.map +1 -1
- package/dist/tools/verify/implementer-criteria.js +103 -8
- package/dist/tools/verify/implementer-criteria.js.map +1 -1
- package/dist/tools/verify/tool-config.d.ts.map +1 -1
- package/dist/tools/verify/tool-config.js +18 -2
- package/dist/tools/verify/tool-config.js.map +1 -1
- package/package.json +1 -1
|
@@ -5,6 +5,7 @@ import { executePlanHeadlineTemplate } from '../../reporting/headline-templates/
|
|
|
5
5
|
import { executePlanReportSchema } from '../../reporting/report-parser-slots/execute-plan-report.js';
|
|
6
6
|
import { DEFAULT_TASK_TIMEOUT_MS } from '../../config/schema.js';
|
|
7
7
|
import { REVIEWER_AWARENESS_AP } from '../../review/templates/finding-criteria.js';
|
|
8
|
+
import { EXECUTE_PLAN_PURPOSE_ORIENTATION, EXECUTE_PLAN_SCOPE_RULE, EXECUTE_PLAN_FAILURE_MODES, PLAN_FIDELITY_REMINDER, } from './implementer-criteria.js';
|
|
8
9
|
export const executePlanInputSchema = z.object({
|
|
9
10
|
filePaths: z.array(z.string()).length(1, { message: "execute_plan requires exactly one plan filePath" }),
|
|
10
11
|
taskDescriptors: z.array(z.string()).min(1),
|
|
@@ -29,9 +30,22 @@ export function registerExecutePlan(registry) {
|
|
|
29
30
|
/**
|
|
30
31
|
* Build a compact worker prompt for one plan task. Extracted from the legacy
|
|
31
32
|
* executor — just the section matched by the slot, not the full plan file.
|
|
33
|
+
*
|
|
34
|
+
* The prompt is structured top-down: orientation (why this exists) →
|
|
35
|
+
* task descriptor → matched plan section → file paths → fidelity rules
|
|
36
|
+
* (RESTORED in 4.1.0; the older `compileExecutePlan` had them, the
|
|
37
|
+
* slot-style refactor that became the canonical path dropped them) →
|
|
38
|
+
* failure-mode taxonomy → reviewer awareness. Without the orientation
|
|
39
|
+
* + fidelity blocks, workers default to "implement the goal" and treat
|
|
40
|
+
* the plan as a starting suggestion rather than the contract.
|
|
32
41
|
*/
|
|
33
42
|
function buildExecutePlanPrompt(filePaths, task, taskSection) {
|
|
34
43
|
const parts = [
|
|
44
|
+
// Orientation goes FIRST — fidelity-first framing before the
|
|
45
|
+
// task descriptor, so the worker reads the section through the
|
|
46
|
+
// execution lens instead of the "improve it" lens.
|
|
47
|
+
EXECUTE_PLAN_PURPOSE_ORIENTATION,
|
|
48
|
+
'',
|
|
35
49
|
`Execute this task from the plan: "${task}"`,
|
|
36
50
|
'',
|
|
37
51
|
];
|
|
@@ -39,10 +53,10 @@ function buildExecutePlanPrompt(filePaths, task, taskSection) {
|
|
|
39
53
|
parts.push('Relevant plan section:', '', '---', taskSection.trim(), '---', '');
|
|
40
54
|
}
|
|
41
55
|
else {
|
|
42
|
-
parts.push('No unique plan section matched that task heading. The full plan file is at:', ...filePaths.map((p) => ` - ${p}`), 'Read the plan file(s) yourself to find the task.', '');
|
|
56
|
+
parts.push('No unique plan section matched that task heading. The full plan file is at:', ...filePaths.map((p) => ` - ${p}`), 'Read the plan file(s) yourself to find the task. If still no unique match, report that and stop — do not implement anything.', '');
|
|
43
57
|
}
|
|
44
|
-
parts.push('Plan files for reference (read on demand if you need adjacent context):', ...filePaths.map((p) => ` - ${p}`), '');
|
|
45
|
-
parts.push('Implement the task fully. Follow any acceptance criteria, file paths, and', 'constraints in the plan section above. If you cannot find or understand', 'the task, report that explicitly and do not implement anything.', '',
|
|
58
|
+
parts.push('Plan files for reference (read on demand if you need adjacent context — but do not enlarge scope into other tasks):', ...filePaths.map((p) => ` - ${p}`), '');
|
|
59
|
+
parts.push('Implement the task fully. Follow any acceptance criteria, file paths, and', 'constraints in the plan section above. If you cannot find or understand', 'the task, report that explicitly and do not implement anything.', '', EXECUTE_PLAN_SCOPE_RULE, '', EXECUTE_PLAN_FAILURE_MODES, '', PLAN_FIDELITY_REMINDER, '',
|
|
46
60
|
// Tool sweep #12: share spec + quality reviewer rubric so the
|
|
47
61
|
// worker self-aligns on what each reviewer will judge against.
|
|
48
62
|
REVIEWER_AWARENESS_AP);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tool-config.js","sourceRoot":"","sources":["../../../src/tools/execute-plan/tool-config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,OAAO,EACL,YAAY,EACZ,iBAAiB,EACjB,YAAY,GACb,MAAM,iCAAiC,CAAC;AAEzC,OAAO,EAAE,wBAAwB,EAA6B,MAAM,mDAAmD,CAAC;AACxH,OAAO,EAAE,2BAA2B,EAAE,MAAM,oDAAoD,CAAC;AACjG,OAAO,EAAE,uBAAuB,EAAE,MAAM,4DAA4D,CAAC;AACrG,OAAO,EAAE,uBAAuB,EAAE,MAAM,wBAAwB,CAAC;AACjE,OAAO,EAAE,qBAAqB,EAAE,MAAM,4CAA4C,CAAC;
|
|
1
|
+
{"version":3,"file":"tool-config.js","sourceRoot":"","sources":["../../../src/tools/execute-plan/tool-config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,OAAO,EACL,YAAY,EACZ,iBAAiB,EACjB,YAAY,GACb,MAAM,iCAAiC,CAAC;AAEzC,OAAO,EAAE,wBAAwB,EAA6B,MAAM,mDAAmD,CAAC;AACxH,OAAO,EAAE,2BAA2B,EAAE,MAAM,oDAAoD,CAAC;AACjG,OAAO,EAAE,uBAAuB,EAAE,MAAM,4DAA4D,CAAC;AACrG,OAAO,EAAE,uBAAuB,EAAE,MAAM,wBAAwB,CAAC;AACjE,OAAO,EAAE,qBAAqB,EAAE,MAAM,4CAA4C,CAAC;AACnF,OAAO,EACL,gCAAgC,EAChC,uBAAuB,EACvB,0BAA0B,EAC1B,sBAAsB,GACvB,MAAM,2BAA2B,CAAC;AAEnC,MAAM,CAAC,MAAM,sBAAsB,GAAG,CAAC,CAAC,MAAM,CAAC;IAC7C,SAAS,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,OAAO,EAAE,iDAAiD,EAAE,CAAC;IACxG,eAAe,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC3C,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC1B,mBAAmB,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,cAAc,EAAE,WAAW,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE;IAC3G,eAAe,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE;IAC/C,aAAa,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE;CAC9C,CAAC,CAAC,MAAM,EAAE,CAAC;AAIZ,MAAM,UAAU,mBAAmB,CAAC,QAA6B;IAC/D,QAAQ,CAAC,QAAQ,CAAC;QAChB,SAAS,EAAE,cAAc;QACzB,UAAU,EAAE,MAAM;QAClB,QAAQ,EAAE,eAAe;QACzB,OAAO,EAAE,MAAM;QACf,MAAM,EAAE,sBAAsB;QAC9B,YAAY,EAAE,oBAAoB;QAClC,gBAAgB,EAAE,UAAU;QAC5B,oBAAoB,EAAE,KAAK;QAC3B,iBAAiB,EAAE,eAAe;KACnC,CAAC,CAAC;AACL,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAS,sBAAsB,CAC7B,SAAmB,EACnB,IAAY,EACZ,WAA+B;IAE/B,MAAM,KAAK,GAAa;QACtB,6DAA6D;QAC7D,+DAA+D;QAC/D,mDAAmD;QACnD,gCAAgC;QAChC,EAAE;QACF,qCAAqC,IAAI,GAAG;QAC5C,EAAE;KACH,CAAC;IACF,IAAI,WAAW,EAAE,CAAC;QAChB,KAAK,CAAC,IAAI,CAAC,wBAAwB,EAAE,EAAE,EAAE,KAAK,EAAE,WAAW,CAAC,IAAI,EAAE,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC;IACjF,CAAC;SAAM,CAAC;QACN,KAAK,CAAC,IAAI,CACR,6EAA6E,EAC7E,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,EAAE,CAAC,EACnC,8HAA8H,EAC9H,EAAE,CACH,CAAC;IACJ,CAAC;IACD,KAAK,CAAC,
IAAI,CACR,qHAAqH,EACrH,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,EAAE,CAAC,EACnC,EAAE,CACH,CAAC;IACF,KAAK,CAAC,IAAI,CACR,2EAA2E,EAC3E,yEAAyE,EACzE,iEAAiE,EACjE,EAAE,EACF,uBAAuB,EACvB,EAAE,EACF,0BAA0B,EAC1B,EAAE,EACF,sBAAsB,EACtB,EAAE;IACF,8DAA8D;IAC9D,+DAA+D;IAC/D,qBAAqB,CACtB,CAAC;IACF,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,MAAM,CAAC,MAAM,UAAU,GAA2D;IAChF,IAAI,EAAE,cAAc;IACpB,QAAQ,EAAE,oBAAoB;IAC9B,SAAS,EAAE,UAAU;IACrB,SAAS,EAAE,wBAAwB;IACnC,aAAa,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC;QAC9B,MAAM,EAAE,sBAAsB,CAAC,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,cAAc,EAAE,KAAK,CAAC,WAAW,CAAC;QACxF,SAAS,EAAE,UAAU;QACrB,YAAY,EAAE,KAAK,CAAC,YAAY;QAChC,IAAI,EAAE,gOAAgO;QACtO,KAAK,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,KAAK,IAAI,MAAM;QAC3C,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,SAAS,IAAI,uBAAuB;QACpE,UAAU,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,UAAU,IAAI,EAAE;QACjD,aAAa,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,IAAI,UAAU;QAC/D,GAAG,EAAE,KAAK,CAAC,GAAG;QACd,SAAS,EAAE,KAAK,CAAC,SAAS;QAC1B,eAAe,EAAE,KAAK,CAAC,eAAe;QACtC,UAAU,EAAE,IAAI;QAChB,aAAa,EAAE,KAAK,CAAC,aAAa;QAClC,GAAG,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KACjE,CAAC;IACF,YAAY,EAAE,uBAAuB;IACrC,gBAAgB,EAAE,2BAA2B;IAC7C,eAAe,EAAE;QACf,IAAI,EAAE,YAAY;QAClB,SAAS,EAAE,iBAAiB;QAC5B,IAAI,EAAE,YAAY;KACnB;CACF,CAAC"}
|
|
@@ -1,17 +1,60 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Investigate-specific implementer criteria.
|
|
3
3
|
*
|
|
4
|
+
* INVESTIGATE'S PURPOSE — read this before adding categories.
|
|
5
|
+
* mma-investigate answers a question about the codebase. The caller is
|
|
6
|
+
* about to ACT on your answer — write code, edit a file, choose between
|
|
7
|
+
* approaches. The success criterion is:
|
|
8
|
+
*
|
|
9
|
+
* "If the caller acts on this answer literally — opens the cited
|
|
10
|
+
* files, follows the cited chain, takes the synthesis at face value
|
|
11
|
+
* — will they end up with correct code?"
|
|
12
|
+
*
|
|
13
|
+
* That criterion is what makes a finding load-bearing. A wrong file
|
|
14
|
+
* path, a stale quote, a hand-waved synthesis, an overstated confidence
|
|
15
|
+
* — all become bugs the caller writes. The investigate-equivalent of
|
|
16
|
+
* "fix is unimplementable" is "the answer points at a file that does
|
|
17
|
+
* not contain what you said it contained."
|
|
18
|
+
*
|
|
4
19
|
* Investigate answers a question about the codebase. Findings can be
|
|
5
20
|
* code-level citations, project-level synthesis, or NEGATIVE results
|
|
6
|
-
* ("searched X, not found").
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
* "where does Y live?".
|
|
21
|
+
* ("searched X, not found"). Negative findings are legitimate answers
|
|
22
|
+
* to "is X still used?" or "where does Y live?" and must not be
|
|
23
|
+
* suppressed.
|
|
10
24
|
*
|
|
11
25
|
* Note: investigate does NOT use SEVERITY_LADDER — its findings are
|
|
12
|
-
* citations and synthesis, not severity-rated issues.
|
|
26
|
+
* citations and synthesis, not severity-rated issues. Confidence is the
|
|
27
|
+
* calibration dial, not severity.
|
|
28
|
+
*/
|
|
29
|
+
/**
|
|
30
|
+
* The orientation block. Goes at the TOP of every investigate prompt.
|
|
31
|
+
*
|
|
32
|
+
* Without an explicit purpose statement, workers default to "give a
|
|
33
|
+
* plausible-sounding answer" — which produces hallucinated citations
|
|
34
|
+
* and overstated confidence. With this orientation, every claim is
|
|
35
|
+
* ground-truthed against the file system.
|
|
13
36
|
*/
|
|
37
|
+
export declare const INVESTIGATE_PURPOSE_ORIENTATION: string;
|
|
14
38
|
export declare const EVIDENCE_RULE_INVESTIGATE: string;
|
|
15
39
|
export declare const SCOPE_RULE_INVESTIGATE: string;
|
|
40
|
+
/**
|
|
41
|
+
* The failure-mode taxonomy for investigations.
|
|
42
|
+
*
|
|
43
|
+
* Without this block, workers tend to give plausible-sounding answers
|
|
44
|
+
* with shaky citations. The 8 categories below are the specific ways
|
|
45
|
+
* an investigation answer becomes a bug when the caller acts on it.
|
|
46
|
+
*/
|
|
47
|
+
export declare const INVESTIGATE_FAILURE_MODES: string;
|
|
48
|
+
/**
|
|
49
|
+
* Confidence-discipline reminder.
|
|
50
|
+
*
|
|
51
|
+
* The shared SEVERITY_LADDER does not apply to investigate (findings
|
|
52
|
+
* are citations, not severity-rated). Instead, confidence is the
|
|
53
|
+
* calibration dial. The common failure mode is over-confidence —
|
|
54
|
+
* stating "high confidence" because the worker sounds certain,
|
|
55
|
+
* not because the evidence is strong. This block tells the worker
|
|
56
|
+
* confidence reflects evidence strength only.
|
|
57
|
+
*/
|
|
58
|
+
export declare const CONFIDENCE_REMINDER_INVESTIGATE: string;
|
|
16
59
|
export declare const ANNOTATOR_AWARENESS_INVESTIGATE: string;
|
|
17
60
|
//# sourceMappingURL=implementer-criteria.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"implementer-criteria.d.ts","sourceRoot":"","sources":["../../../src/tools/investigate/implementer-criteria.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"implementer-criteria.d.ts","sourceRoot":"","sources":["../../../src/tools/investigate/implementer-criteria.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AAEH;;;;;;;GAOG;AACH,eAAO,MAAM,+BAA+B,QAahC,CAAC;AAEb,eAAO,MAAM,yBAAyB,QAO1B,CAAC;AAEb,eAAO,MAAM,sBAAsB,QAKvB,CAAC;AAEb;;;;;;GAMG;AACH,eAAO,MAAM,yBAAyB,QAiB1B,CAAC;AAEb;;;;;;;;;GASG;AACH,eAAO,MAAM,+BAA+B,QAYhC,CAAC;AAEb,eAAO,MAAM,+BAA+B,QAShC,CAAC"}
|
|
@@ -1,33 +1,123 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Investigate-specific implementer criteria.
|
|
3
3
|
*
|
|
4
|
+
* INVESTIGATE'S PURPOSE — read this before adding categories.
|
|
5
|
+
* mma-investigate answers a question about the codebase. The caller is
|
|
6
|
+
* about to ACT on your answer — write code, edit a file, choose between
|
|
7
|
+
* approaches. The success criterion is:
|
|
8
|
+
*
|
|
9
|
+
* "If the caller acts on this answer literally — opens the cited
|
|
10
|
+
* files, follows the cited chain, takes the synthesis at face value
|
|
11
|
+
* — will they end up with correct code?"
|
|
12
|
+
*
|
|
13
|
+
* That criterion is what makes a finding load-bearing. A wrong file
|
|
14
|
+
* path, a stale quote, a hand-waved synthesis, an overstated confidence
|
|
15
|
+
* — all become bugs the caller writes. The investigate-equivalent of
|
|
16
|
+
* "fix is unimplementable" is "the answer points at a file that does
|
|
17
|
+
* not contain what you said it contained."
|
|
18
|
+
*
|
|
4
19
|
* Investigate answers a question about the codebase. Findings can be
|
|
5
20
|
* code-level citations, project-level synthesis, or NEGATIVE results
|
|
6
|
-
* ("searched X, not found").
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
* "where does Y live?".
|
|
21
|
+
* ("searched X, not found"). Negative findings are legitimate answers
|
|
22
|
+
* to "is X still used?" or "where does Y live?" and must not be
|
|
23
|
+
* suppressed.
|
|
10
24
|
*
|
|
11
25
|
* Note: investigate does NOT use SEVERITY_LADDER — its findings are
|
|
12
|
-
* citations and synthesis, not severity-rated issues.
|
|
26
|
+
* citations and synthesis, not severity-rated issues. Confidence is the
|
|
27
|
+
* calibration dial, not severity.
|
|
28
|
+
*/
|
|
29
|
+
/**
|
|
30
|
+
* The orientation block. Goes at the TOP of every investigate prompt.
|
|
31
|
+
*
|
|
32
|
+
* Without an explicit purpose statement, workers default to "give a
|
|
33
|
+
* plausible-sounding answer" — which produces hallucinated citations
|
|
34
|
+
* and overstated confidence. With this orientation, every claim is
|
|
35
|
+
* ground-truthed against the file system.
|
|
13
36
|
*/
|
|
37
|
+
export const INVESTIGATE_PURPOSE_ORIENTATION = [
|
|
38
|
+
'Why this investigation exists:',
|
|
39
|
+
'mma-investigate is the answer-and-act loop. The caller will use your answer to make code edits — open the cited files, take the synthesis at face value, choose an approach based on your confidence rating. A wrong file path becomes a bug; a stale quote becomes a wrong edit; an overstated confidence becomes a misallocated effort.',
|
|
40
|
+
'',
|
|
41
|
+
'For your output to clear that bar, every load-bearing claim must answer:',
|
|
42
|
+
'- Where exactly is this — file:line for present things, or "searched <pattern> in <path>, not found" for absent things?',
|
|
43
|
+
'- Did I read the file just now, or am I reasoning from training data? (only the former counts as evidence)',
|
|
44
|
+
'- For synthesis claims (e.g. "X is used by Y via Z"), is each link in the chain backed by a file:line?',
|
|
45
|
+
'- Is my confidence calibrated to evidence strength, or to how certain I sound?',
|
|
46
|
+
'',
|
|
47
|
+
'A claim without a citation is a guess. A citation that does not match the file currently on disk is a hallucination. A "high confidence" verdict on a synthesis with one weak link is overstatement.',
|
|
48
|
+
'',
|
|
49
|
+
'The completion test: would a caller who reads only your investigation report and the named files end up with the same answer if they re-investigated themselves — or would they find the cited file does not say what you said it said?',
|
|
50
|
+
].join('\n');
|
|
14
51
|
export const EVIDENCE_RULE_INVESTIGATE = [
|
|
15
52
|
'Evidence grounding (REQUIRED for every citation):',
|
|
16
|
-
'- For present things: `file:line` (or `file:line-line` for spans) plus a quote or summary of what you found.',
|
|
53
|
+
'- For present things: `file:line` (or `file:line-line` for spans) plus a quote or summary of what you found. The cited line MUST contain the cited content as of your read — do NOT cite from training-data memory.',
|
|
17
54
|
'- For absent things: explicit `searched <pattern> in <path>, no matches` — negative findings are legitimate answers and should be emitted, not suppressed.',
|
|
18
|
-
'- For synthesis findings (e.g. "X uses Y indirectly via Z"): cite each link in the chain by `file:line`.',
|
|
55
|
+
'- For synthesis findings (e.g. "X uses Y indirectly via Z"): cite each link in the chain by `file:line`. A synthesis claim with even one un-cited link is a hand-wave.',
|
|
56
|
+
'- For project-level claims that no single file demonstrates (e.g. "the codebase has no shared error type"): write the negative ("searched the repo for `class.*Error` declarations: only X, Y, Z found, none shared") rather than asserting the absence without evidence.',
|
|
57
|
+
'- If you have not read a file, do NOT cite from it. Reasoning-from-training-data is the most common hallucination source — refuse it explicitly.',
|
|
19
58
|
].join('\n');
|
|
20
59
|
export const SCOPE_RULE_INVESTIGATE = [
|
|
21
60
|
'Scope:',
|
|
22
61
|
'- Wherever the question leads. The question may not name files; you choose where to look.',
|
|
23
|
-
'-
|
|
62
|
+
'- If the question is broad (e.g. "how does X work overall?"), break it into sub-questions and answer each with citations rather than producing one un-grounded narrative.',
|
|
63
|
+
'- Out of scope: drift into issues unrelated to the question; opportunistic code review of the code you are investigating (raise that separately, not as an investigation finding); fixes / suggestions / improvements (this is a read-only Q&A — propose nothing).',
|
|
64
|
+
].join('\n');
|
|
65
|
+
/**
|
|
66
|
+
* The failure-mode taxonomy for investigations.
|
|
67
|
+
*
|
|
68
|
+
* Without this block, workers tend to give plausible-sounding answers
|
|
69
|
+
* with shaky citations. The 8 categories below are the specific ways
|
|
70
|
+
* an investigation answer becomes a bug when the caller acts on it.
|
|
71
|
+
*/
|
|
72
|
+
export const INVESTIGATE_FAILURE_MODES = [
|
|
73
|
+
'Patterns to consciously check for. Apply on EVERY investigation:',
|
|
74
|
+
'',
|
|
75
|
+
'1. WRONG FILE — a close-named file in a different package/module is cited instead of the actual one (e.g. `src/foo/utils.ts` when the real answer is in `src/bar/utils.ts`). When a name is ambiguous, list all matches and identify which one the question is about.',
|
|
76
|
+
'2. STALE QUOTE — the cited content was at the cited line in your training data but the file has been refactored. Always re-read before quoting; do NOT quote from memory. If the file does not currently contain the quoted content, the citation is invalid.',
|
|
77
|
+
'3. HALLUCINATED CITATION — a `file:line` that does not exist on disk. Verify each citation by actually reading the file at the cited line range. Hallucinated citations are the most caller-actionable failure mode — the caller opens the file and finds nothing there.',
|
|
78
|
+
'4. CONFIDENCE OVERSTATEMENT — claiming "high confidence" when the chain has gaps, when there are multiple plausible answers, or when the citation is partial. Confidence reflects EVIDENCE strength, not how certain you sound.',
|
|
79
|
+
'5. CITATION GAP — a load-bearing claim made without a `file:line`. Synthesis findings without per-link citations are hand-waves. The fix: add the citation, OR downgrade the claim to "I infer X from Y, Z; verify by re-reading <file>".',
|
|
80
|
+
'6. QUESTION SHIFT — answered an adjacent question rather than the one asked. Re-read the question literally before writing the Summary. If the asked question is "where is X declared?" do not answer "where is X used?" without saying so.',
|
|
81
|
+
'7. SYNTHESIS WITHOUT GROUNDING — combined facts into a conclusion that no single citation supports. Either: (a) cite each link explicitly, or (b) mark the conclusion as inference and lower confidence.',
|
|
82
|
+
'8. ASSUMED-CURRENT-STATE — wrote answer from training-data assumption ("normally Foo is implemented this way") instead of the file currently on disk. The codebase may have diverged. Always read; never assume.',
|
|
83
|
+
'',
|
|
84
|
+
'Confidence calibration for investigations:',
|
|
85
|
+
'- high: every load-bearing claim has a file:line citation you read this session; the citation matches the question precisely; no plausible alternative answers were found in your search.',
|
|
86
|
+
'- medium: most claims are cited; one or two links rely on inference from cited facts; alternative answers exist but were ruled out with evidence.',
|
|
87
|
+
'- low: partial answer; significant gaps in the citation chain; the file system has answers you have not searched; or the question is broader than the time spent investigating.',
|
|
88
|
+
'- Use `(none)` for Citations and `low` for Confidence ONLY when the question is genuinely project-level and no code evidence applies. Most "I think it works this way" answers should be `low` confidence with a partial citation, not zero citations.',
|
|
89
|
+
].join('\n');
|
|
90
|
+
/**
|
|
91
|
+
* Confidence-discipline reminder.
|
|
92
|
+
*
|
|
93
|
+
* The shared SEVERITY_LADDER does not apply to investigate (findings
|
|
94
|
+
* are citations, not severity-rated). Instead, confidence is the
|
|
95
|
+
* calibration dial. The common failure mode is over-confidence —
|
|
96
|
+
* stating "high confidence" because the worker sounds certain,
|
|
97
|
+
* not because the evidence is strong. This block tells the worker
|
|
98
|
+
* confidence reflects evidence strength only.
|
|
99
|
+
*/
|
|
100
|
+
export const CONFIDENCE_REMINDER_INVESTIGATE = [
|
|
101
|
+
'Confidence-discipline reminder:',
|
|
102
|
+
'- Confidence reflects EVIDENCE STRENGTH (how completely the citation chain supports the answer), not ASSERTION STRENGTH (how certain you sound).',
|
|
103
|
+
'- For each load-bearing claim, ask: "if the caller followed this citation and re-read the file themselves, would they reach the same conclusion?" If yes for every claim → `high`. If yes for most but inference fills the gaps → `medium`. If significant gaps remain → `low`.',
|
|
104
|
+
'- Do NOT use confidence to communicate certainty about the question being answered. Use it to communicate certainty that your answer is CORRECT given your evidence.',
|
|
105
|
+
'- A short investigation that found a clean answer can legitimately be `high`. A long investigation that found a partial answer is `medium` or `low`, no matter how thorough it felt.',
|
|
106
|
+
'',
|
|
107
|
+
'Citation-chain walk (REQUIRED on every load-bearing claim):',
|
|
108
|
+
'- Before writing the Summary, list every claim that drives the answer. For each, ask: "do I have a file:line for this, and did I read the file in this session?"',
|
|
109
|
+
'- If the answer to either is no, the claim is inference. Either downgrade Confidence, or add the citation by reading the file now.',
|
|
110
|
+
'- Worked example. Question: "how does the audit prompt assemble the failure-mode taxonomy?" Naive answer: "The audit tool config imports DOC_AUDIT_FAILURE_MODES from implementer-criteria.ts and joins it into the prompt — confidence: high." Better answer: cite the import line (e.g. `tool-config.ts:14 — import { DOC_AUDIT_FAILURE_MODES, ... }`) AND the consumer line where it is joined into the prompt (e.g. `tool-config.ts:152 — DOC_AUDIT_FAILURE_MODES,` inside the FINDING_FORMAT_INSTRUCTIONS array). Two citations, both verified by reading the file → high confidence is now backed. The naive version asserts the same conclusion but with no actual file:line; if the file has been refactored, the answer is silently wrong.',
|
|
111
|
+
'- Most workers miss findings of this shape on first pass because the answer "feels right". The citation-chain walk forces the file-system check.',
|
|
24
112
|
].join('\n');
|
|
25
113
|
export const ANNOTATOR_AWARENESS_INVESTIGATE = [
|
|
26
114
|
'After your output, an annotator validates each finding against this investigate rubric:',
|
|
27
|
-
'- Does each
|
|
28
|
-
'- Are present-thing citations to real `file:line` from files
|
|
29
|
-
'- Are negative findings explicit ("searched X, not found") rather than silent omissions?',
|
|
30
|
-
'-
|
|
31
|
-
'
|
|
115
|
+
'- Does each citation answer some part of the question (not an adjacent question)?',
|
|
116
|
+
'- Are present-thing citations to real `file:line` from files actually read this session?',
|
|
117
|
+
'- Are negative findings explicit ("searched X in Y, not found") rather than silent omissions?',
|
|
118
|
+
'- For synthesis claims, is each link in the chain cited?',
|
|
119
|
+
'- Does the confidence reflect evidence strength (not assertion strength)?',
|
|
120
|
+
'- Is the answer to the asked question, not a shifted version of it?',
|
|
121
|
+
'Self-check before emitting. Findings that fail any check are downgraded or dropped — but negative findings ("searched, not found") and inference-with-citations ("I infer X from Y:42, Z:18") are FULLY VALID. Do NOT downgrade negative findings for lacking a code quote, and do NOT downgrade inference-with-citations as "speculation" if the cited links are real.',
|
|
32
122
|
].join('\n');
|
|
33
123
|
//# sourceMappingURL=implementer-criteria.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"implementer-criteria.js","sourceRoot":"","sources":["../../../src/tools/investigate/implementer-criteria.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"implementer-criteria.js","sourceRoot":"","sources":["../../../src/tools/investigate/implementer-criteria.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AAEH;;;;;;;GAOG;AACH,MAAM,CAAC,MAAM,+BAA+B,GAAG;IAC7C,gCAAgC;IAChC,2UAA2U;IAC3U,EAAE;IACF,0EAA0E;IAC1E,yHAAyH;IACzH,4GAA4G;IAC5G,wGAAwG;IACxG,gFAAgF;IAChF,EAAE;IACF,sMAAsM;IACtM,EAAE;IACF,yOAAyO;CAC1O,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,CAAC,MAAM,yBAAyB,GAAG;IACvC,mDAAmD;IACnD,qNAAqN;IACrN,4JAA4J;IAC5J,wKAAwK;IACxK,2QAA2Q;IAC3Q,kJAAkJ;CACnJ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,CAAC,MAAM,sBAAsB,GAAG;IACpC,QAAQ;IACR,2FAA2F;IAC3F,2KAA2K;IAC3K,oQAAoQ;CACrQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb;;;;;;GAMG;AACH,MAAM,CAAC,MAAM,yBAAyB,GAAG;IACvC,kEAAkE;IAClE,EAAE;IACF,uQAAuQ;IACvQ,+PAA+P;IAC/P,0QAA0Q;IAC1Q,iOAAiO;IACjO,2OAA2O;IAC3O,6OAA6O;IAC7O,0MAA0M;IAC1M,kNAAkN;IAClN,EAAE;IACF,4CAA4C;IAC5C,2LAA2L;IAC3L,mJAAmJ;IACnJ,iLAAiL;IACjL,wPAAwP;CACzP,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb;;;;;;;;;GASG;AACH,MAAM,CAAC,MAAM,+BAA+B,GAAG;IAC7C,iCAAiC;IACjC,kJAAkJ;IAClJ,iRAAiR;IACjR,sKAAsK;IACtK,sLAAsL;IACtL,EAAE;IACF,6DAA6D;IAC7D,kKAAkK;IAClK,oIAAoI;IACpI,qtBAAqtB;IACrtB,kJAAkJ;CACnJ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,CAAC,MAAM,+BAA+B,GAAG;IAC7C,yFAAyF;IACzF,mFAAmF;IACnF,0FAA0F;IAC1F,+FAA+F;IAC/F,0DAA0D;IAC1D,2EAA2E;IAC3E,qEAAqE;IACrE,yWAAyW;CAC1W,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tool-config.d.ts","sourceRoot":"","sources":["../../../src/tools/investigate/tool-config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,6CAA6C,CAAC;AAElF,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,aAAa,CAAC;AAEzC,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,sCAAsC,CAAC;AAGvE,OAAO,KAAK,EAAE,uBAAuB,EAAE,MAAM,2DAA2D,CAAC;
|
|
1
|
+
{"version":3,"file":"tool-config.d.ts","sourceRoot":"","sources":["../../../src/tools/investigate/tool-config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,6CAA6C,CAAC;AAElF,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,aAAa,CAAC;AAEzC,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,sCAAsC,CAAC;AAGvE,OAAO,KAAK,EAAE,uBAAuB,EAAE,MAAM,2DAA2D,CAAC;AAazG,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,mBAAmB,GAAG,IAAI,CAYvE;AAKD,MAAM,WAAW,oBAAoB;IACnC,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,wBAAyB,SAAQ,KAAK;IACrD,qBAAqB,EAAE,oBAAoB,EAAE,CAAC;IAC9C,sBAAsB,EAAE,MAAM,EAAE,CAAC;IACjC,0BAA0B,EAAE,MAAM,EAAE,CAAC;CACtC;AAED,MAAM,WAAW,gBAAgB;IAC/B,+DAA+D;IAC/D,QAAQ,EAAE,MAAM,CAAC;IACjB;;;;;;;;;;OAUG;IACH,cAAc,EAAE,MAAM,CAAC;IACvB,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,KAAK,CAAC,EAAE,MAAM,GAAG,UAAU,CAAC;CAC7B;AAkED,eAAO,MAAM,UAAU,EAAE,UAAU,CAAC,wBAAwB,EAAE,gBAAgB,EAAE,uBAAuB,CA2DtG,CAAC"}
|
|
@@ -4,7 +4,7 @@ import { investigateReportSchema } from '../../reporting/report-parser-slots/inv
|
|
|
4
4
|
import { investigateHeadlineTemplate } from '../../reporting/headline-templates/investigate.js';
|
|
5
5
|
import { deriveInvestigateWorkerStatus } from '../../reporting/derive-investigate-status.js';
|
|
6
6
|
import { DEFAULT_TASK_TIMEOUT_MS } from '../../config/schema.js';
|
|
7
|
-
import { EVIDENCE_RULE_INVESTIGATE, SCOPE_RULE_INVESTIGATE, ANNOTATOR_AWARENESS_INVESTIGATE, } from './implementer-criteria.js';
|
|
7
|
+
import { INVESTIGATE_PURPOSE_ORIENTATION, EVIDENCE_RULE_INVESTIGATE, SCOPE_RULE_INVESTIGATE, ANNOTATOR_AWARENESS_INVESTIGATE, INVESTIGATE_FAILURE_MODES, CONFIDENCE_REMINDER_INVESTIGATE, } from './implementer-criteria.js';
|
|
8
8
|
export function registerInvestigate(registry) {
|
|
9
9
|
registry.register({
|
|
10
10
|
routeName: 'investigate',
|
|
@@ -20,6 +20,12 @@ export function registerInvestigate(registry) {
|
|
|
20
20
|
}
|
|
21
21
|
function compilePrompt(input) {
|
|
22
22
|
const promptParts = [];
|
|
23
|
+
// Orientation goes FIRST — the worker needs to know why this
|
|
24
|
+
// investigation exists (caller will act on this answer; wrong file
|
|
25
|
+
// path becomes a bug) before reading the format spec / taxonomy.
|
|
26
|
+
// Without it, workers default to plausible-sounding answers with
|
|
27
|
+
// shaky citations.
|
|
28
|
+
promptParts.push(INVESTIGATE_PURPOSE_ORIENTATION);
|
|
23
29
|
promptParts.push([
|
|
24
30
|
'Produce an investigation report in this EXACT structured format. The deterministic',
|
|
25
31
|
'parser extracts citations, confidence, and unresolved items by section — do NOT emit',
|
|
@@ -29,14 +35,15 @@ function compilePrompt(input) {
|
|
|
29
35
|
'One paragraph stating the answer to the question, in plain prose.',
|
|
30
36
|
'',
|
|
31
37
|
'## Citations',
|
|
32
|
-
'One bullet per evidence item,
|
|
33
|
-
'
|
|
34
|
-
'
|
|
35
|
-
'
|
|
36
|
-
'
|
|
38
|
+
'One bullet per evidence item. Each bullet must start with `-` and contain `<file>:<LINE> — <claim>` (em-dash, OR `--` is also accepted). The parser tolerates an optional pair of backticks wrapping the path:line portion (e.g. `` `src/foo.ts:42` — claim ``) but the canonical form is without backticks.',
|
|
39
|
+
'Examples (use either form):',
|
|
40
|
+
' - src/foo.ts:42 — claim about line 42',
|
|
41
|
+
' - src/foo.ts:42-58 — claim about a span',
|
|
42
|
+
'Use a LINE-LINE range when an evidence span covers multiple lines.',
|
|
43
|
+
'If the question is fully project-level (no code evidence applies), write `(none)` on its own line — but only when Confidence is `low`.',
|
|
37
44
|
'',
|
|
38
45
|
'## Confidence',
|
|
39
|
-
'One of
|
|
46
|
+
'One of high, medium, or low, optionally followed by ` — <one-line rationale>`. Do NOT wrap the level in backticks; emit it as plain text.',
|
|
40
47
|
'',
|
|
41
48
|
'## Unresolved',
|
|
42
49
|
'Optional bullets describing follow-up questions; write `(none)` if there are none.',
|
|
@@ -59,7 +66,7 @@ function compilePrompt(input) {
|
|
|
59
66
|
// but evidence-grounding + scope-discipline + annotator-awareness
|
|
60
67
|
// apply just as much. Workers that cite hallucinated lines or
|
|
61
68
|
// speculate about unread files now have the rubric inline.
|
|
62
|
-
promptParts.push(EVIDENCE_RULE_INVESTIGATE, SCOPE_RULE_INVESTIGATE, ANNOTATOR_AWARENESS_INVESTIGATE);
|
|
69
|
+
promptParts.push(INVESTIGATE_FAILURE_MODES, CONFIDENCE_REMINDER_INVESTIGATE, EVIDENCE_RULE_INVESTIGATE, SCOPE_RULE_INVESTIGATE, ANNOTATOR_AWARENESS_INVESTIGATE);
|
|
63
70
|
return promptParts.join('\n\n');
|
|
64
71
|
}
|
|
65
72
|
export const toolConfig = {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tool-config.js","sourceRoot":"","sources":["../../../src/tools/investigate/tool-config.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAE1C,OAAO,EAAE,0BAA0B,EAAE,MAAM,iCAAiC,CAAC;AAG7E,OAAO,EAAE,uBAAuB,EAAE,MAAM,2DAA2D,CAAC;AAEpG,OAAO,EAAE,2BAA2B,EAAE,MAAM,mDAAmD,CAAC;AAChG,OAAO,EAAE,6BAA6B,EAAE,MAAM,8CAA8C,CAAC;AAC7F,OAAO,EAAE,uBAAuB,EAAE,MAAM,wBAAwB,CAAC;AACjE,OAAO,EACL,yBAAyB,EACzB,sBAAsB,EACtB,+BAA+B,GAChC,MAAM,2BAA2B,CAAC;AAEnC,MAAM,UAAU,mBAAmB,CAAC,QAA6B;IAC/D,QAAQ,CAAC,QAAQ,CAAC;QAChB,SAAS,EAAE,aAAa;QACxB,UAAU,EAAE,MAAM;QAClB,QAAQ,EAAE,cAAc;QACxB,OAAO,EAAE,MAAM;QACf,MAAM,EAAE,WAAW;QACnB,YAAY,EAAE,WAAW;QACzB,gBAAgB,EAAE,SAAS;QAC3B,oBAAoB,EAAE,KAAK;QAC3B,iBAAiB,EAAE,eAAe;KACnC,CAAC,CAAC;AACL,CAAC;AAoCD,SAAS,aAAa,CAAC,KAA+B;IACpD,MAAM,WAAW,GAAa,EAAE,CAAC;IACjC,WAAW,CAAC,IAAI,CACd;QACE,oFAAoF;QACpF,sFAAsF;QACtF,sFAAsF;QACtF,EAAE;QACF,YAAY;QACZ,mEAAmE;QACnE,EAAE;QACF,cAAc;QACd,
|
|
1
|
+
{"version":3,"file":"tool-config.js","sourceRoot":"","sources":["../../../src/tools/investigate/tool-config.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAE1C,OAAO,EAAE,0BAA0B,EAAE,MAAM,iCAAiC,CAAC;AAG7E,OAAO,EAAE,uBAAuB,EAAE,MAAM,2DAA2D,CAAC;AAEpG,OAAO,EAAE,2BAA2B,EAAE,MAAM,mDAAmD,CAAC;AAChG,OAAO,EAAE,6BAA6B,EAAE,MAAM,8CAA8C,CAAC;AAC7F,OAAO,EAAE,uBAAuB,EAAE,MAAM,wBAAwB,CAAC;AACjE,OAAO,EACL,+BAA+B,EAC/B,yBAAyB,EACzB,sBAAsB,EACtB,+BAA+B,EAC/B,yBAAyB,EACzB,+BAA+B,GAChC,MAAM,2BAA2B,CAAC;AAEnC,MAAM,UAAU,mBAAmB,CAAC,QAA6B;IAC/D,QAAQ,CAAC,QAAQ,CAAC;QAChB,SAAS,EAAE,aAAa;QACxB,UAAU,EAAE,MAAM;QAClB,QAAQ,EAAE,cAAc;QACxB,OAAO,EAAE,MAAM;QACf,MAAM,EAAE,WAAW;QACnB,YAAY,EAAE,WAAW;QACzB,gBAAgB,EAAE,SAAS;QAC3B,oBAAoB,EAAE,KAAK;QAC3B,iBAAiB,EAAE,eAAe;KACnC,CAAC,CAAC;AACL,CAAC;AAoCD,SAAS,aAAa,CAAC,KAA+B;IACpD,MAAM,WAAW,GAAa,EAAE,CAAC;IACjC,6DAA6D;IAC7D,mEAAmE;IACnE,iEAAiE;IACjE,iEAAiE;IACjE,mBAAmB;IACnB,WAAW,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;IAClD,WAAW,CAAC,IAAI,CACd;QACE,oFAAoF;QACpF,sFAAsF;QACtF,sFAAsF;QACtF,EAAE;QACF,YAAY;QACZ,mEAAmE;QACnE,EAAE;QACF,cAAc;QACd,8SAA8S;QAC9S,6BAA6B;QAC7B,yCAAyC;QACzC,2CAA2C;QAC3C,oEAAoE;QACpE,wIAAwI;QACxI,EAAE;QACF,eAAe;QACf,2IAA2I;QAC3I,EAAE;QACF,eAAe;QACf,oFAAoF;QACpF,iFAAiF;QACjF,kDAAkD;KACnD,CAAC,IAAI,CAAC,IAAI,CAAC,CACb,CAAC;IACF,KAAK,MAAM,KAAK,IAAI,KAAK,CAAC,qBAAqB,EAAE,CAAC;QAChD,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAClC,CAAC;IACD,IAAI,KAAK,CAAC,0BAA0B,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAChD,WAAW,CAAC,IAAI,CACd,gEAAgE;YAChE,KAAK,CAAC,0BAA0B,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAC/D,CAAC;IACJ,CAAC;IACD,WAAW,CAAC,IAAI,CAAC,aAAa,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC;IAChD,IAAI,KAAK,CAAC,qBAAqB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC3C,WAAW,CAAC,IAAI,CACd,sMAAsM,CACvM,CAAC;IACJ,CAAC;IACD,6DAA6D;IAC7D,mEAAmE;IACnE,kEAAkE;IAClE,8DAA8D;IAC9D,2DAA2D;IAC3D,WAAW,CAAC,IAAI,CACd,yBAAyB,EACzB,+BAA+B,EAC/B,yBAAyB,EACzB,sBAAsB,EACtB,+BAA+B,CAChC,CAAC;IACF,OAAO,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAClC,CAA
C;AAED,MAAM,CAAC,MAAM,UAAU,GAAoF;IACzG,IAAI,EAAE,aAAa;IACnB,QAAQ,EAAE,WAAW;IACrB,SAAS,EAAE,SAAS;IACpB,SAAS,EAAE,CAAC,KAA+B,EAAsB,EAAE;QACjE,MAAM,cAAc,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;QAC5C,OAAO,CAAC;gBACN,QAAQ,EAAE,KAAK,CAAC,QAAQ;gBACxB,cAAc;gBACd,SAAS,EAAE,KAAK,CAAC,sBAAsB;gBACvC,eAAe,EAAE,KAAK,CAAC,eAAe,IAAI,EAAE;gBAC5C,KAAK,EAAE,KAAK,CAAC,KAAK;aACnB,CAAC,CAAC;IACL,CAAC;IACD,aAAa,EAAE,CAAC,KAAuB,EAAE,GAAqB,EAAE,EAAE,CAAC,CAAC;QAClE,MAAM,EAAE,KAAK,CAAC,cAAc;QAC5B,SAAS,EAAE,SAAkB;QAC7B,YAAY,EAAE,cAAuB;QACrC,GAAG,EAAE,GAAG,CAAC,cAAc,EAAE,GAAG,IAAI,GAAG,CAAC,GAAG;QACvC,eAAe,EAAE,KAAK,CAAC,eAAe;QACtC,SAAS,EAAE,KAAK,CAAC,SAAS;QAC1B,KAAK,EAAE,KAAK,CAAC,KAAK,IAAI,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,KAAK,IAAI,MAAM;QAC1D,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,SAAS,IAAI,uBAAuB;QACpE,UAAU,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,UAAU,IAAI,EAAE;QACjD,aAAa,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,IAAI,UAAU;QAC/D,SAAS,EAAE,GAAG,CAAC,SAAS,IAAI,SAAS;KACtC,CAAC;IACF,YAAY,EAAE,uBAAuB;IACrC,gBAAgB,EAAE,2BAA2B;IAC7C,eAAe,EAAE;QACf,SAAS,EAAE,0BAA0B;KACtC;IACD,mBAAmB,EAAE,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE;QACtC,MAAM,MAAM,GAAG,QAAQ,CAAC,gBAAuD,CAAC;QAChF,MAAM,aAAa,GAAG,MAAM,EAAE,IAAI,KAAK,mBAAmB,CAAC,CAAC,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC;QACzF,MAAM,YAAY,GAAG,aAAa,EAAE,wBAAwB,IAAI,KAAK,CAAC;QAEtE,MAAM,OAAO,GAAG,6BAA6B,CAAC;YAC5C,YAAY;YACZ,WAAW,EAAE,MAAM,EAAE,IAAI,KAAK,mBAAmB;gBAC/C,CAAC,CAAC,EAAE,IAAI,EAAE,mBAAmB,EAAE,aAAa,EAAE,MAAM,CAAC,aAAa,EAAE,eAAe,EAAE,MAAM,CAAC,eAAe,EAAE;gBAC7G,CAAC,CAAC,EAAE,IAAI,EAAE,sBAAsB,EAAE;SACrC,CAAC,CAAC;QAEH,+DAA+D;QAC/D,IAAI,aAAa,IAAI,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC;YACzC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,gBAAgB,EAAE,CAAC;gBACzC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAS,CAAC,gBAAgB,GAAG,EAAE,aAAa,EAAE,CAAC;YACpE,CAAC;iBAAM,CAAC;gBACL,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,gBAAwB,CAAC,aAAa,GAAG,aAAa,CAAC;YAC9E,CAAC;YACA,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAS,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;YACjE,IAAI,OAAO,CAAC,gBAAgB,KAAK,SAAS,EAAE,CAAC;gBAC1C,QAAQ,CAAC,OAAO,CAAC,CA
AC,CAAS,CAAC,gBAAgB,GAAG,OAAO,CAAC,gBAAgB,CAAC;YAC3E,CAAC;QACH,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;CACF,CAAC"}
|
|
@@ -1,11 +1,57 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Review-specific implementer criteria.
|
|
3
3
|
*
|
|
4
|
+
* REVIEW'S PURPOSE — read this before adding categories.
|
|
5
|
+
* mma-review is the pre-merge gate. The maintainer accepting your verdict
|
|
6
|
+
* will NOT re-investigate before pressing merge — your output is treated
|
|
7
|
+
* as authoritative. The success criterion is:
|
|
8
|
+
*
|
|
9
|
+
* "After fixes, will the merge be safe, correct, and maintainable —
|
|
10
|
+
* such that a regression is unlikely to ship?"
|
|
11
|
+
*
|
|
12
|
+
* That criterion makes a finding load-bearing. A nit that doesn't change
|
|
13
|
+
* whether the merge is safe is low-priority no matter how clean the
|
|
14
|
+
* suggested rewrite reads. A cross-file ripple that breaks a caller
|
|
15
|
+
* not in the diff is the audit-equivalent of an unimplementable fix —
|
|
16
|
+
* load-bearing even though the named file looks fine in isolation.
|
|
17
|
+
*
|
|
4
18
|
* Review examines source code in named files against a focus area
|
|
5
19
|
* (security/correctness/performance/style). Findings should be
|
|
6
|
-
* line-quotable — that's the natural shape of code defects
|
|
20
|
+
* line-quotable — that's the natural shape of code defects — but
|
|
21
|
+
* cross-file findings backed by call-site references are also valid.
|
|
22
|
+
*/
|
|
23
|
+
/**
|
|
24
|
+
* The orientation block. Goes at the TOP of every review prompt.
|
|
25
|
+
*
|
|
26
|
+
* This is the load-bearing addition. Without an explicit purpose
|
|
27
|
+
* statement, workers default to "find issues in this file" — which
|
|
28
|
+
* produces line-by-line proofreading and misses the cross-file ripple,
|
|
29
|
+
* test-gap, and implicit-contract findings that actually block merges.
|
|
7
30
|
*/
|
|
31
|
+
export declare const REVIEW_PURPOSE_ORIENTATION: string;
|
|
8
32
|
export declare const EVIDENCE_RULE_REVIEW: string;
|
|
9
33
|
export declare const SCOPE_RULE_REVIEW: string;
|
|
34
|
+
/**
|
|
35
|
+
* The failure-mode taxonomy for code reviews.
|
|
36
|
+
*
|
|
37
|
+
* Without this block, workers default to line-by-line proofreading of
|
|
38
|
+
* the named file and miss cross-file ripples, test gaps, and
|
|
39
|
+
* implicit-contract regressions — the findings that actually block
|
|
40
|
+
* merges. The 10 categories below are what a careful maintainer would
|
|
41
|
+
* scan for before pressing merge.
|
|
42
|
+
*/
|
|
43
|
+
export declare const CODE_REVIEW_FAILURE_MODES: string;
|
|
44
|
+
/**
|
|
45
|
+
* Counter-balance to the SEVERITY_LADDER's anti-inflation hint.
|
|
46
|
+
*
|
|
47
|
+
* The shared severity ladder ends with "Workers commonly inflate —
|
|
48
|
+
* resist the urge." That bias is correct in the limit (no, the missing
|
|
49
|
+
* comma is not critical) but produces UNDER-finding when combined with
|
|
50
|
+
* a thin per-tool rubric. For code review specifically, the typical
|
|
51
|
+
* failure is missing the cross-file ripple or test gap because the
|
|
52
|
+
* worker only looked at the diff in the named file. This block tells
|
|
53
|
+
* the worker that under-finding is the more common review failure.
|
|
54
|
+
*/
|
|
55
|
+
export declare const THOROUGHNESS_REMINDER_REVIEW: string;
|
|
10
56
|
export declare const ANNOTATOR_AWARENESS_REVIEW: string;
|
|
11
57
|
//# sourceMappingURL=implementer-criteria.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"implementer-criteria.d.ts","sourceRoot":"","sources":["../../../src/tools/review/implementer-criteria.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"implementer-criteria.d.ts","sourceRoot":"","sources":["../../../src/tools/review/implementer-criteria.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAEH;;;;;;;GAOG;AACH,eAAO,MAAM,0BAA0B,QAmB3B,CAAC;AAEb,eAAO,MAAM,oBAAoB,QAQrB,CAAC;AAEb,eAAO,MAAM,iBAAiB,QAOlB,CAAC;AAEb;;;;;;;;GAQG;AACH,eAAO,MAAM,yBAAyB,QAmB1B,CAAC;AAEb;;;;;;;;;;GAUG;AACH,eAAO,MAAM,4BAA4B,QAW7B,CAAC;AAEb,eAAO,MAAM,0BAA0B,QAO3B,CAAC"}
|
|
@@ -1,27 +1,128 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Review-specific implementer criteria.
|
|
3
3
|
*
|
|
4
|
+
* REVIEW'S PURPOSE — read this before adding categories.
|
|
5
|
+
* mma-review is the pre-merge gate. The maintainer accepting your verdict
|
|
6
|
+
* will NOT re-investigate before pressing merge — your output is treated
|
|
7
|
+
* as authoritative. The success criterion is:
|
|
8
|
+
*
|
|
9
|
+
* "After fixes, will the merge be safe, correct, and maintainable —
|
|
10
|
+
* such that a regression is unlikely to ship?"
|
|
11
|
+
*
|
|
12
|
+
* That criterion makes a finding load-bearing. A nit that doesn't change
|
|
13
|
+
* whether the merge is safe is low-priority no matter how clean the
|
|
14
|
+
* suggested rewrite reads. A cross-file ripple that breaks a caller
|
|
15
|
+
* not in the diff is the audit-equivalent of an unimplementable fix —
|
|
16
|
+
* load-bearing even though the named file looks fine in isolation.
|
|
17
|
+
*
|
|
4
18
|
* Review examines source code in named files against a focus area
|
|
5
19
|
* (security/correctness/performance/style). Findings should be
|
|
6
|
-
* line-quotable — that's the natural shape of code defects
|
|
20
|
+
* line-quotable — that's the natural shape of code defects — but
|
|
21
|
+
* cross-file findings backed by call-site references are also valid.
|
|
22
|
+
*/
|
|
23
|
+
/**
|
|
24
|
+
* The orientation block. Goes at the TOP of every review prompt.
|
|
25
|
+
*
|
|
26
|
+
* This is the load-bearing addition. Without an explicit purpose
|
|
27
|
+
* statement, workers default to "find issues in this file" — which
|
|
28
|
+
* produces line-by-line proofreading and misses the cross-file ripple,
|
|
29
|
+
* test-gap, and implicit-contract findings that actually block merges.
|
|
7
30
|
*/
|
|
31
|
+
export const REVIEW_PURPOSE_ORIENTATION = [
|
|
32
|
+
'Why this review exists:',
|
|
33
|
+
'mma-review is the pre-merge gate. The maintainer accepting your verdict will NOT re-investigate before merging — your verdict is treated as authoritative. A miss here ships to production.',
|
|
34
|
+
'',
|
|
35
|
+
'Your job is to find anything that would make the merge unsafe, including issues that look fine in the named files in isolation:',
|
|
36
|
+
'- a changed function with no test (or with a test that does not exercise the change)',
|
|
37
|
+
'- a changed signature whose direct callers (visible in the named files or via grep on the symbol) were not updated',
|
|
38
|
+
'- a change that introduces a new edge case (null/empty/timeout/error path) the code does not handle',
|
|
39
|
+
'- a race or concurrency hazard the change exposes (shared state mutation, missing lock, await-after-check pattern)',
|
|
40
|
+
'- a resource leak the change introduces (unclosed handle, untracked promise, file descriptor not freed)',
|
|
41
|
+
'- a backward-compatibility break in a public API or wire schema',
|
|
42
|
+
'- a security regression (auth bypass, injection, untrusted input flowing to a sink, data exposure)',
|
|
43
|
+
'- a performance regression (N+1 query, unbounded loop, blocking I/O on a hot path, unnecessary deep clone)',
|
|
44
|
+
'- an implicit-contract assumption — the change relies on the caller doing X but the contract does not state X',
|
|
45
|
+
'- a pre-existing bug entangled with the change (NOT a finding against this diff — separate cleanly)',
|
|
46
|
+
'',
|
|
47
|
+
'A finding that points at any of these is high-value EVEN IF the prose of the change reads cleanly. Conversely, a stylistic nit that does not change merge safety is low-priority no matter how clean the suggested rewrite reads.',
|
|
48
|
+
'',
|
|
49
|
+
'The completion test: would a maintainer who reads only your review and the diff (not the surrounding code) understand which changes are required, why each is required, and where each lives — well enough to apply the fix and re-merge?',
|
|
50
|
+
].join('\n');
|
|
8
51
|
export const EVIDENCE_RULE_REVIEW = [
|
|
9
52
|
'Evidence grounding (REQUIRED for every finding):',
|
|
10
53
|
'- Cite `file:line` (or `file:line-line` for a span) where the issue lives.',
|
|
11
|
-
'- Quote the exact code excerpt or command output that demonstrates the issue.
|
|
12
|
-
'-
|
|
54
|
+
'- Quote the exact code excerpt or command output that demonstrates the issue. Do not paraphrase — quote.',
|
|
55
|
+
'- For CROSS-FILE findings (a change in named file A breaks a caller B), cite both: the line in A that triggers the break, AND the call site in B that breaks. If B is not in the named files but is reachable via grep on the changed symbol, name it explicitly. Cross-file findings backed by call-site references are FULLY VALID — do not drop them as out-of-scope.',
|
|
56
|
+
'- For TEST-GAP findings, name the test file you would expect to cover the change AND quote the diff line that has no test coverage. If no test file exists for the changed area, that itself is the finding.',
|
|
57
|
+
'- For IMPLICIT-CONTRACT findings, quote the line in the named file that depends on the assumption AND name the contract source (the public docstring, the type, the README) that does not state the assumption.',
|
|
58
|
+
'- If you cannot quote evidence in one of these forms, do NOT raise the finding. Note "investigation needed" in your summary instead.',
|
|
13
59
|
].join('\n');
|
|
14
60
|
export const SCOPE_RULE_REVIEW = [
|
|
15
61
|
'Scope:',
|
|
16
62
|
'- The named files. Behavior of direct callers/callees can be referenced when visible in those files.',
|
|
17
|
-
'-
|
|
63
|
+
'- Cross-file ripples ARE in scope when the changed symbol is searchable: if the named files change a public function, look for its call sites in the rest of the repo and flag any caller that would break. This is the highest-value cross-file work for a code review.',
|
|
64
|
+
'- Test gaps ARE in scope: if the named files change behavior and a test file is the natural sibling (e.g. `foo.ts` → `tests/foo.test.ts`), check whether the test exercises the change.',
|
|
65
|
+
'- Out of scope: speculation about untouched files unrelated to the diff; doc/spec issues (those belong in an audit, not a review); style nits when the focus area is security/correctness/performance.',
|
|
66
|
+
'- Pre-existing bugs (the diff did not introduce them) belong in their own backlog item, not in this review. Note them in a "Pre-existing — out of scope" section if you spot them, but DO NOT mix them into the merge-blocking findings.',
|
|
67
|
+
].join('\n');
|
|
68
|
+
/**
|
|
69
|
+
* The failure-mode taxonomy for code reviews.
|
|
70
|
+
*
|
|
71
|
+
* Without this block, workers default to line-by-line proofreading of
|
|
72
|
+
* the named file and miss cross-file ripples, test gaps, and
|
|
73
|
+
* implicit-contract regressions — the findings that actually block
|
|
74
|
+
* merges. The 10 categories below are what a careful maintainer would
|
|
75
|
+
* scan for before pressing merge.
|
|
76
|
+
*/
|
|
77
|
+
export const CODE_REVIEW_FAILURE_MODES = [
|
|
78
|
+
'Look for these kinds of issues — applicable to ALL code reviews regardless of focus. The focus area (security/correctness/performance/style) tells you which lens to weight, but every code review should sweep the full taxonomy:',
|
|
79
|
+
'',
|
|
80
|
+
'1. TEST GAP — the diff changes behavior, but no test exercises the change. Either: no test file exists, OR the test file exists but the changed branch is not covered. **Always check for the natural sibling test file when reviewing source-code changes.**',
|
|
81
|
+
'2. CROSS-FILE RIPPLE — a changed signature, return shape, public type, or wire schema is referenced from another file that was not updated. **If the named files change a public symbol, grep for the symbol and flag any unupdated caller.**',
|
|
82
|
+
'3. PRE-EXISTING-BUG-VS-NEW-REGRESSION — a defect exists in the named files but the diff did not introduce it. Do NOT blame the diff for prior bugs; note them in a separate "Pre-existing — out of scope" section. Conversely, if the diff DID introduce or worsen a defect, flag it as a regression.',
|
|
83
|
+
'4. MISSING EDGE CASE — the change adds a code path but does not handle null/undefined/empty/timeout/error/zero/negative inputs the path could see. Walk the change against each natural boundary value.',
|
|
84
|
+
'5. RACE / CONCURRENCY — the change introduces shared state mutation, removes a lock, splits a previously-atomic operation, or adds an await between a check and an action (TOCTOU). Flag these even when no test reproduces.',
|
|
85
|
+
'6. RESOURCE LEAK — the change opens a handle (file, socket, lock, transaction, AbortController) without a guaranteed close path; or introduces an untracked promise that may reject silently.',
|
|
86
|
+
'7. BACKWARD-COMPAT BREAK — the change modifies a public API, exported type, wire schema, environment variable, or CLI flag in a way that breaks existing callers. Flag and require a migration note.',
|
|
87
|
+
'8. SECURITY REGRESSION — the change introduces or worsens auth bypass, injection (SQL/command/prompt), untrusted input flowing to a sink (eval/exec/HTML/SQL), data exposure, or weakened sandboxing. Apply the security lens to every change, not just security-flagged ones.',
|
|
88
|
+
'9. PERFORMANCE REGRESSION — the change adds N+1 queries, unbounded loops, blocking I/O on a hot path, unnecessary deep clones, or shifts work from build/init time to request time. Apply the performance lens to every change, not just performance-flagged ones.',
|
|
89
|
+
'10. IMPLICIT-CONTRACT ASSUMPTION — the changed code relies on the caller (or environment) doing X but the contract (docstring, type, README) does not state X. The change works for in-repo callers but will silently break when the contract is read literally.',
|
|
90
|
+
'',
|
|
91
|
+
'Severity calibration for code reviews:',
|
|
92
|
+
'- critical: the merge would corrupt data, expose credentials, allow auth bypass, break a public API in production, or cause production outage. A reader who applied the fix incorrectly could ship the regression.',
|
|
93
|
+
'- high: the merge would introduce a real bug, security gap, or substantial regression that blocks release. Cross-file ripple where a caller is broken. Missing edge case in a code path that production traffic will hit.',
|
|
94
|
+
'- medium: a real issue worth fixing soon: test gap on a non-trivial change, race condition with low contention, performance regression on a non-hot path, missing edge case on an unlikely input.',
|
|
95
|
+
'- low: stylistic / naming / dead-code / minor-refactor opportunity. Does not change merge safety.',
|
|
96
|
+
].join('\n');
|
|
97
|
+
/**
|
|
98
|
+
* Counter-balance to the SEVERITY_LADDER's anti-inflation hint.
|
|
99
|
+
*
|
|
100
|
+
* The shared severity ladder ends with "Workers commonly inflate —
|
|
101
|
+
* resist the urge." That bias is correct in the limit (no, the missing
|
|
102
|
+
* comma is not critical) but produces UNDER-finding when combined with
|
|
103
|
+
* a thin per-tool rubric. For code review specifically, the typical
|
|
104
|
+
* failure is missing the cross-file ripple or test gap because the
|
|
105
|
+
* worker only looked at the diff in the named file. This block tells
|
|
106
|
+
* the worker that under-finding is the more common review failure.
|
|
107
|
+
*/
|
|
108
|
+
export const THOROUGHNESS_REMINDER_REVIEW = [
|
|
109
|
+
'Thoroughness expectation for code reviews:',
|
|
110
|
+
'- For non-trivial diffs (>30 changed lines OR a public symbol changed), zero or 1-2 findings is unusual and usually indicates the rubric was applied too narrowly. Sweep the full failure-mode taxonomy above before declaring "no findings."',
|
|
111
|
+
'- The SEVERITY_LADDER warns against inflation. That warning is calibrated — but the typical UNDER-finding in code review is missing the cross-file ripple or test gap because the worker only looked at the diff in the named file. Apply the failure-mode taxonomy thoroughly first; THEN calibrate severity downward where the impact is small.',
|
|
112
|
+
'- Do not invent findings to hit a quota. But if you have applied all 10 failure modes and still have only stylistic nits, double-check categories 1, 2, 4, and 10 (test gap, cross-file ripple, missing edge case, implicit-contract assumption) — these are the ones reviewers most often miss on first pass and the ones most likely to ship a regression.',
|
|
113
|
+
'',
|
|
114
|
+
'Cross-file pass (REQUIRED when the named files change a public symbol — exported function, exported type, route handler, or wire-schema field):',
|
|
115
|
+
'- Make ONE explicit pass: identify the changed public symbols, grep for their call sites in the rest of the repo, and check whether each call site is consistent with the new signature/return-shape/contract.',
|
|
116
|
+
'- For each (changed symbol, call site) pair, ask: does the call site as currently written still work after this change?',
|
|
117
|
+
'- Worked example. A diff in `src/foo.ts` renames `getUserById(id)` to `getUserById(id, opts)` and makes `opts` required. The grep finds 3 call sites in `src/handlers/auth.ts`, `src/handlers/billing.ts`, `tests/integration/users.test.ts`. None pass `opts`. Flag this as HIGH (or CRITICAL if `auth.ts` would no-op silently rather than error). The fact that `src/foo.ts` looks clean in isolation is exactly the kind of false-clean that ships regressions.',
|
|
118
|
+
'- Most reviewers miss findings of this shape on first pass because they only read the named files. The cross-file pass forces the grep.',
|
|
18
119
|
].join('\n');
|
|
19
120
|
export const ANNOTATOR_AWARENESS_REVIEW = [
|
|
20
121
|
'After your output, an annotator validates each finding against this code-review rubric:',
|
|
21
|
-
'- Is the finding within the requested focus area?',
|
|
22
|
-
'- Does the evidence quote real code from the named files?',
|
|
23
|
-
'- Is the severity calibrated to actual impact?',
|
|
24
|
-
'- Is the finding within
|
|
25
|
-
'Self-check before emitting. Findings that fail any check are downgraded or dropped.',
|
|
122
|
+
'- Is the finding within the requested focus area (or universally applicable: security, performance, correctness apply to every review)?',
|
|
123
|
+
'- Does the evidence quote real code from the named files OR cite a real call site reachable via grep on the changed symbol?',
|
|
124
|
+
'- Is the severity calibrated to actual merge-safety impact (would a reader who applied the fix incorrectly ship a regression)?',
|
|
125
|
+
'- Is the finding within scope (named files + cross-file ripples on changed symbols + sibling test files), or is it speculation about unrelated code?',
|
|
126
|
+
'Self-check before emitting. Findings that fail any check are downgraded or dropped — but cross-file ripple findings backed by call-site references and test-gap findings backed by sibling-test-file references are FULLY VALID, do NOT downgrade them as "speculation about untouched files."',
|
|
26
127
|
].join('\n');
|
|
27
128
|
//# sourceMappingURL=implementer-criteria.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"implementer-criteria.js","sourceRoot":"","sources":["../../../src/tools/review/implementer-criteria.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"implementer-criteria.js","sourceRoot":"","sources":["../../../src/tools/review/implementer-criteria.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAEH;;;;;;;GAOG;AACH,MAAM,CAAC,MAAM,0BAA0B,GAAG;IACxC,yBAAyB;IACzB,6LAA6L;IAC7L,EAAE;IACF,iIAAiI;IACjI,sFAAsF;IACtF,oHAAoH;IACpH,qGAAqG;IACrG,oHAAoH;IACpH,yGAAyG;IACzG,iEAAiE;IACjE,oGAAoG;IACpG,4GAA4G;IAC5G,+GAA+G;IAC/G,qGAAqG;IACrG,EAAE;IACF,mOAAmO;IACnO,EAAE;IACF,2OAA2O;CAC5O,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,CAAC,MAAM,oBAAoB,GAAG;IAClC,kDAAkD;IAClD,4EAA4E;IAC5E,0GAA0G;IAC1G,0WAA0W;IAC1W,8MAA8M;IAC9M,iNAAiN;IACjN,sIAAsI;CACvI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,CAAC,MAAM,iBAAiB,GAAG;IAC/B,QAAQ;IACR,sGAAsG;IACtG,0QAA0Q;IAC1Q,yLAAyL;IACzL,wMAAwM;IACxM,0OAA0O;CAC3O,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,yBAAyB,GAAG;IACvC,oOAAoO;IACpO,EAAE;IACF,+PAA+P;IAC/P,+OAA+O;IAC/O,uSAAuS;IACvS,yMAAyM;IACzM,8NAA8N;IAC9N,+LAA+L;IAC/L,sMAAsM;IACtM,gRAAgR;IAChR,oQAAoQ;IACpQ,kQAAkQ;IAClQ,EAAE;IACF,wCAAwC;IACxC,oNAAoN;IACpN,2NAA2N;IAC3N,mMAAmM;IACnM,mGAAmG;CACpG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb;;;;;;;;;;GAUG;AACH,MAAM,CAAC,MAAM,4BAA4B,GAAG;IAC1C,4CAA4C;IAC5C,+OAA+O;IAC/O,mVAAmV;IACnV,8VAA8V;IAC9V,EAAE;IACF,iJAAiJ;IACjJ,gNAAgN;IAChN,yHAAyH;IACzH,qcAAqc;IACrc,yIAAyI;CAC1I,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,CAAC,MAAM,0BAA0B,GAAG;IACxC,yFAAyF;IACzF,yIAAyI;IACzI,6HAA6H;IAC7H,gIAAgI;IAChI,sJAAsJ;IACtJ,gSAAgS;CACjS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tool-config.d.ts","sourceRoot":"","sources":["../../../src/tools/review/tool-config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,6CAA6C,CAAC;AAElF,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,aAAa,CAAC;AAEzC,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,sCAAsC,CAAC;AAEvE,OAAO,EAAmB,KAAK,WAAW,EAAE,MAAM,6CAA6C,CAAC;
|
|
1
|
+
{"version":3,"file":"tool-config.d.ts","sourceRoot":"","sources":["../../../src/tools/review/tool-config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,6CAA6C,CAAC;AAElF,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,aAAa,CAAC;AAEzC,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,sCAAsC,CAAC;AAEvE,OAAO,EAAmB,KAAK,WAAW,EAAE,MAAM,6CAA6C,CAAC;AAchG,wBAAgB,cAAc,CAAC,QAAQ,EAAE,mBAAmB,GAAG,IAAI,CAYlE;AA2GD,eAAO,MAAM,UAAU,EAAE,UAAU,CAAC,KAAK,EAAE,WAAW,EAAE,OAAO,CAsC9D,CAAC"}
|