@zhixuan92/multi-model-agent-core 4.0.6 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bounded-execution/index.d.ts +2 -0
- package/dist/bounded-execution/index.d.ts.map +1 -1
- package/dist/bounded-execution/index.js +2 -0
- package/dist/bounded-execution/index.js.map +1 -1
- package/dist/bounded-execution/safety-max-turns.d.ts +17 -0
- package/dist/bounded-execution/safety-max-turns.d.ts.map +1 -0
- package/dist/bounded-execution/safety-max-turns.js +17 -0
- package/dist/bounded-execution/safety-max-turns.js.map +1 -0
- package/dist/bounded-execution/stall-watchdog.d.ts +16 -0
- package/dist/bounded-execution/stall-watchdog.d.ts.map +1 -0
- package/dist/bounded-execution/stall-watchdog.js +69 -0
- package/dist/bounded-execution/stall-watchdog.js.map +1 -0
- package/dist/intake/brief-compiler-slots/delegate.d.ts +18 -0
- package/dist/intake/brief-compiler-slots/delegate.d.ts.map +1 -1
- package/dist/intake/brief-compiler-slots/delegate.js +36 -3
- package/dist/intake/brief-compiler-slots/delegate.js.map +1 -1
- package/dist/lifecycle/handlers/execution-context-builder.d.ts.map +1 -1
- package/dist/lifecycle/handlers/execution-context-builder.js +2 -1
- package/dist/lifecycle/handlers/execution-context-builder.js.map +1 -1
- package/dist/lifecycle/handlers/quality-chain-handlers.d.ts.map +1 -1
- package/dist/lifecycle/handlers/quality-chain-handlers.js +10 -1
- package/dist/lifecycle/handlers/quality-chain-handlers.js.map +1 -1
- package/dist/lifecycle/parallel-criteria-dispatcher.d.ts +62 -0
- package/dist/lifecycle/parallel-criteria-dispatcher.d.ts.map +1 -0
- package/dist/lifecycle/parallel-criteria-dispatcher.js +328 -0
- package/dist/lifecycle/parallel-criteria-dispatcher.js.map +1 -0
- package/dist/lifecycle/parallel-criteria-routes.d.ts +16 -0
- package/dist/lifecycle/parallel-criteria-routes.d.ts.map +1 -0
- package/dist/lifecycle/parallel-criteria-routes.js +147 -0
- package/dist/lifecycle/parallel-criteria-routes.js.map +1 -0
- package/dist/lifecycle/task-runner.d.ts.map +1 -1
- package/dist/lifecycle/task-runner.js +268 -97
- package/dist/lifecycle/task-runner.js.map +1 -1
- package/dist/providers/anthropic-messages-adapter.d.ts.map +1 -1
- package/dist/providers/anthropic-messages-adapter.js +8 -1
- package/dist/providers/anthropic-messages-adapter.js.map +1 -1
- package/dist/providers/provider-factory.d.ts.map +1 -1
- package/dist/providers/provider-factory.js +2 -1
- package/dist/providers/provider-factory.js.map +1 -1
- package/dist/providers/runner-adapter.d.ts +8 -0
- package/dist/providers/runner-adapter.d.ts.map +1 -1
- package/dist/providers/runner-shell-types.d.ts +7 -0
- package/dist/providers/runner-shell-types.d.ts.map +1 -1
- package/dist/providers/runner-shell.d.ts +51 -1
- package/dist/providers/runner-shell.d.ts.map +1 -1
- package/dist/providers/runner-shell.js +130 -0
- package/dist/providers/runner-shell.js.map +1 -1
- package/dist/reporting/report-parser-slots/investigate-report.d.ts.map +1 -1
- package/dist/reporting/report-parser-slots/investigate-report.js +41 -2
- package/dist/reporting/report-parser-slots/investigate-report.js.map +1 -1
- package/dist/review/annotator-engine.d.ts +7 -1
- package/dist/review/annotator-engine.d.ts.map +1 -1
- package/dist/review/annotator-engine.js +15 -2
- package/dist/review/annotator-engine.js.map +1 -1
- package/dist/review/annotator-prompt-builder.d.ts.map +1 -1
- package/dist/review/annotator-prompt-builder.js +10 -2
- package/dist/review/annotator-prompt-builder.js.map +1 -1
- package/dist/review/reviewer-engine.d.ts.map +1 -1
- package/dist/review/reviewer-engine.js +4 -3
- package/dist/review/reviewer-engine.js.map +1 -1
- package/dist/review/templates/annotator-audit.d.ts.map +1 -1
- package/dist/review/templates/annotator-audit.js +5 -3
- package/dist/review/templates/annotator-audit.js.map +1 -1
- package/dist/review/templates/annotator-debug.d.ts.map +1 -1
- package/dist/review/templates/annotator-debug.js +11 -7
- package/dist/review/templates/annotator-debug.js.map +1 -1
- package/dist/review/templates/annotator-investigate.d.ts.map +1 -1
- package/dist/review/templates/annotator-investigate.js +6 -3
- package/dist/review/templates/annotator-investigate.js.map +1 -1
- package/dist/review/templates/annotator-review.d.ts.map +1 -1
- package/dist/review/templates/annotator-review.js +13 -6
- package/dist/review/templates/annotator-review.js.map +1 -1
- package/dist/review/templates/annotator-shared.d.ts +8 -1
- package/dist/review/templates/annotator-shared.d.ts.map +1 -1
- package/dist/review/templates/annotator-shared.js.map +1 -1
- package/dist/review/templates/annotator-verify.d.ts.map +1 -1
- package/dist/review/templates/annotator-verify.js +11 -5
- package/dist/review/templates/annotator-verify.js.map +1 -1
- package/dist/tools/audit/implementer-criteria.d.ts +60 -4
- package/dist/tools/audit/implementer-criteria.d.ts.map +1 -1
- package/dist/tools/audit/implementer-criteria.js +118 -11
- package/dist/tools/audit/implementer-criteria.js.map +1 -1
- package/dist/tools/audit/schema.d.ts +3 -10
- package/dist/tools/audit/schema.d.ts.map +1 -1
- package/dist/tools/audit/schema.js +3 -4
- package/dist/tools/audit/schema.js.map +1 -1
- package/dist/tools/audit/tool-config.d.ts.map +1 -1
- package/dist/tools/audit/tool-config.js +70 -36
- package/dist/tools/audit/tool-config.js.map +1 -1
- package/dist/tools/criteria-types.d.ts +27 -0
- package/dist/tools/criteria-types.d.ts.map +1 -0
- package/dist/tools/criteria-types.js +25 -0
- package/dist/tools/criteria-types.js.map +1 -0
- package/dist/tools/debug/implementer-criteria.d.ts +47 -4
- package/dist/tools/debug/implementer-criteria.d.ts.map +1 -1
- package/dist/tools/debug/implementer-criteria.js +104 -13
- package/dist/tools/debug/implementer-criteria.js.map +1 -1
- package/dist/tools/debug/tool-config.d.ts.map +1 -1
- package/dist/tools/debug/tool-config.js +31 -4
- package/dist/tools/debug/tool-config.js.map +1 -1
- package/dist/tools/delegate/implementer-criteria.d.ts +62 -0
- package/dist/tools/delegate/implementer-criteria.d.ts.map +1 -0
- package/dist/tools/delegate/implementer-criteria.js +114 -0
- package/dist/tools/delegate/implementer-criteria.js.map +1 -0
- package/dist/tools/execute-plan/implementer-criteria.d.ts +52 -0
- package/dist/tools/execute-plan/implementer-criteria.d.ts.map +1 -0
- package/dist/tools/execute-plan/implementer-criteria.js +104 -0
- package/dist/tools/execute-plan/implementer-criteria.js.map +1 -0
- package/dist/tools/execute-plan/tool-config.d.ts.map +1 -1
- package/dist/tools/execute-plan/tool-config.js +17 -3
- package/dist/tools/execute-plan/tool-config.js.map +1 -1
- package/dist/tools/investigate/implementer-criteria.d.ts +51 -5
- package/dist/tools/investigate/implementer-criteria.d.ts.map +1 -1
- package/dist/tools/investigate/implementer-criteria.js +109 -13
- package/dist/tools/investigate/implementer-criteria.js.map +1 -1
- package/dist/tools/investigate/tool-config.d.ts.map +1 -1
- package/dist/tools/investigate/tool-config.js +20 -8
- package/dist/tools/investigate/tool-config.js.map +1 -1
- package/dist/tools/parallel-criteria-prompt.d.ts +106 -0
- package/dist/tools/parallel-criteria-prompt.d.ts.map +1 -0
- package/dist/tools/parallel-criteria-prompt.js +86 -0
- package/dist/tools/parallel-criteria-prompt.js.map +1 -0
- package/dist/tools/review/implementer-criteria.d.ts +50 -1
- package/dist/tools/review/implementer-criteria.d.ts.map +1 -1
- package/dist/tools/review/implementer-criteria.js +113 -9
- package/dist/tools/review/implementer-criteria.js.map +1 -1
- package/dist/tools/review/tool-config.d.ts.map +1 -1
- package/dist/tools/review/tool-config.js +50 -7
- package/dist/tools/review/tool-config.js.map +1 -1
- package/dist/tools/verify/implementer-criteria.d.ts +49 -0
- package/dist/tools/verify/implementer-criteria.d.ts.map +1 -1
- package/dist/tools/verify/implementer-criteria.js +104 -8
- package/dist/tools/verify/implementer-criteria.js.map +1 -1
- package/dist/tools/verify/tool-config.d.ts.map +1 -1
- package/dist/tools/verify/tool-config.js +22 -2
- package/dist/tools/verify/tool-config.js.map +1 -1
- package/dist/types/run-result.d.ts +18 -0
- package/dist/types/run-result.d.ts.map +1 -1
- package/dist/types/task-spec.d.ts +11 -0
- package/dist/types/task-spec.d.ts.map +1 -1
- package/package.json +1 -1
|
@@ -1,15 +1,19 @@
|
|
|
1
1
|
export const annotatorDebugTemplate = {
|
|
2
2
|
role: 'debugging hypothesis',
|
|
3
|
-
onBriefCheck: 'Each finding should be a hypothesis,
|
|
3
|
+
onBriefCheck: 'Each finding should be a hypothesis with a complete trace from symptom to cause, not a point observation at the symptom. AND: the cited cause must come UPSTREAM of the cited symptom in the call/data flow — a finding that names a symptom location as the cause is misdirection (SYMPTOM-NOT-CAUSE failure mode). Findings consistent with the debug failure-mode taxonomy (symptom-not-cause, scapegoat file, incomplete trace, untested hypothesis, parallel causes, pre-existing-vs-new entanglement, wrong fix scope, missing reproduction, confidence overstatement) and backed by file:line citations at each step are valid even when the chain has marked gaps — partial-evidence hypotheses with explicit "the gap is here, verify by X" notes are FULLY VALID, do NOT downgrade them as "speculation".',
|
|
4
4
|
evidenceRule: [
|
|
5
|
-
'- Debug findings are hypotheses with
|
|
6
|
-
'-
|
|
7
|
-
'-
|
|
8
|
-
'-
|
|
5
|
+
'- Debug findings are hypotheses with REASONING CHAINS, not point observations.',
|
|
6
|
+
'- Each finding must have at least three citations: SYMPTOM (where the failure surfaces) → INTERMEDIATE STATE (the wrong value, the unexpected branch) → CAUSE (the file:line that, if changed, would prevent the failure).',
|
|
7
|
+
'- Evidence forms accepted: reproducer commands, captured logs / stack traces, observed values, code-path traces with file:line per step.',
|
|
8
|
+
'- A finding with NO falsifier (no way to verify the proposed fix worked) is a guess, not a finding.',
|
|
9
|
+
'- Cross-file tracing (symptom in one file, cause in another reachable via call/data flow) is REQUIRED and FULLY VALID — not "speculation about untouched files".',
|
|
10
|
+
'- Severity reflects evidence strength AND impact: confirmed root cause that ships a wrong fix = critical; confirmed root cause with full chain = high; plausible candidate with most of the chain = medium; partial trace / multiple plausible explanations = low.',
|
|
9
11
|
].join('\n'),
|
|
10
12
|
scopeRule: [
|
|
11
|
-
'- Cross-file tracing is in scope and
|
|
12
|
-
'-
|
|
13
|
+
'- Cross-file tracing is in scope and REQUIRED to follow the failure path. Cross-file findings are not out-of-scope just because the named files do not include the cause file.',
|
|
14
|
+
'- Reproduction discovery is in scope: if no reproduction was provided, the worker should infer one and state it explicitly.',
|
|
15
|
+
'- Pre-existing-vs-new separation is in scope: multiple bugs in one failure should be separated, with the answered one as the primary finding and others noted separately.',
|
|
16
|
+
'- Out of scope: applied fixes (the worker should propose, not apply); unrelated code-review remarks; broadening into general code review of files not on the failure path.',
|
|
13
17
|
].join('\n'),
|
|
14
18
|
};
|
|
15
19
|
//# sourceMappingURL=annotator-debug.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"annotator-debug.js","sourceRoot":"","sources":["../../../src/review/templates/annotator-debug.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,sBAAsB,GAAsB;IACvD,IAAI,EAAE,sBAAsB;IAC5B,YAAY,EAAE,
|
|
1
|
+
{"version":3,"file":"annotator-debug.js","sourceRoot":"","sources":["../../../src/review/templates/annotator-debug.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,sBAAsB,GAAsB;IACvD,IAAI,EAAE,sBAAsB;IAC5B,YAAY,EAAE,qxBAAqxB;IACnyB,YAAY,EAAE;QACZ,gFAAgF;QAChF,4NAA4N;QAC5N,0IAA0I;QAC1I,qGAAqG;QACrG,kKAAkK;QAClK,oQAAoQ;KACrQ,CAAC,IAAI,CAAC,IAAI,CAAC;IACZ,SAAS,EAAE;QACT,gLAAgL;QAChL,6HAA6H;QAC7H,2KAA2K;QAC3K,4KAA4K;KAC7K,CAAC,IAAI,CAAC,IAAI,CAAC;CACb,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"annotator-investigate.d.ts","sourceRoot":"","sources":["../../../src/review/templates/annotator-investigate.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AAE/D,eAAO,MAAM,4BAA4B,EAAE,
|
|
1
|
+
{"version":3,"file":"annotator-investigate.d.ts","sourceRoot":"","sources":["../../../src/review/templates/annotator-investigate.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AAE/D,eAAO,MAAM,4BAA4B,EAAE,iBAe1C,CAAC"}
|
|
@@ -1,14 +1,17 @@
|
|
|
1
1
|
export const annotatorInvestigateTemplate = {
|
|
2
2
|
role: 'codebase investigation',
|
|
3
|
-
onBriefCheck: 'Each finding should be relevant to the question.',
|
|
3
|
+
onBriefCheck: 'Each finding should be relevant to the question (not an adjacent question — QUESTION SHIFT failure mode). AND: each load-bearing claim must have a file:line citation OR an explicit "searched X in Y, not found" negative. Findings consistent with the investigate failure-mode taxonomy (wrong file, stale quote, hallucinated citation, confidence overstatement, citation gap, question shift, synthesis without grounding, assumed-current-state) should be filed as the appropriate verdict — do NOT downgrade negative findings for lacking a code quote, and do NOT downgrade inference-with-citations as "speculation" if the cited links are real.',
|
|
4
4
|
evidenceRule: [
|
|
5
|
-
'- Present-thing citations: real `file:line` from files actually read, with a quote or summary.',
|
|
5
|
+
'- Present-thing citations: real `file:line` from files actually read THIS SESSION, with a quote or summary. Citations from training-data memory are hallucinations — flag any cited line that does not currently contain the cited content.',
|
|
6
6
|
'- Absent-thing citations: explicit "searched <pattern> in <path>, no matches" — negative findings are legitimate answers and must NOT be downgraded for lacking a code quote.',
|
|
7
|
-
'- Synthesis findings: cite each link in the reasoning chain by file:line.',
|
|
7
|
+
'- Synthesis findings: cite each link in the reasoning chain by file:line. A synthesis with even one un-cited link is a hand-wave; downgrade confidence or drop the un-cited link.',
|
|
8
|
+
'- Inference-with-citations ("I infer X from Y:42, Z:18") is FULLY VALID and should not be downgraded as "speculation" when the cited links are real. The distinction: inference-with-citations names what is inferred and what is cited; speculation makes a claim without naming the gap.',
|
|
8
9
|
].join('\n'),
|
|
9
10
|
scopeRule: [
|
|
10
11
|
'- Wherever the question leads is in scope; the question may not name files.',
|
|
12
|
+
'- Negative answers ("X is not used", "Y does not exist") ARE in scope when backed by an explicit search; they are not "unable to find" excuses.',
|
|
11
13
|
'- Drift into unrelated code-review remarks is out of scope.',
|
|
14
|
+
'- Fix proposals / suggestions / improvements are out of scope (this is a read-only Q&A).',
|
|
12
15
|
].join('\n'),
|
|
13
16
|
};
|
|
14
17
|
//# sourceMappingURL=annotator-investigate.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"annotator-investigate.js","sourceRoot":"","sources":["../../../src/review/templates/annotator-investigate.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,4BAA4B,GAAsB;IAC7D,IAAI,EAAE,wBAAwB;IAC9B,YAAY,EAAE
|
|
1
|
+
{"version":3,"file":"annotator-investigate.js","sourceRoot":"","sources":["../../../src/review/templates/annotator-investigate.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,4BAA4B,GAAsB;IAC7D,IAAI,EAAE,wBAAwB;IAC9B,YAAY,EAAE,+nBAA+nB;IAC7oB,YAAY,EAAE;QACZ,6OAA6O;QAC7O,+KAA+K;QAC/K,mLAAmL;QACnL,4RAA4R;KAC7R,CAAC,IAAI,CAAC,IAAI,CAAC;IACZ,SAAS,EAAE;QACT,6EAA6E;QAC7E,iJAAiJ;QACjJ,6DAA6D;QAC7D,0FAA0F;KAC3F,CAAC,IAAI,CAAC,IAAI,CAAC;CACb,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"annotator-review.d.ts","sourceRoot":"","sources":["../../../src/review/templates/annotator-review.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AAE/D,eAAO,MAAM,uBAAuB,EAAE,
|
|
1
|
+
{"version":3,"file":"annotator-review.d.ts","sourceRoot":"","sources":["../../../src/review/templates/annotator-review.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AAE/D,eAAO,MAAM,uBAAuB,EAAE,iBAoBrC,CAAC"}
|
|
@@ -1,15 +1,22 @@
|
|
|
1
1
|
export const annotatorReviewTemplate = {
|
|
2
2
|
role: 'code review',
|
|
3
|
-
onBriefCheck: 'For each finding, ask: is this within the requested focus area
|
|
3
|
+
onBriefCheck: 'For each finding, ask: is this within the requested focus area (or universally applicable: security, correctness, performance apply to every code change)? AND: is this finding consistent with the code-review failure-mode taxonomy (test gap, cross-file ripple, pre-existing-bug-vs-new-regression, missing edge case, race, resource leak, backward-compat break, security regression, performance regression, implicit-contract assumption)? Findings that match the taxonomy and are backed by call-site references or sibling-test-file references are valid even when the cited code is not in the named files — do NOT downgrade them as "speculation about untouched files".',
|
|
4
4
|
evidenceRule: [
|
|
5
|
-
'- Code-review findings
|
|
6
|
-
'-
|
|
7
|
-
'-
|
|
5
|
+
'- Code-review findings come in three valid shapes:',
|
|
6
|
+
' 1. In-file quote: a verbatim code excerpt from the named files at `file:line`.',
|
|
7
|
+
' 2. Cross-file ripple: a quote in the named file at `fileA:lineA` PLUS a call-site reference at `fileB:lineB` reachable via grep on the changed symbol. Both lines must be cited.',
|
|
8
|
+
' 3. Test-gap reference: a quote of the changed line in the named file PLUS the natural-sibling test file path (e.g. `tests/foo.test.ts` for `src/foo.ts`). If no test file exists, that itself is the finding.',
|
|
9
|
+
'- Implicit-contract findings are valid when they cite the changed line AND name the contract source (docstring, type, README) that does not state the assumption.',
|
|
10
|
+
'- Findings without one of these forms are speculation; downgrade or drop.',
|
|
11
|
+
'- Reasoning-based findings backed by call-site references (e.g. "this signature change breaks src/handlers/auth.ts:42") are FULLY VALID and the highest-value kind of code-review finding. Do NOT downgrade them as "speculation about untouched files."',
|
|
8
12
|
].join('\n'),
|
|
9
13
|
scopeRule: [
|
|
10
|
-
'-
|
|
11
|
-
'-
|
|
14
|
+
'- Named files are in scope. Behavior of direct callers/callees may be referenced when visible in the named files.',
|
|
15
|
+
'- Cross-file ripples on changed public symbols ARE in scope when backed by a grep-able symbol reference. Do not penalize as out-of-scope.',
|
|
16
|
+
'- Test-gap findings citing a sibling test file ARE in scope. Do not penalize as out-of-scope.',
|
|
17
|
+
'- Speculation about unrelated untouched files is out of scope.',
|
|
12
18
|
'- Doc/spec issues belong in an audit, not a review — flag as off-brief.',
|
|
19
|
+
'- Pre-existing bugs (the diff did not introduce them) belong in a separate "Pre-existing — out of scope" section, not in the merge-blocking findings.',
|
|
13
20
|
].join('\n'),
|
|
14
21
|
};
|
|
15
22
|
//# sourceMappingURL=annotator-review.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"annotator-review.js","sourceRoot":"","sources":["../../../src/review/templates/annotator-review.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,uBAAuB,GAAsB;IACxD,IAAI,EAAE,aAAa;IACnB,YAAY,EAAE,
|
|
1
|
+
{"version":3,"file":"annotator-review.js","sourceRoot":"","sources":["../../../src/review/templates/annotator-review.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,uBAAuB,GAAsB;IACxD,IAAI,EAAE,aAAa;IACnB,YAAY,EAAE,ypBAAypB;IACvqB,YAAY,EAAE;QACZ,oDAAoD;QACpD,kFAAkF;QAClF,oLAAoL;QACpL,iNAAiN;QACjN,mKAAmK;QACnK,2EAA2E;QAC3E,0PAA0P;KAC3P,CAAC,IAAI,CAAC,IAAI,CAAC;IACZ,SAAS,EAAE;QACT,mHAAmH;QACnH,2IAA2I;QAC3I,+FAA+F;QAC/F,gEAAgE;QAChE,yEAAyE;QACzE,uJAAuJ;KACxJ,CAAC,IAAI,CAAC,IAAI,CAAC;CACb,CAAC"}
|
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
export interface AnnotatorPromptContext {
|
|
2
|
-
|
|
2
|
+
/** N parallel sub-worker narratives, one per criterion the dispatcher
|
|
3
|
+
* fanned out. The empty-result narrative ("No findings for this
|
|
4
|
+
* criterion.") is filtered out by the engine before this context is
|
|
5
|
+
* built — entries here are non-empty narratives that need merging. */
|
|
6
|
+
workerOutputs: Array<{
|
|
7
|
+
criterion: string;
|
|
8
|
+
narrative: string;
|
|
9
|
+
}>;
|
|
3
10
|
brief: string;
|
|
4
11
|
}
|
|
5
12
|
export interface AnnotatorTemplate {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"annotator-shared.d.ts","sourceRoot":"","sources":["../../../src/review/templates/annotator-shared.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,sBAAsB;IACrC,
|
|
1
|
+
{"version":3,"file":"annotator-shared.d.ts","sourceRoot":"","sources":["../../../src/review/templates/annotator-shared.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,sBAAsB;IACrC;;;2EAGuE;IACvE,aAAa,EAAE,KAAK,CAAC;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAC/D,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,YAAY,EAAE,MAAM,CAAC;IACrB;yDACqD;IACrD,YAAY,EAAE,MAAM,CAAC;IACrB;uCACmC;IACnC,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,wBAAgB,oBAAoB,CAAC,QAAQ,EAAE,iBAAiB,GAAG,MAAM,CA+CxE"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"annotator-shared.js","sourceRoot":"","sources":["../../../src/review/templates/annotator-shared.ts"],"names":[],"mappings":"AAAA,uEAAuE;AACvE,iDAAiD;
|
|
1
|
+
{"version":3,"file":"annotator-shared.js","sourceRoot":"","sources":["../../../src/review/templates/annotator-shared.ts"],"names":[],"mappings":"AAAA,uEAAuE;AACvE,iDAAiD;AAsBjD,MAAM,UAAU,oBAAoB,CAAC,QAA2B;IAC9D,OAAO,MAAM,CAAC,GAAG,CAAA;;;;;;;CAOlB,GAAG,WAAW,GAAG;;;;;;;;;;;CAWjB,GAAG,KAAK,GAAG;;;GAGT,GAAG,MAAM,GAAG;GACZ,GAAG,YAAY,GAAG;;GAElB,GAAG,SAAS,GAAG;GACf,GAAG,YAAY,GAAG;;GAElB,GAAG,cAAc,GAAG;GACpB,GAAG,uBAAuB,GAAG;;GAE7B,GAAG,YAAY,GAAG;;;;;;;CAOpB,GAAG,QAAQ,CAAC,YAAY,GAAG;;;;CAI3B,GAAG,QAAQ,CAAC,SAAS,GAAG;;wCAEe,GAAG,MAAM,GAAG;mDACD,GAAG,SAAS,GAAG;CACjE,CAAC,IAAI,EAAE,CAAC;AACT,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"annotator-verify.d.ts","sourceRoot":"","sources":["../../../src/review/templates/annotator-verify.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AAE/D,eAAO,MAAM,uBAAuB,EAAE,
|
|
1
|
+
{"version":3,"file":"annotator-verify.d.ts","sourceRoot":"","sources":["../../../src/review/templates/annotator-verify.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AAE/D,eAAO,MAAM,uBAAuB,EAAE,iBAmBrC,CAAC"}
|
|
@@ -1,15 +1,21 @@
|
|
|
1
1
|
export const annotatorVerifyTemplate = {
|
|
2
2
|
role: 'verification report',
|
|
3
|
-
onBriefCheck: 'Each finding should map to
|
|
3
|
+
onBriefCheck: 'Each finding should map 1:1 to a checklist item with evidence in one of three valid shapes (EXECUTION command+output, FILE-LEVEL file:line+quote, or NEGATIVE "cannot verify"). AND: a PASS that rests on a prose claim from the work product alone — without execution output or file:line citation — is a rubber stamp; downgrade to FAIL with NEGATIVE evidence. Findings consistent with the verify failure-mode taxonomy (claim-without-evidence, stale evidence, implicit-criterion gap, partial coverage, conflated criteria, wrong-artifact evidence, assumed-PASS-on-untested) should be filed as the appropriate verdict — do NOT silently exclude implicit sub-criteria from PASS verdicts.',
|
|
4
4
|
evidenceRule: [
|
|
5
5
|
'- Each Finding must map 1:1 to a checklist item.',
|
|
6
|
-
'- Evidence is
|
|
7
|
-
'
|
|
8
|
-
'-
|
|
6
|
+
'- Evidence is one of three valid shapes:',
|
|
7
|
+
' 1. EXECUTION: a command + its observed output (test name + pass/fail line, build output, lint result), with the relevant output line quoted.',
|
|
8
|
+
' 2. FILE-LEVEL: `file:line` citation showing the implementation that satisfies (or fails) the criterion, with the relevant code excerpt quoted.',
|
|
9
|
+
' 3. NEGATIVE: an explicit "cannot verify from this artifact" plus what would be needed to verify (a test run, a different file, a runtime check).',
|
|
10
|
+
'- A claimed PASS without one of the three shapes is a rubber stamp; downgrade to FAIL with NEGATIVE evidence.',
|
|
11
|
+
'- A "the work product says it is done" claim is NOT valid evidence — only execution output or file:line citations count for PASS.',
|
|
12
|
+
'- Severity binding: PASS = low; FAIL = medium or high based on impact; FAIL on a release-blocking criterion = critical.',
|
|
13
|
+
'- FAIL with NEGATIVE evidence ("cannot verify") is FULLY VALID and the correct verdict when the artifact is insufficient. Do NOT downgrade NEGATIVE-evidence FAILs to "cannot determine" or assumed-PASS.',
|
|
9
14
|
].join('\n'),
|
|
10
15
|
scopeRule: [
|
|
11
16
|
'- Only checklist items are in scope. Findings not tied to a checklist item are off-brief.',
|
|
12
|
-
'- All checklist items should be covered (one Finding per item, in order).',
|
|
17
|
+
'- All checklist items should be covered (one Finding per item, in order, no skips).',
|
|
18
|
+
'- IMPLICIT criteria embedded in a checklist item (e.g. "fix the bug" implies "regression test added") ARE in scope. A PASS verdict that silently excludes an implicit sub-criterion is a partial-coverage failure.',
|
|
13
19
|
].join('\n'),
|
|
14
20
|
};
|
|
15
21
|
//# sourceMappingURL=annotator-verify.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"annotator-verify.js","sourceRoot":"","sources":["../../../src/review/templates/annotator-verify.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,uBAAuB,GAAsB;IACxD,IAAI,EAAE,qBAAqB;IAC3B,YAAY,EAAE,
|
|
1
|
+
{"version":3,"file":"annotator-verify.js","sourceRoot":"","sources":["../../../src/review/templates/annotator-verify.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,uBAAuB,GAAsB;IACxD,IAAI,EAAE,qBAAqB;IAC3B,YAAY,EAAE,wqBAAwqB;IACtrB,YAAY,EAAE;QACZ,kDAAkD;QAClD,0CAA0C;QAC1C,gJAAgJ;QAChJ,kJAAkJ;QAClJ,oJAAoJ;QACpJ,+GAA+G;QAC/G,mIAAmI;QACnI,yHAAyH;QACzH,2MAA2M;KAC5M,CAAC,IAAI,CAAC,IAAI,CAAC;IACZ,SAAS,EAAE;QACT,2FAA2F;QAC3F,qFAAqF;QACrF,oNAAoN;KACrN,CAAC,IAAI,CAAC,IAAI,CAAC;CACb,CAAC"}
|
|
@@ -1,17 +1,73 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Audit-specific implementer criteria.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
4
|
+
* AUDIT'S PURPOSE — read this before adding categories.
|
|
5
|
+
* Most audit targets are spec / plan / design / recommendation files that
|
|
6
|
+
* will subsequently be EXECUTED BY A LOW-JUDGMENT WORKER (a sub-agent that
|
|
7
|
+
* follows the spec literally, with little ability to disambiguate or
|
|
8
|
+
* choose between alternatives). The audit's success criterion is:
|
|
9
|
+
*
|
|
10
|
+
* "After audit + fixes, can a literal-following worker execute this
|
|
11
|
+
* artifact without failing, picking wrong, or getting stuck?"
|
|
12
|
+
*
|
|
13
|
+
* That criterion is what makes a finding load-bearing. Stylistic nits
|
|
14
|
+
* don't block execution; ambiguity, contradictions, missing verification,
|
|
15
|
+
* unspecified branches, out-of-order steps, and overloaded terms do.
|
|
16
|
+
*
|
|
17
|
+
* EVIDENCE & SCOPE for prose artifacts.
|
|
18
|
+
* Audit examines a prose artifact (spec, design doc, plan, recommendation
|
|
19
|
+
* doc, API contract, config, brief). The "thing being examined" is text —
|
|
20
|
+
* not source code — so evidence and scope rules differ from review/debug:
|
|
7
21
|
*
|
|
8
22
|
* - Evidence is a doc quote, OR a precise reference to a section/item
|
|
9
23
|
* that *should* address the issue but doesn't (absence-finding), OR
|
|
10
|
-
* a doc-claim + contradicting source (wrong-claim finding)
|
|
24
|
+
* a doc-claim + contradicting source (wrong-claim finding), OR
|
|
25
|
+
* two sections of the doc that contradict each other (internal-
|
|
26
|
+
* coherence finding).
|
|
11
27
|
* - Scope is the document and what it directly references; cross-section
|
|
12
28
|
* reasoning IS the value of an audit.
|
|
29
|
+
*
|
|
30
|
+
* The failure-mode rubric below tells the worker WHAT KINDS of issues to
|
|
31
|
+
* look for in a prose artifact. Without it, workers calibrated on code
|
|
32
|
+
* audits collapse to surface-level proofreading on documents.
|
|
33
|
+
*/
|
|
34
|
+
/**
|
|
35
|
+
* The orientation block. Goes at the TOP of every audit prompt.
|
|
36
|
+
*
|
|
37
|
+
* This is the load-bearing addition. Without an explicit purpose statement,
|
|
38
|
+
* workers default to "find issues in this doc" — which produces stylistic
|
|
39
|
+
* proofreading. With this orientation, they look for issues that would
|
|
40
|
+
* BLOCK EXECUTION by a literal-following worker, which is what the caller
|
|
41
|
+
* actually needs.
|
|
13
42
|
*/
|
|
43
|
+
export declare const AUDIT_PURPOSE_ORIENTATION: string;
|
|
14
44
|
export declare const EVIDENCE_RULE_AUDIT: string;
|
|
15
45
|
export declare const SCOPE_RULE_AUDIT: string;
|
|
46
|
+
/**
|
|
47
|
+
* The failure-mode rubric for prose-document audits.
|
|
48
|
+
*
|
|
49
|
+
* This is the load-bearing addition. Without an explicit taxonomy, workers
|
|
50
|
+
* calibrated on source-code rubrics (off-by-one, type mismatches, dead code)
|
|
51
|
+
* have nothing to look for in a spec/plan/recommendation doc and emit only
|
|
52
|
+
* surface nits. The 11 categories below cover what actually goes wrong in
|
|
53
|
+
* non-trivial prose artifacts and are independent of the audit-type label.
|
|
54
|
+
*/
|
|
55
|
+
export declare const DOC_AUDIT_FAILURE_MODES: string;
|
|
56
|
+
/**
|
|
57
|
+
* Counter-balance to the SEVERITY_LADDER's anti-inflation hint.
|
|
58
|
+
*
|
|
59
|
+
* The shared severity ladder ends with "Workers commonly inflate — resist
|
|
60
|
+
* the urge." That bias is correct for code reviews, where over-flagging
|
|
61
|
+
* stylistic preferences is the common failure. For prose-document audits
|
|
62
|
+
* the opposite is true: workers UNDER-find because they have nothing to
|
|
63
|
+
* pattern-match against in their training. This block tells the worker
|
|
64
|
+
* the doc-audit failure mode is silence, not noise.
|
|
65
|
+
*/
|
|
66
|
+
export declare const THOROUGHNESS_REMINDER_AUDIT: string;
|
|
16
67
|
export declare const ANNOTATOR_AWARENESS_AUDIT: string;
|
|
68
|
+
import { type CriterionEntry } from '../criteria-types.js';
|
|
69
|
+
/** Structured per-criterion array for parallel-criteria fan-out. Derived
|
|
70
|
+
* from DOC_AUDIT_FAILURE_MODES so the prose in the cached prefix and the
|
|
71
|
+
* per-sub-worker assignment stay in lockstep. */
|
|
72
|
+
export declare const AUDIT_CRITERIA: readonly CriterionEntry[];
|
|
17
73
|
//# sourceMappingURL=implementer-criteria.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"implementer-criteria.d.ts","sourceRoot":"","sources":["../../../src/tools/audit/implementer-criteria.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"implementer-criteria.d.ts","sourceRoot":"","sources":["../../../src/tools/audit/implementer-criteria.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AAEH;;;;;;;;GAQG;AACH,eAAO,MAAM,yBAAyB,QAmB1B,CAAC;AAEb,eAAO,MAAM,mBAAmB,QAOpB,CAAC;AAEb,eAAO,MAAM,gBAAgB,QAMjB,CAAC;AAEb;;;;;;;;GAQG;AACH,eAAO,MAAM,uBAAuB,QAoBxB,CAAC;AAEb;;;;;;;;;GASG;AACH,eAAO,MAAM,2BAA2B,QAW5B,CAAC;AAEb,eAAO,MAAM,yBAAyB,QAO1B,CAAC;AAEb,OAAO,EAAiB,KAAK,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAE1E;;kDAEkD;AAClD,eAAO,MAAM,cAAc,EAAE,SAAS,cAAc,EAA2C,CAAC"}
|
|
@@ -1,36 +1,143 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Audit-specific implementer criteria.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
4
|
+
* AUDIT'S PURPOSE — read this before adding categories.
|
|
5
|
+
* Most audit targets are spec / plan / design / recommendation files that
|
|
6
|
+
* will subsequently be EXECUTED BY A LOW-JUDGMENT WORKER (a sub-agent that
|
|
7
|
+
* follows the spec literally, with little ability to disambiguate or
|
|
8
|
+
* choose between alternatives). The audit's success criterion is:
|
|
9
|
+
*
|
|
10
|
+
* "After audit + fixes, can a literal-following worker execute this
|
|
11
|
+
* artifact without failing, picking wrong, or getting stuck?"
|
|
12
|
+
*
|
|
13
|
+
* That criterion is what makes a finding load-bearing. Stylistic nits
|
|
14
|
+
* don't block execution; ambiguity, contradictions, missing verification,
|
|
15
|
+
* unspecified branches, out-of-order steps, and overloaded terms do.
|
|
16
|
+
*
|
|
17
|
+
* EVIDENCE & SCOPE for prose artifacts.
|
|
18
|
+
* Audit examines a prose artifact (spec, design doc, plan, recommendation
|
|
19
|
+
* doc, API contract, config, brief). The "thing being examined" is text —
|
|
20
|
+
* not source code — so evidence and scope rules differ from review/debug:
|
|
7
21
|
*
|
|
8
22
|
* - Evidence is a doc quote, OR a precise reference to a section/item
|
|
9
23
|
* that *should* address the issue but doesn't (absence-finding), OR
|
|
10
|
-
* a doc-claim + contradicting source (wrong-claim finding)
|
|
24
|
+
* a doc-claim + contradicting source (wrong-claim finding), OR
|
|
25
|
+
* two sections of the doc that contradict each other (internal-
|
|
26
|
+
* coherence finding).
|
|
11
27
|
* - Scope is the document and what it directly references; cross-section
|
|
12
28
|
* reasoning IS the value of an audit.
|
|
29
|
+
*
|
|
30
|
+
* The failure-mode rubric below tells the worker WHAT KINDS of issues to
|
|
31
|
+
* look for in a prose artifact. Without it, workers calibrated on code
|
|
32
|
+
* audits collapse to surface-level proofreading on documents.
|
|
13
33
|
*/
|
|
34
|
+
/**
|
|
35
|
+
* The orientation block. Goes at the TOP of every audit prompt.
|
|
36
|
+
*
|
|
37
|
+
* This is the load-bearing addition. Without an explicit purpose statement,
|
|
38
|
+
* workers default to "find issues in this doc" — which produces stylistic
|
|
39
|
+
* proofreading. With this orientation, they look for issues that would
|
|
40
|
+
* BLOCK EXECUTION by a literal-following worker, which is what the caller
|
|
41
|
+
* actually needs.
|
|
42
|
+
*/
|
|
43
|
+
export const AUDIT_PURPOSE_ORIENTATION = [
|
|
44
|
+
'Why this audit exists:',
|
|
45
|
+
'The artifact you are auditing is most likely a spec, plan, design doc, or recommendation doc that will subsequently be EXECUTED BY A LOW-JUDGMENT WORKER — a sub-agent that follows instructions literally, has limited ability to disambiguate, and cannot recover from contradictions.',
|
|
46
|
+
'',
|
|
47
|
+
'Your job is to find anywhere a literal-following worker would:',
|
|
48
|
+
'- get stuck on ambiguity (e.g. "implement the function" with no signature, location, or contract)',
|
|
49
|
+
'- pick wrong on an unspecified branch (e.g. "if X then Y" with no "otherwise")',
|
|
50
|
+
'- implement contradictions (section A says use X, section B says use Y, both apparently authoritative)',
|
|
51
|
+
'- skip a requirement that is implicit or buried (the worker only does what is explicitly stated)',
|
|
52
|
+
'- be unable to verify completion (no acceptance criteria, no done condition, no test command)',
|
|
53
|
+
'- misinterpret an overloaded term (the same word means two different things in two sections)',
|
|
54
|
+
'- execute steps out of order (step 3 needs the output of step 5)',
|
|
55
|
+
'- act on an unbounded scope ("fix the bug" with no scope boundary)',
|
|
56
|
+
'- need context that is referenced but not provided (a helper, a flag, a file the spec assumes the worker knows)',
|
|
57
|
+
'- produce data of an unspecified shape (return value, file format, error envelope)',
|
|
58
|
+
'',
|
|
59
|
+
'A finding that points at any of these failure-mode triggers is high-value EVEN IF the prose reads cleanly. Conversely, a stylistic nit that does not block execution is low-priority no matter how clean the wording.',
|
|
60
|
+
'',
|
|
61
|
+
'When you have completed this audit and its fixes have been applied, the test is: would a worker that reads only this artifact, follows it literally, and asks no clarifying questions, produce the right outcome? If yes, the audit succeeded.',
|
|
62
|
+
].join('\n');
|
|
14
63
|
export const EVIDENCE_RULE_AUDIT = [
|
|
15
64
|
'Evidence grounding (REQUIRED for every finding):',
|
|
16
65
|
'- For issues IN the doc: quote the exact passage that demonstrates the issue.',
|
|
17
|
-
'- For ABSENCES (the doc is silent on something it should specify): name the section that should address it. Example: "Section 3.2 enumerates failure modes but does not specify queue-overflow behavior."',
|
|
66
|
+
'- For ABSENCES (the doc is silent on something it should specify): name the section that should address it. Example: "Section 3.2 enumerates failure modes but does not specify queue-overflow behavior." This is an absence-finding and is fully valid evidence.',
|
|
18
67
|
'- For WRONG-CLAIM findings: quote the doc\'s claim AND the source that contradicts it (the actual code, the referenced spec, etc.).',
|
|
19
|
-
'-
|
|
68
|
+
'- For INTERNAL-COHERENCE findings (two parts of the doc conflict, a recommendation contradicts a stated constraint, a fix relies on something the doc forbids): quote both passages OR quote one and name the section ID of the other.',
|
|
69
|
+
'- A finding without one of these four forms of evidence is speculation. Note "investigation needed" in your summary instead.',
|
|
20
70
|
].join('\n');
|
|
21
71
|
export const SCOPE_RULE_AUDIT = [
|
|
22
72
|
'Scope:',
|
|
23
73
|
'- The document itself plus any artifact the document directly references (cited code, linked spec, embedded config).',
|
|
24
|
-
'- Cross-section reasoning within the document IS in scope and often the highest-value kind of finding.',
|
|
74
|
+
'- Cross-section reasoning within the document IS in scope and is often the highest-value kind of finding.',
|
|
25
75
|
'- Do NOT enumerate the repository or glob across all source files. If verifying a referenced file or symbol, read or grep for that specific name only — the goal is to evaluate the document, not catalog the codebase.',
|
|
26
76
|
'- Out of scope: speculation about content the document does not reference; coding-style nits on inline code examples (those belong in a code review, not an audit).',
|
|
27
77
|
].join('\n');
|
|
78
|
+
/**
|
|
79
|
+
* The failure-mode rubric for prose-document audits.
|
|
80
|
+
*
|
|
81
|
+
* This is also a load-bearing addition. Without an explicit taxonomy, workers
|
|
82
|
+
* calibrated on source-code rubrics (off-by-one, type mismatches, dead code)
|
|
83
|
+
* have nothing to look for in a spec/plan/recommendation doc and emit only
|
|
84
|
+
* surface nits. The 11 categories below cover what actually goes wrong in
|
|
85
|
+
* non-trivial prose artifacts and are independent of the audit-type label.
|
|
86
|
+
*/
|
|
87
|
+
export const DOC_AUDIT_FAILURE_MODES = [
|
|
88
|
+
'Look for these kinds of issues — applicable to ALL prose-document audits regardless of auditType. The auditType (default / security / performance) tells you which lens to weight, but every doc audit should sweep the full taxonomy:',
|
|
89
|
+
'',
|
|
90
|
+
'1. RECOMMENDATION-COHERENCE — does the proposed fix actually solve the stated problem given the doc\'s own stated constraints? A fix that requires X when the doc forbids X is logically incomplete. **Always check fixes against any explicit principles, constraints, invariants, or "what we won\'t do" sections in the doc itself.** Example: a doc that lists "no persistence" as a principle cannot have a fix that disambiguates "id existed before" from "id never existed" without persistence — that fix is unimplementable.',
|
|
91
|
+
'2. INTERNAL CONTRADICTION — does section A say something incompatible with section B? Does a methodology disclaimer ("these numbers are approximations") undercut a load-bearing claim built on those same numbers? Does a "do not auto-X" rule sit next to an "auto-X above threshold" recommendation?',
|
|
92
|
+
'3. CROSS-ITEM DUPLICATION — are two items addressing the same root cause without acknowledging each other? Should they be merged or cross-referenced? Look across the WHOLE doc for items that target the same underlying problem from different angles.',
|
|
93
|
+
'4. INDEPENDENCE-CLAIMED-WITHOUT-EVIDENCE — is X asserted as independent of Y when the evidence shows correlation, co-occurrence, or shared mechanism?',
|
|
94
|
+
'5. ARGUMENT SOUNDNESS — does the evidence chain support the conclusion? Does a headline ("95% wasted") rest on data the doc itself flags as unreliable? Does a severity rating match the evidence depth?',
|
|
95
|
+
'6. COMPLETENESS AGAINST CONSTRAINTS — does any constraint stated elsewhere render a recommendation infeasible? Is a fix step that depends on persistence proposed in a doc that forbids persistence? **If the doc has a principles, invariants, or constraints section, walk every recommendation through every constraint and flag mismatches.**',
|
|
96
|
+
'7. FIX ACTIONABILITY — is the proposed fix complete enough to implement, or does it stop at "fix it" / vague verbs? Does it leave open which subsystem owns the change? Are step-by-step actions or only goals?',
|
|
97
|
+
'8. DRIFT / STALENESS — does any claim in one section contradict more recently revised material in the same doc? **Specifically: count items the doc claims to discuss (e.g. "across all three sessions", "the four highest-impact items", "we have N tools") and verify the count against the actual list elsewhere.** If the count is wrong, that\'s drift. Other drift signals: version labels, renamed sections, references to removed items.',
|
|
98
|
+
'9. SCOPE-CREEP / FRAMING — do recommendations exceed what the evidence supports? Does the framing (table title, bucket label, headline) misrepresent what the row contents actually say?',
|
|
99
|
+
'10. STRUCTURAL CONSISTENCY — do similar items in a list/table follow the same shape? If one row has a Verification subsection and the others don\'t, that\'s structural inconsistency. If items are numbered "1, 1b, 2, 3" the duplicate "1" is a structural break. If a column is labeled "Fix direction" but one row\'s cell holds verification criteria, that\'s a column-content mismatch.',
|
|
100
|
+
'11. METADATA COMPLETENESS — for living/revised documents: is there a "last updated" / "as of" / version stamp? When findings claim "still unfixed in version X", is there a date timeline that supports the claim?',
|
|
101
|
+
'',
|
|
102
|
+
'Severity calibration for doc audits:',
|
|
103
|
+
'- critical: a recommendation that, if implemented, would fail or cause harm because the doc is internally incoherent (e.g. a fix that depends on something the doc forbids). Or: a contradiction that would silently lead to wrong implementation if a reader followed both passages.',
|
|
104
|
+
'- high: a substantive missing recommendation, an incorrect claim of independence between two issues, an evidence chain that does not support a load-bearing conclusion, OR a fix that violates a stated principle/constraint of the doc itself.',
|
|
105
|
+
'- medium: argument soundness gap, fix actionability gap, drift between sections (item-count mismatch), structural inconsistency between similar items, scope-creep risk that needs a guardrail.',
|
|
106
|
+
'- low: stylistic, labeling, or formatting issues; missing metadata; minor cross-reference fixes.',
|
|
107
|
+
].join('\n');
|
|
108
|
+
/**
|
|
109
|
+
* Counter-balance to the SEVERITY_LADDER's anti-inflation hint.
|
|
110
|
+
*
|
|
111
|
+
* The shared severity ladder ends with "Workers commonly inflate — resist
|
|
112
|
+
* the urge." That bias is correct for code reviews, where over-flagging
|
|
113
|
+
* stylistic preferences is the common failure. For prose-document audits
|
|
114
|
+
* the opposite is true: workers UNDER-find because they have nothing to
|
|
115
|
+
* pattern-match against in their training. This block tells the worker
|
|
116
|
+
* the doc-audit failure mode is silence, not noise.
|
|
117
|
+
*/
|
|
118
|
+
export const THOROUGHNESS_REMINDER_AUDIT = [
|
|
119
|
+
'Thoroughness expectation for prose-document audits:',
|
|
120
|
+
'- For non-trivial documents (>500 words), zero or 1-2 findings is unusual and usually indicates the rubric was applied too narrowly. Sweep the full failure-mode taxonomy above before declaring "no findings."',
|
|
121
|
+
'- The SEVERITY_LADDER warns against inflation. That warning is calibrated for code reviews — for prose audits the typical failure mode is the opposite (under-finding because the worker only looked for surface nits). Apply the failure-mode taxonomy thoroughly first; THEN calibrate severity downward where the impact is small.',
|
|
122
|
+
'- Do not invent findings to hit a quota. But if you have applied all 11 failure modes and still have only stylistic nits, double-check categories 1, 2, 5, 6, and 8 (recommendation-coherence, internal contradiction, argument soundness, completeness against constraints, drift) — these are the ones workers most often miss on first pass.',
|
|
123
|
+
'',
|
|
124
|
+
'Principle-mapping pass (REQUIRED when the doc has a principles / constraints / "what we won\'t do" section):',
|
|
125
|
+
'- Make ONE explicit pass walking each recommendation against each principle/constraint listed in the doc.',
|
|
126
|
+
'- For each (recommendation, constraint) pair, ask: does this recommendation, as written, require something the constraint forbids? Or rely on something the constraint says is unavailable?',
|
|
127
|
+
'- Worked example (illustrative — DO NOT match this verbatim against the doc you are auditing). Suppose a doc states Principle X: "Operations must be deterministic — no random sources." Suppose recommendation R proposes: "On request collision, generate a fresh tiebreaker using the system entropy pool." Chain: the tiebreaker uses entropy; entropy is non-deterministic; Principle X forbids non-determinism; therefore R is unimplementable as written without breaking Principle X. → File this as a HIGH-severity recommendation-coherence finding. The general pattern: a fix that REQUIRES something a constraint FORBIDS, or RELIES ON something a constraint says is UNAVAILABLE, is a load-bearing finding regardless of how clean the fix\'s prose reads.',
|
|
128
|
+
'- Most workers miss findings of this shape on first pass because the chain spans two non-adjacent sections. The principle-mapping pass forces you to make the chain.',
|
|
129
|
+
].join('\n');
|
|
28
130
|
export const ANNOTATOR_AWARENESS_AUDIT = [
|
|
29
131
|
'After your output, an annotator validates each finding against this audit-specific rubric:',
|
|
30
|
-
'- Is the finding about the document (contradiction / absence / ambiguity / wrong claim / scope gap)?',
|
|
31
|
-
'- Is the evidence
|
|
32
|
-
'- Is the severity calibrated to actual downstream-execution impact?',
|
|
132
|
+
'- Is the finding about the document (contradiction / absence / ambiguity / wrong claim / scope gap / recommendation-coherence / argument-soundness)?',
|
|
133
|
+
'- Is the evidence one of the four valid shapes: doc quote, absence-reference, claim+contradiction, OR internal-coherence cross-section reference?',
|
|
134
|
+
'- Is the severity calibrated to actual downstream-execution impact (does following the recommendation as written produce a wrong outcome)?',
|
|
33
135
|
'- Is the finding within the document\'s scope, or is it speculation about untouched material?',
|
|
34
|
-
'Self-check before emitting. Findings that fail any check are downgraded or dropped.',
|
|
136
|
+
'Self-check before emitting. Findings that fail any check are downgraded or dropped — but logical-coherence and argument-soundness findings backed by section references are FULLY VALID, do NOT downgrade them as "speculation."',
|
|
35
137
|
].join('\n');
|
|
138
|
+
import { parseCriteria } from '../criteria-types.js';
|
|
139
|
+
/** Structured per-criterion array for parallel-criteria fan-out. Derived
|
|
140
|
+
* from DOC_AUDIT_FAILURE_MODES so the prose in the cached prefix and the
|
|
141
|
+
* per-sub-worker assignment stay in lockstep. */
|
|
142
|
+
export const AUDIT_CRITERIA = parseCriteria(DOC_AUDIT_FAILURE_MODES);
|
|
36
143
|
//# sourceMappingURL=implementer-criteria.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"implementer-criteria.js","sourceRoot":"","sources":["../../../src/tools/audit/implementer-criteria.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"implementer-criteria.js","sourceRoot":"","sources":["../../../src/tools/audit/implementer-criteria.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AAEH;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,yBAAyB,GAAG;IACvC,wBAAwB;IACxB,0RAA0R;IAC1R,EAAE;IACF,gEAAgE;IAChE,mGAAmG;IACnG,gFAAgF;IAChF,wGAAwG;IACxG,kGAAkG;IAClG,+FAA+F;IAC/F,8FAA8F;IAC9F,kEAAkE;IAClE,oEAAoE;IACpE,iHAAiH;IACjH,oFAAoF;IACpF,EAAE;IACF,uNAAuN;IACvN,EAAE;IACF,gPAAgP;CACjP,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,CAAC,MAAM,mBAAmB,GAAG;IACjC,kDAAkD;IAClD,+EAA+E;IAC/E,mQAAmQ;IACnQ,qIAAqI;IACrI,wOAAwO;IACxO,8HAA8H;CAC/H,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,CAAC,MAAM,gBAAgB,GAAG;IAC9B,QAAQ;IACR,sHAAsH;IACtH,2GAA2G;IAC3G,yNAAyN;IACzN,qKAAqK;CACtK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAG;IACrC,wOAAwO;IACxO,EAAE;IACF,wgBAAwgB;IACxgB,ySAAyS;IACzS,0PAA0P;IAC1P,uJAAuJ;IACvJ,0MAA0M;IAC1M,mVAAmV;IACnV,iNAAiN;IACjN,kbAAkb;IAClb,0LAA0L;IAC1L,gYAAgY;IAChY,oNAAoN;IACpN,EAAE;IACF,sCAAsC;IACtC,uRAAuR;IACvR,iPAAiP;IACjP,iMAAiM;IACjM,kGAAkG;CACnG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb;;;;;;;;;GASG;AACH,MAAM,CAAC,MAAM,2BAA2B,GAAG;IACzC,qDAAqD;IACrD,iNAAiN;IACjN,uUAAuU;IACvU,iVAAiV;IACjV,EAAE;IACF,8GAA8G;IAC9G,2GAA2G;IAC3G,6LAA6L;IAC7L,4uBAA4uB;IAC5uB,sKAAsK;CACvK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,CAAC,MAAM,yBAAyB,GAAG;IACvC,4FAA4F;IAC5F,sJAAsJ;IACtJ,mJAAmJ;IACnJ,4IAA4I;IAC5I,+FAA+F;IAC/F,kOAAkO;CACnO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,OAAO,EAAE,aAAa,EAAuB,MAAM,sBAAsB,CAAC;AAE1E;;kDAEkD;AAClD,MAAM,CAAC,MAAM,cAAc,GAA8B,aAAa,CAAC,uBAAuB,CAAC,CAAC"}
|
|
@@ -1,18 +1,11 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
2
|
export declare const inputSchema: z.ZodObject<{
|
|
3
3
|
document: z.ZodOptional<z.ZodString>;
|
|
4
|
-
auditType: z.
|
|
4
|
+
auditType: z.ZodDefault<z.ZodEnum<{
|
|
5
|
+
default: "default";
|
|
5
6
|
security: "security";
|
|
6
7
|
performance: "performance";
|
|
7
|
-
|
|
8
|
-
correctness: "correctness";
|
|
9
|
-
general: "general";
|
|
10
|
-
}>, z.ZodArray<z.ZodEnum<{
|
|
11
|
-
security: "security";
|
|
12
|
-
performance: "performance";
|
|
13
|
-
style: "style";
|
|
14
|
-
correctness: "correctness";
|
|
15
|
-
}>>]>;
|
|
8
|
+
}>>;
|
|
16
9
|
filePaths: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
17
10
|
contextBlockIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
18
11
|
}, z.core.$strip>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../../src/tools/audit/schema.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAMxB,eAAO,MAAM,WAAW
|
|
1
|
+
{"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../../src/tools/audit/schema.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAMxB,eAAO,MAAM,WAAW;;;;;;;;;iBAStB,CAAC;AAEH,MAAM,MAAM,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,WAAW,CAAC,CAAC;AAEhD,eAAO,MAAM,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iBAA8B,CAAC;AAExD,MAAM,MAAM,MAAM,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,YAAY,CAAC,CAAC"}
|
|
@@ -6,10 +6,9 @@ import { buildOutputEnvelopeSchema } from '../shared-output.js';
|
|
|
6
6
|
// cross-package coupling.
|
|
7
7
|
export const inputSchema = z.object({
|
|
8
8
|
document: z.string().optional().describe('Inline document content to audit'),
|
|
9
|
-
auditType: z.
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
]).describe('Audit focus.'),
|
|
9
|
+
auditType: z.enum(['default', 'security', 'performance'])
|
|
10
|
+
.default('default')
|
|
11
|
+
.describe('Audit focus. `default` is the comprehensive sweep — recommended for specs, plans, designs, recommendation docs, post-mortems. Use `security` or `performance` only when you specifically want to narrow the lens to that single dimension (threat models, scaling designs).'),
|
|
13
12
|
filePaths: z.array(z.string()).optional()
|
|
14
13
|
.describe('Files the sub-agent should focus on. Multiple files are processed in parallel.'),
|
|
15
14
|
contextBlockIds: z.array(z.string()).optional()
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"schema.js","sourceRoot":"","sources":["../../../src/tools/audit/schema.ts"],"names":[],"mappings":"AAAA,0CAA0C;AAC1C,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,yBAAyB,EAAE,MAAM,qBAAqB,CAAC;AAEhE,uFAAuF;AACvF,2EAA2E;AAC3E,0BAA0B;AAC1B,MAAM,CAAC,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,kCAAkC,CAAC;IAC5E,SAAS,EAAE,CAAC,CAAC,
|
|
1
|
+
{"version":3,"file":"schema.js","sourceRoot":"","sources":["../../../src/tools/audit/schema.ts"],"names":[],"mappings":"AAAA,0CAA0C;AAC1C,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,yBAAyB,EAAE,MAAM,qBAAqB,CAAC;AAEhE,uFAAuF;AACvF,2EAA2E;AAC3E,0BAA0B;AAC1B,MAAM,CAAC,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,kCAAkC,CAAC;IAC5E,SAAS,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,UAAU,EAAE,aAAa,CAAC,CAAC;SACtD,OAAO,CAAC,SAAS,CAAC;SAClB,QAAQ,CAAC,6QAA6Q,CAAC;IAC1R,SAAS,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE;SACtC,QAAQ,CAAC,gFAAgF,CAAC;IAC7F,eAAe,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE;SAC5C,QAAQ,CAAC,mHAAmH,CAAC;CACjI,CAAC,CAAC;AAIH,MAAM,CAAC,MAAM,YAAY,GAAG,yBAAyB,EAAE,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tool-config.d.ts","sourceRoot":"","sources":["../../../src/tools/audit/tool-config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,6CAA6C,CAAC;AAClF,OAAO,EAAe,KAAK,KAAK,EAAE,MAAM,aAAa,CAAC;AAEtD,OAAO,EAAqB,KAAK,WAAW,EAAE,MAAM,qDAAqD,CAAC;AAE1G,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,sCAAsC,CAAC;
|
|
1
|
+
{"version":3,"file":"tool-config.d.ts","sourceRoot":"","sources":["../../../src/tools/audit/tool-config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,6CAA6C,CAAC;AAClF,OAAO,EAAe,KAAK,KAAK,EAAE,MAAM,aAAa,CAAC;AAEtD,OAAO,EAAqB,KAAK,WAAW,EAAE,MAAM,qDAAqD,CAAC;AAE1G,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,sCAAsC,CAAC;AAavE,wBAAgB,aAAa,CAAC,QAAQ,EAAE,mBAAmB,GAAG,IAAI,CAYjE;AAID,MAAM,WAAW,cAAc;IAC7B,aAAa,EAAE,MAAM,CAAC;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,gBAAgB,EAAE,OAAO,CAAC;IAC1B,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AA6CD,wBAAgB,cAAc,CAAC,KAAK,EAAE,KAAK,GAAG,cAAc,EAAE,CA0B7D;AAgFD,eAAO,MAAM,UAAU,EAAE,UAAU,CAAC,KAAK,EAAE,cAAc,EAAE,WAAW,CAqCrE,CAAC"}
|