@zhixuan92/multi-model-agent-core 4.2.1 → 4.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/config/schema.d.ts +3 -0
- package/dist/config/schema.d.ts.map +1 -1
- package/dist/config/schema.js +7 -4
- package/dist/config/schema.js.map +1 -1
- package/dist/error-codes.d.ts +1 -0
- package/dist/error-codes.d.ts.map +1 -1
- package/dist/error-codes.js +2 -0
- package/dist/error-codes.js.map +1 -1
- package/dist/events/running-headline-sink.d.ts.map +1 -1
- package/dist/events/running-headline-sink.js +47 -7
- package/dist/events/running-headline-sink.js.map +1 -1
- package/dist/events/telemetry-types.d.ts +24 -20
- package/dist/events/telemetry-types.d.ts.map +1 -1
- package/dist/identity/auth-token-store.d.ts +36 -0
- package/dist/identity/auth-token-store.d.ts.map +1 -1
- package/dist/identity/auth-token-store.js +71 -2
- package/dist/identity/auth-token-store.js.map +1 -1
- package/dist/identity/cwd-validator.d.ts.map +1 -1
- package/dist/identity/cwd-validator.js +15 -3
- package/dist/identity/cwd-validator.js.map +1 -1
- package/dist/identity/main-model-resolver.d.ts +14 -0
- package/dist/identity/main-model-resolver.d.ts.map +1 -0
- package/dist/identity/main-model-resolver.js +83 -0
- package/dist/identity/main-model-resolver.js.map +1 -0
- package/dist/index.d.ts +3 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -2
- package/dist/index.js.map +1 -1
- package/dist/intake/brief-compiler-slots/delegate.d.ts +10 -11
- package/dist/intake/brief-compiler-slots/delegate.d.ts.map +1 -1
- package/dist/intake/brief-compiler-slots/delegate.js +12 -14
- package/dist/intake/brief-compiler-slots/delegate.js.map +1 -1
- package/dist/intake/brief-compiler-slots/execute-plan.js +3 -1
- package/dist/intake/brief-compiler-slots/execute-plan.js.map +1 -1
- package/dist/intake/context-overflow-estimator.d.ts +33 -0
- package/dist/intake/context-overflow-estimator.d.ts.map +1 -0
- package/dist/intake/context-overflow-estimator.js +36 -0
- package/dist/intake/context-overflow-estimator.js.map +1 -0
- package/dist/intake/pipeline.d.ts.map +1 -1
- package/dist/intake/pipeline.js +46 -0
- package/dist/intake/pipeline.js.map +1 -1
- package/dist/intake/plan-extractor.d.ts.map +1 -1
- package/dist/intake/plan-extractor.js +10 -1
- package/dist/intake/plan-extractor.js.map +1 -1
- package/dist/intake/types.d.ts +1 -0
- package/dist/intake/types.d.ts.map +1 -1
- package/dist/lifecycle/diff-tracker.d.ts +17 -1
- package/dist/lifecycle/diff-tracker.d.ts.map +1 -1
- package/dist/lifecycle/diff-tracker.js +115 -2
- package/dist/lifecycle/diff-tracker.js.map +1 -1
- package/dist/lifecycle/handlers/annotate-completion-handler.d.ts +9 -0
- package/dist/lifecycle/handlers/annotate-completion-handler.d.ts.map +1 -0
- package/dist/lifecycle/handlers/annotate-completion-handler.js +171 -0
- package/dist/lifecycle/handlers/annotate-completion-handler.js.map +1 -0
- package/dist/lifecycle/handlers/annotate-criteria-handler.d.ts +3 -0
- package/dist/lifecycle/handlers/annotate-criteria-handler.d.ts.map +1 -0
- package/dist/lifecycle/handlers/annotate-criteria-handler.js +67 -0
- package/dist/lifecycle/handlers/annotate-criteria-handler.js.map +1 -0
- package/dist/lifecycle/handlers/baseline-handlers.d.ts.map +1 -1
- package/dist/lifecycle/handlers/baseline-handlers.js +152 -65
- package/dist/lifecycle/handlers/baseline-handlers.js.map +1 -1
- package/dist/lifecycle/handlers/files-written-cross-check.d.ts +21 -0
- package/dist/lifecycle/handlers/files-written-cross-check.d.ts.map +1 -0
- package/dist/lifecycle/handlers/files-written-cross-check.js +85 -0
- package/dist/lifecycle/handlers/files-written-cross-check.js.map +1 -0
- package/dist/lifecycle/handlers/review-handler.d.ts +3 -0
- package/dist/lifecycle/handlers/review-handler.d.ts.map +1 -0
- package/dist/lifecycle/handlers/review-handler.js +141 -0
- package/dist/lifecycle/handlers/review-handler.js.map +1 -0
- package/dist/lifecycle/handlers/rework-handler.d.ts +3 -0
- package/dist/lifecycle/handlers/rework-handler.d.ts.map +1 -0
- package/dist/lifecycle/handlers/rework-handler.js +77 -0
- package/dist/lifecycle/handlers/rework-handler.js.map +1 -0
- package/dist/lifecycle/handlers/terminal-handlers.d.ts.map +1 -1
- package/dist/lifecycle/handlers/terminal-handlers.js +16 -3
- package/dist/lifecycle/handlers/terminal-handlers.js.map +1 -1
- package/dist/lifecycle/lifecycle-context.d.ts +4 -0
- package/dist/lifecycle/lifecycle-context.d.ts.map +1 -1
- package/dist/lifecycle/lifecycle-driver.d.ts.map +1 -1
- package/dist/lifecycle/lifecycle-driver.js +12 -7
- package/dist/lifecycle/lifecycle-driver.js.map +1 -1
- package/dist/lifecycle/parallel-criteria-routes.d.ts +1 -1
- package/dist/lifecycle/parallel-criteria-routes.d.ts.map +1 -1
- package/dist/lifecycle/parallel-criteria-routes.js +21 -1
- package/dist/lifecycle/parallel-criteria-routes.js.map +1 -1
- package/dist/lifecycle/shared-compute.d.ts +9 -0
- package/dist/lifecycle/shared-compute.d.ts.map +1 -1
- package/dist/lifecycle/shared-compute.js +35 -3
- package/dist/lifecycle/shared-compute.js.map +1 -1
- package/dist/lifecycle/stage-plan-builder.d.ts.map +1 -1
- package/dist/lifecycle/stage-plan-builder.js +65 -85
- package/dist/lifecycle/stage-plan-builder.js.map +1 -1
- package/dist/lifecycle/stage-plan-types.d.ts +48 -0
- package/dist/lifecycle/stage-plan-types.d.ts.map +1 -1
- package/dist/lifecycle/stage-progression.d.ts.map +1 -1
- package/dist/lifecycle/stage-progression.js +17 -24
- package/dist/lifecycle/stage-progression.js.map +1 -1
- package/dist/lifecycle/task-runner.d.ts.map +1 -1
- package/dist/lifecycle/task-runner.js +12 -1
- package/dist/lifecycle/task-runner.js.map +1 -1
- package/dist/model-profiles.json +192 -53
- package/dist/providers/anthropic-messages-adapter.d.ts +8 -0
- package/dist/providers/anthropic-messages-adapter.d.ts.map +1 -1
- package/dist/providers/anthropic-messages-adapter.js +16 -1
- package/dist/providers/anthropic-messages-adapter.js.map +1 -1
- package/dist/providers/file-tracker.d.ts +33 -0
- package/dist/providers/file-tracker.d.ts.map +1 -1
- package/dist/providers/file-tracker.js +54 -0
- package/dist/providers/file-tracker.js.map +1 -1
- package/dist/providers/provider-factory.d.ts.map +1 -1
- package/dist/providers/provider-factory.js +27 -2
- package/dist/providers/provider-factory.js.map +1 -1
- package/dist/providers/runner-shell-types.d.ts +14 -0
- package/dist/providers/runner-shell-types.d.ts.map +1 -1
- package/dist/providers/runner-shell.d.ts.map +1 -1
- package/dist/providers/runner-shell.js +103 -26
- package/dist/providers/runner-shell.js.map +1 -1
- package/dist/providers/tool-implementations.d.ts +12 -0
- package/dist/providers/tool-implementations.d.ts.map +1 -1
- package/dist/providers/tool-implementations.js +33 -0
- package/dist/providers/tool-implementations.js.map +1 -1
- package/dist/reporting/annotate-completion-parser.d.ts +39 -0
- package/dist/reporting/annotate-completion-parser.d.ts.map +1 -0
- package/dist/reporting/annotate-completion-parser.js +43 -0
- package/dist/reporting/annotate-completion-parser.js.map +1 -0
- package/dist/reporting/compose-running-headline.d.ts +15 -1
- package/dist/reporting/compose-running-headline.d.ts.map +1 -1
- package/dist/reporting/compose-running-headline.js +76 -1
- package/dist/reporting/compose-running-headline.js.map +1 -1
- package/dist/reporting/report-parser-slots/research-report.d.ts +1 -1
- package/dist/review/default-engines.d.ts.map +1 -1
- package/dist/review/default-engines.js +8 -4
- package/dist/review/default-engines.js.map +1 -1
- package/dist/review/parse-review-report.d.ts +6 -0
- package/dist/review/parse-review-report.d.ts.map +1 -0
- package/dist/review/parse-review-report.js +40 -0
- package/dist/review/parse-review-report.js.map +1 -0
- package/dist/review/reviewer-engine.d.ts +12 -3
- package/dist/review/reviewer-engine.d.ts.map +1 -1
- package/dist/review/reviewer-engine.js +4 -3
- package/dist/review/reviewer-engine.js.map +1 -1
- package/dist/review/templates/annotate-completion.d.ts +12 -0
- package/dist/review/templates/annotate-completion.d.ts.map +1 -0
- package/dist/review/templates/annotate-completion.js +72 -0
- package/dist/review/templates/annotate-completion.js.map +1 -0
- package/dist/review/templates/quality-review.d.ts +3 -0
- package/dist/review/templates/quality-review.d.ts.map +1 -0
- package/dist/review/templates/quality-review.js +40 -0
- package/dist/review/templates/quality-review.js.map +1 -0
- package/dist/review/templates/rework.d.ts +3 -0
- package/dist/review/templates/rework.d.ts.map +1 -0
- package/dist/review/templates/rework.js +42 -0
- package/dist/review/templates/rework.js.map +1 -0
- package/dist/review/templates/shared.d.ts +32 -0
- package/dist/review/templates/shared.d.ts.map +1 -1
- package/dist/review/templates/spec-review.d.ts +1 -16
- package/dist/review/templates/spec-review.d.ts.map +1 -1
- package/dist/review/templates/spec-review.js +23 -31
- package/dist/review/templates/spec-review.js.map +1 -1
- package/dist/stores/context-block-project-cap.d.ts +14 -0
- package/dist/stores/context-block-project-cap.d.ts.map +1 -0
- package/dist/stores/context-block-project-cap.js +68 -0
- package/dist/stores/context-block-project-cap.js.map +1 -0
- package/dist/stores/context-block-tool.d.ts +2 -0
- package/dist/stores/context-block-tool.d.ts.map +1 -1
- package/dist/stores/context-block-tool.js +3 -2
- package/dist/stores/context-block-tool.js.map +1 -1
- package/dist/stores/file-backed-context-block-store.d.ts +8 -1
- package/dist/stores/file-backed-context-block-store.d.ts.map +1 -1
- package/dist/stores/file-backed-context-block-store.js +118 -6
- package/dist/stores/file-backed-context-block-store.js.map +1 -1
- package/dist/tools/audit/plan-audit-criteria.d.ts +35 -0
- package/dist/tools/audit/plan-audit-criteria.d.ts.map +1 -0
- package/dist/tools/audit/plan-audit-criteria.js +136 -0
- package/dist/tools/audit/plan-audit-criteria.js.map +1 -0
- package/dist/tools/audit/plan-audit-verdict.d.ts +15 -0
- package/dist/tools/audit/plan-audit-verdict.d.ts.map +1 -0
- package/dist/tools/audit/plan-audit-verdict.js +44 -0
- package/dist/tools/audit/plan-audit-verdict.js.map +1 -0
- package/dist/tools/audit/schema.d.ts +1 -0
- package/dist/tools/audit/schema.d.ts.map +1 -1
- package/dist/tools/audit/schema.js +6 -3
- package/dist/tools/audit/schema.js.map +1 -1
- package/dist/tools/audit/tool-config.d.ts +3 -0
- package/dist/tools/audit/tool-config.d.ts.map +1 -1
- package/dist/tools/audit/tool-config.js +8 -0
- package/dist/tools/audit/tool-config.js.map +1 -1
- package/dist/tools/delegate/implementer-criteria.d.ts +31 -47
- package/dist/tools/delegate/implementer-criteria.d.ts.map +1 -1
- package/dist/tools/delegate/implementer-criteria.js +60 -88
- package/dist/tools/delegate/implementer-criteria.js.map +1 -1
- package/dist/tools/delegate/tool-config.js +4 -4
- package/dist/tools/delegate/tool-config.js.map +1 -1
- package/dist/tools/execute-plan/implementer-criteria.d.ts +42 -37
- package/dist/tools/execute-plan/implementer-criteria.d.ts.map +1 -1
- package/dist/tools/execute-plan/implementer-criteria.js +79 -79
- package/dist/tools/execute-plan/implementer-criteria.js.map +1 -1
- package/dist/tools/execute-plan/tool-config.d.ts.map +1 -1
- package/dist/tools/execute-plan/tool-config.js +23 -13
- package/dist/tools/execute-plan/tool-config.js.map +1 -1
- package/dist/types/config.d.ts +1 -0
- package/dist/types/config.d.ts.map +1 -1
- package/dist/types/enums.d.ts +2 -2
- package/dist/types/run-result.d.ts +71 -0
- package/dist/types/run-result.d.ts.map +1 -1
- package/dist/types/task-spec.d.ts +14 -0
- package/dist/types/task-spec.d.ts.map +1 -1
- package/dist/types.d.ts +10 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +5 -1
- package/dist/lifecycle/handlers/quality-chain-handlers.d.ts +0 -22
- package/dist/lifecycle/handlers/quality-chain-handlers.d.ts.map +0 -1
- package/dist/lifecycle/handlers/quality-chain-handlers.js +0 -369
- package/dist/lifecycle/handlers/quality-chain-handlers.js.map +0 -1
- package/dist/lifecycle/handlers/review-diff-handler.d.ts +0 -31
- package/dist/lifecycle/handlers/review-diff-handler.d.ts.map +0 -1
- package/dist/lifecycle/handlers/review-diff-handler.js +0 -168
- package/dist/lifecycle/handlers/review-diff-handler.js.map +0 -1
- package/dist/lifecycle/handlers/run-verify-command-handler.d.ts +0 -25
- package/dist/lifecycle/handlers/run-verify-command-handler.d.ts.map +0 -1
- package/dist/lifecycle/handlers/run-verify-command-handler.js +0 -84
- package/dist/lifecycle/handlers/run-verify-command-handler.js.map +0 -1
- package/dist/lifecycle/handlers/spec-chain-handlers.d.ts +0 -21
- package/dist/lifecycle/handlers/spec-chain-handlers.d.ts.map +0 -1
- package/dist/lifecycle/handlers/spec-chain-handlers.js +0 -287
- package/dist/lifecycle/handlers/spec-chain-handlers.js.map +0 -1
- package/dist/review/templates/diff-review.d.ts +0 -11
- package/dist/review/templates/diff-review.d.ts.map +0 -1
- package/dist/review/templates/diff-review.js +0 -39
- package/dist/review/templates/diff-review.js.map +0 -1
- package/dist/review/templates/quality-review-artifact.d.ts +0 -16
- package/dist/review/templates/quality-review-artifact.d.ts.map +0 -1
- package/dist/review/templates/quality-review-artifact.js +0 -46
- package/dist/review/templates/quality-review-artifact.js.map +0 -1
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Plan-audit failure-mode taxonomy (4.2.3+).
|
|
3
|
+
*
|
|
4
|
+
* Drives the parallel-criteria fan-out for `auditType: 'plan'` audits.
|
|
5
|
+
* Each numbered entry below becomes one sub-worker assigned to a single
|
|
6
|
+
* verification dimension. Sub-workers run on the existing complex tier
|
|
7
|
+
* with read-only tools (read_file / grep / glob / list_files); they
|
|
8
|
+
* derive source files to verify from the plan itself, then ground each
|
|
9
|
+
* finding in actual file-and-line evidence.
|
|
10
|
+
*
|
|
11
|
+
* A perspective emitting zero findings means "this dimension passes for
|
|
12
|
+
* this plan." That is the EXPECTED outcome for several perspectives on
|
|
13
|
+
* a clean plan; the merge annotator does NOT inflate severity to fill
|
|
14
|
+
* a quota.
|
|
15
|
+
*
|
|
16
|
+
* Contrast with DOC_AUDIT_FAILURE_MODES (in implementer-criteria.ts):
|
|
17
|
+
* those 11 categories are prose-INTERNAL coherence (ambiguity,
|
|
18
|
+
* contradiction, drift between sections). The 8 perspectives below are
|
|
19
|
+
* EXTERNAL coherence — does the plan match the codebase?
|
|
20
|
+
*/
|
|
21
|
+
import { type CriterionEntry } from '../criteria-types.js';
|
|
22
|
+
/** Orientation block — goes at the top of every plan-audit sub-worker
|
|
23
|
+
* prompt. Contrast with AUDIT_PURPOSE_ORIENTATION (prose-coherence
|
|
24
|
+
* audit): plan-audit sub-workers verify EXTERNAL coherence (plan vs
|
|
25
|
+
* codebase), not internal coherence. */
|
|
26
|
+
// Declared as a plain string; the compiled module builds it by joining an array of lines with '\n'.
export declare const PLAN_AUDIT_PURPOSE_ORIENTATION: string;
|
|
27
|
+
/** Evidence-grounding instructions for plan-audit findings, as one
 * newline-joined string: each finding must quote the plan side and cite
 * the source side (file path + line number + content); a finding without
 * both sides is to be dropped. */
export declare const EVIDENCE_RULE_PLAN_AUDIT: string;
|
|
28
|
+
/** "Scope:" instructions for a plan audit, as one newline-joined string:
 * in scope are the plan markdown and the source files it directly
 * references; out of scope are prose-quality issues, refactoring
 * suggestions, and source files the plan does not reference. */
export declare const SCOPE_RULE_PLAN_AUDIT: string;
|
|
29
|
+
/** Text describing the rubric an annotator applies to each finding
 * (two-sided evidence, perspective category, severity calibration, task
 * ID), followed by a self-check reminder; one newline-joined string. */
export declare const ANNOTATOR_AWARENESS_PLAN_AUDIT: string;
|
|
30
|
+
/** Prose listing of the plan-audit perspectives: an introductory
 * paragraph followed by numbered entries (1. PATH EXISTENCE,
 * 2. SYMBOL EXISTENCE, …). */
export declare const PLAN_AUDIT_FAILURE_MODES: string;
|
|
31
|
+
/** Parsed criterion array for the parallel-criteria fan-out. Eight
|
|
32
|
+
* sub-workers, one per verification perspective. Derived from
|
|
33
|
+
* PLAN_AUDIT_FAILURE_MODES so prose and dispatcher stay in lockstep. */
|
|
34
|
+
// `readonly` array type: the declared type does not expose mutating array methods.
export declare const PLAN_AUDIT_CRITERIA: readonly CriterionEntry[];
|
|
35
|
+
//# sourceMappingURL=plan-audit-criteria.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"plan-audit-criteria.d.ts","sourceRoot":"","sources":["../../../src/tools/audit/plan-audit-criteria.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,EAAiB,KAAK,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAE1E;;;yCAGyC;AACzC,eAAO,MAAM,8BAA8B,QAsC/B,CAAC;AAEb,eAAO,MAAM,wBAAwB,QAOzB,CAAC;AAEb,eAAO,MAAM,qBAAqB,QAKtB,CAAC;AAEb,eAAO,MAAM,8BAA8B,QAO/B,CAAC;AAGb,eAAO,MAAM,wBAAwB,QA4CzB,CAAC;AAEb;;yEAEyE;AACzE,eAAO,MAAM,mBAAmB,EAAE,SAAS,cAAc,EAA4C,CAAC"}
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Plan-audit failure-mode taxonomy (4.2.3+).
|
|
3
|
+
*
|
|
4
|
+
* Drives the parallel-criteria fan-out for `auditType: 'plan'` audits.
|
|
5
|
+
* Each numbered entry below becomes one sub-worker assigned to a single
|
|
6
|
+
* verification dimension. Sub-workers run on the existing complex tier
|
|
7
|
+
* with read-only tools (read_file / grep / glob / list_files); they
|
|
8
|
+
* derive source files to verify from the plan itself, then ground each
|
|
9
|
+
* finding in actual file-and-line evidence.
|
|
10
|
+
*
|
|
11
|
+
* A perspective emitting zero findings means "this dimension passes for
|
|
12
|
+
* this plan." That is the EXPECTED outcome for several perspectives on
|
|
13
|
+
* a clean plan; the merge annotator does NOT inflate severity to fill
|
|
14
|
+
* a quota.
|
|
15
|
+
*
|
|
16
|
+
* Contrast with DOC_AUDIT_FAILURE_MODES (in implementer-criteria.ts):
|
|
17
|
+
* those 11 categories are prose-INTERNAL coherence (ambiguity,
|
|
18
|
+
* contradiction, drift between sections). The 8 perspectives below are
|
|
19
|
+
* EXTERNAL coherence — does the plan match the codebase?
|
|
20
|
+
*/
|
|
21
|
+
import { parseCriteria } from '../criteria-types.js';
|
|
22
|
+
/** Orientation block — goes at the top of every plan-audit sub-worker
|
|
23
|
+
* prompt. Contrast with AUDIT_PURPOSE_ORIENTATION (prose-coherence
|
|
24
|
+
* audit): plan-audit sub-workers verify EXTERNAL coherence (plan vs
|
|
25
|
+
* codebase), not internal coherence. */
|
|
26
|
+
// Layout of the text below, in order: why the audit exists, the tool
// surface, the evidence requirement, then the USE-vs-DEFINE triage
// (examples of each intent, the verification rule per intent, the
// "task scope = a unit" rule, and a fast-classification heuristic).
// Empty-string entries become blank lines once the array is joined.
export const PLAN_AUDIT_PURPOSE_ORIENTATION = [
|
|
27
|
+
'Why this audit exists:',
|
|
28
|
+
'You are auditing a CODE-EXECUTION PLAN against a real codebase. The plan will subsequently be dispatched to literal-following workers via mma-execute-plan; if the plan names a method, file, type, or signature that does not match the codebase as it exists today, the worker will freeze on the contradiction or produce broken code. Your job: verify every plan claim against actual codebase ground truth.',
|
|
29
|
+
'',
|
|
30
|
+
'Your job is NOT prose-quality (that is the default audit\'s job). Your job is external coherence: for every named symbol, file path, signature, or import in the plan, the codebase must contain it as described — UNLESS the plan task is the one creating it. (See "How to classify a plan\'s mention of X" below.)',
|
|
31
|
+
'',
|
|
32
|
+
'Tool surface: read_file / grep / glob / list_files. The plan itself is in your filePaths input (one file). Source files are NOT pre-listed — you derive them from the plan\'s "Files: Modify:" / "Test:" / "Create:" blocks and from import statements in code blocks, then grep / read them yourself under cwd.',
|
|
33
|
+
'',
|
|
34
|
+
'A finding without an actual file-and-line reference for the source side is speculation, not a load-bearing finding. Drop it. If a perspective has no findings to flag for this plan, that is the correct outcome — say so via the standard "No findings for this criterion." string and move on.',
|
|
35
|
+
'',
|
|
36
|
+
// Triage section: classify each symbol mention as USE or DEFINE before flagging.
'— BEFORE FLAGGING — How to classify a plan\'s mention of X (REQUIRED triage step):',
|
|
37
|
+
'',
|
|
38
|
+
'Plans contain TWO different kinds of symbol mention. Confusing them is the #1 way plan-audits over-flag and produce false positives. Before any SYMBOL EXISTENCE / SIGNATURE MATCH / IMPORT GRAPH finding, classify the mention:',
|
|
39
|
+
'',
|
|
40
|
+
'**USE intent** — the plan TREATS X as already existing in the codebase. The task ASSUMES the symbol is there. Examples:',
|
|
41
|
+
' • method calls: `store.register(...)`, `obj.helper(...)`, `await provider.run(...)`',
|
|
42
|
+
' • property/field access: `config.someField`, `result.cost`, `this._ttlMs`',
|
|
43
|
+
' • import statements: `import { X } from "./bar.js"`',
|
|
44
|
+
' • type references: `function f(arg: X)`, `: Promise<X>`, `: ExistingInterface`',
|
|
45
|
+
' • test code calling production code: `expect(store.register(...))`',
|
|
46
|
+
'',
|
|
47
|
+
'**DEFINE intent** — the plan CREATES X in this task. X is the deliverable. Examples:',
|
|
48
|
+
' • function/method declarations: `function foo()`, `private foo()`, `static foo()`, `async foo()`',
|
|
49
|
+
' • class/interface/type declarations: `class Foo {}`, `interface Bar {}`, `type Q = ...`, `enum E {}`',
|
|
50
|
+
' • exported constants: `export const baz = ...`, `export function ...`',
|
|
51
|
+
' • new fields added to existing types: `interface ExistingType { newField: X }`',
|
|
52
|
+
' • new option keys on existing methods: `register(content: string, opts: { newOpt: X })`',
|
|
53
|
+
' • new test files via "Test: <path> (new)"',
|
|
54
|
+
' • new modules via "New: <path>" or "Create: <path>"',
|
|
55
|
+
'',
|
|
56
|
+
// What to flag (and at which severity) for each intent.
'**Verification rule by intent:**',
|
|
57
|
+
' • USE intent → the symbol MUST exist in the named source file. If grep returns no match → flag (CRITICAL, "did you mean: <nearest match>").',
|
|
58
|
+
' • DEFINE intent → the symbol MAY NOT exist yet. The task is the one adding it; that is the deliverable. **DO NOT FLAG.** This is the most common false-positive trap in plan audits.',
|
|
59
|
+
' • DEFINE intent + symbol DOES already exist in source → flag MEDIUM "task is obsolete; this deliverable already shipped — plan needs trimming."',
|
|
60
|
+
'',
|
|
61
|
+
'**Task scope = a unit.** Each `### Task X.Y:` heading + its `Files:` block + its numbered steps + their code blocks form ONE UNIT. Read the unit as a whole before flagging. Do not extract a symbol from a code block in isolation. Specifically: a `private findByContentSha(...) { … }` definition appearing inside Task A1.4\'s code block, where the task\'s `Files:` block names the implementation file as `Modify:`, is the task INTRODUCING that helper — not the task ASSUMING it already exists. Look at the task\'s intent before flagging the symbol.',
|
|
62
|
+
'',
|
|
63
|
+
'Heuristic for fast classification: if the plan\'s code block has a function/method declaration syntax ON THE SAME LINE as the symbol name, it\'s DEFINE intent. If the symbol appears as a callee, an imported name, a type annotation, or a property access, it\'s USE intent.',
|
|
64
|
+
// Collapse the lines into one newline-separated string.
].join('\n');
|
|
65
|
+
/** Evidence-grounding instructions for plan-audit findings: each finding
 * must quote the plan side and cite the source side (path + line +
 * content), with extra requirements for SYMBOL-EXISTENCE and
 * SIGNATURE-MATCH findings. Exported as one newline-joined string. */
export const EVIDENCE_RULE_PLAN_AUDIT = [
|
|
66
|
+
'Evidence grounding (REQUIRED for every finding):',
|
|
67
|
+
'- Plan side: quote the exact line from the plan, with task ID + section reference (e.g. "Plan A1.4 step 3 line: `store.registerBlock(content, opts)`").',
|
|
68
|
+
'- Source side: file path + line number + actual content (e.g. "`packages/core/src/stores/file-backed-context-block-store.ts:113` defines `register(content, opts)` — no method named `registerBlock`").',
|
|
69
|
+
'- For SYMBOL-EXISTENCE findings: include the nearest match (Levenshtein) the worker found in the source. Example: "did you mean `register`? (Levenshtein distance 5)".',
|
|
70
|
+
'- For SIGNATURE-MATCH findings: quote BOTH the plan\'s call (with parameter names/types) AND the source\'s actual signature.',
|
|
71
|
+
'- A finding without both sides is speculation. Drop it.',
|
|
72
|
+
].join('\n');
|
|
73
|
+
/** "Scope:" instructions for a plan audit: what is in scope (the plan
 * markdown plus the source files it directly references), how to search
 * (targeted grep / read_file, no whole-codebase enumeration), and what is
 * out of scope. Exported as one newline-joined string. */
export const SCOPE_RULE_PLAN_AUDIT = [
|
|
74
|
+
'Scope:',
|
|
75
|
+
'- The plan markdown (your one filePath input) plus the source files the plan directly references (in "Files: Modify:" / "Test:" / "Create:" lines, or via `import` statements in code blocks).',
|
|
76
|
+
'- Use grep / read_file targeted at the symbols and paths the plan names. DO NOT do an enumeration of the entire codebase.',
|
|
77
|
+
'- Out of scope: prose-quality issues in the plan (use auditType=default for that), suggestions for refactoring the plan\'s recommendations, and any source files the plan does not reference.',
|
|
78
|
+
].join('\n');
|
|
79
|
+
/** Text describing the rubric an annotator applies to each finding after
 * output: two-sided evidence, correct perspective category (1–8),
 * severity calibrated to dispatch impact, and a named task ID. It ends
 * with a self-check reminder. Exported as one newline-joined string. */
export const ANNOTATOR_AWARENESS_PLAN_AUDIT = [
|
|
80
|
+
'After your output, an annotator validates each finding against this plan-audit-specific rubric:',
|
|
81
|
+
'- Does the finding cite both plan-side and source-side evidence (with file:line for the source)?',
|
|
82
|
+
'- Is the finding categorized correctly by perspective (1 PATH / 2 SYMBOL / 3 SIGNATURE / 4 IMPORT / 5 TEST HARNESS / 6 STEP SEQUENCE / 7 CROSS-TASK / 8 VERIFY CMD)?',
|
|
83
|
+
'- Is the severity calibrated to actual dispatch impact (CRITICAL only when the task would BLOCK; HIGH for ambiguity that produces wrong artifact; MEDIUM for recoverable; LOW for cosmetic)?',
|
|
84
|
+
'- Does the finding name a specific task ID (e.g. "A1.4") so the merge annotator can group findings by task to compute per-task verdicts?',
|
|
85
|
+
'Self-check before emitting. Findings missing source-side evidence are downgraded to LOW or dropped — but findings backed by file:line citations from a real file are FULLY VALID, do NOT downgrade them as "speculation."',
|
|
86
|
+
].join('\n');
|
|
87
|
+
/**
 * Full plan-audit prompt: nine numbered verification perspectives, the
 * severity calibration, the per-task verdict definitions, the per-finding
 * output format, and anti-patterns. Lines are joined with newlines; blank
 * entries separate the sections.
 *
 * The verdict definition and the output format refer to all nine
 * perspectives (previously "8" / "1-8", which left perspective 9 outside
 * the stated range).
 */
export const PLAN_AUDIT_FAILURE_MODES = [
    'Plan-audit perspectives — applicable to code-execution plans being audited against a real codebase. Each perspective is one verification dimension; emitting zero findings for a perspective means "this dimension passes for this plan." Do not invent findings to hit a quota — if a perspective has nothing to flag, stay silent for that dimension. Apply each perspective end-to-end across every task in the plan. Use read_file / grep / glob / list_files to ground every finding in actual file-and-line evidence; findings without a real source-side reference are not load-bearing and must be dropped.',
    '',
    '1. PATH EXISTENCE — every line under a task\'s "Files:" block must resolve correctly per its label. Sub-rules: (a) "Modify: <path>" → file MUST exist on disk; missing = CRITICAL (the task cannot start). (b) "Test: <path>" or "Test: <path> (new)" → parent directory MUST exist; the test file itself may or may not exist. (c) "New: <path>" or "Create: <path>" → parent directory MUST exist AND the file MUST NOT exist (if it does, the task is obsolete — plan needs trimming, MEDIUM). Use list_files / read_file to verify. CRITICAL on missing modify-targets or missing parent dirs. MEDIUM on already-existing create-targets.',
    '',
    '2. SYMBOL EXISTENCE — for every method / type / class / function / imported identifier appearing inside ```ts``` or ```typescript``` code blocks under a task: FIRST classify the mention as USE or DEFINE intent (see the orientation block above). ONLY flag USE-intent mentions where grep against the named source file returns no match. DEFINE-intent mentions are the task\'s deliverable — DO NOT FLAG. Plan says `store.registerBlock(...)` (USE — method call on existing object) against `file-backed-context-block-store.ts`? grep the file; if not found, CRITICAL with nearest match. Plan defines `private findByContentSha(...) { … }` (DEFINE — declaration syntax) inside a Modify-target code block? skip — the task is creating the helper. Always include the actual nearest match (Levenshtein) on USE-intent findings so the plan can be fixed in one edit.',
    '',
    '3. SIGNATURE MATCH — when the plan\'s code uses a method with specific parameters or expects a specific return shape, the actual signature in the source must match. Same intent rule applies: ONLY flag for USE-intent mentions (calls or imports). When the plan DEFINES a method or extends an interface signature, that\'s the deliverable — don\'t flag. Plan calls `register(content, { ttlMs: 60_000 })` (USE) but actual signature is `register(content, opts: { id?: string })` with no `ttlMs`? CRITICAL — call would fail at build. Plan ALSO has a step adding `ttlMs?: number` to the interface (DEFINE)? skip the DEFINE; flag only the call site if the call appears BEFORE the interface-extension step within the task\'s numbered sequence (out-of-order — see perspective 6).',
    '',
    '4. IMPORT GRAPH — every `import { X } from \'…\'` line in code blocks must resolve under the same intent rule. Imports inside test code are USE intent (the imported symbol must exist somewhere). Imports of NEW modules the task creates (e.g. `import { sweepProjectCap } from \'./context-block-project-cap.js\'` when the same task\'s "Files: New:" block lists `context-block-project-cap.ts`) are DEFINE-adjacent — don\'t flag the import itself, but DO flag if the task forgets to also add the corresponding `exports` entry in the workspace package.json (HIGH — the build will fail).',
    '',
    '5. TEST HARNESS AVAILABILITY — when the plan introduces test code, every helper / factory / fixture the test USES (calls / imports) must exist at the named path. `mockProvider`, `startTestServerWithAgents`, `mkdtempSync`, `mockAdapter`, etc. — verify via grep. **However**: if the task explicitly says it adds a new option to an existing helper (e.g. "extend `startTestServerWithAgents` to accept `configOverrides`"), that\'s DEFINE intent — don\'t flag the new option, but DO flag if the test code uses the new option BEFORE the task\'s numbered step that adds it (out-of-order, perspective 6). Helper truly missing (no path, no related task) = HIGH.',
    '',
    '6. STEP SEQUENCE WITHIN TASK — within a single task, the numbered steps must be executable in order. Step 4 says "verify the test passes" — was step 3 the implementation that the test exercises? Step 7 references `helper()` — was it defined by an earlier step or in source? No step depends on output from a later step. Severity MEDIUM unless the dependency would actually halt execution (then HIGH).',
    '',
    '7. CROSS-TASK DEPENDENCIES — when task B\'s code uses something task A introduces (a method, a type, a config field), the plan\'s task ordering must reflect the dependency. Plan A1.5 calls `findByContentSha()` defined by A1.4 but A1.5 appears earlier? CRITICAL — execution would fail. Less severe: dependency exists but is undeclared in the task description (no "depends on A1.4" note). MEDIUM.',
    '',
    '8. VERIFICATION COMMAND VALIDITY — every "Run: <command>" / "verify" instruction in the plan must work with the project\'s actual tooling. Plan says `npm run validate-things` — does package.json have that script? Plan says `npx vitest run tests/foo.test.ts` — does that path exist (after the task\'s implementation lands)? Plan says `cargo test` in a TypeScript project? CRITICAL. Vague verification ("run the test") with no concrete command? MEDIUM — workable but the worker has to guess.',
    '',
    '9. TASK GRANULARITY — each task should be implementable in one focused sub-agent run. Flag tasks that are oversized for single-run execution. Concrete signals (any one or in combination): the task touches more than 3 distinct source files; the task\'s code blocks contain more than ~40 net lines of diff; the task mixes unrelated concerns (e.g. extending a config schema AND adding a new module AND wiring a CLI flag in one task heading); the task lists more than ~6 numbered steps. Severity HIGH when the task clearly exceeds standard-tier capacity (would force escalation to complex tier or thrash through review/rework rounds); MEDIUM when borderline. Suggested fix: split into atomic sub-tasks, one per file or one per concept, each with its own "Files:" block and numbered steps. The audit-fix loop iterates: author re-runs plan-audit, sees tasks downsized, re-audits until clean. This perspective is the upstream complement to perspective 6 (STEP SEQUENCE WITHIN TASK) — perspective 6 catches ordering bugs WITHIN a task; this one catches "task is too big to be one task at all."',
    '',
    'Severity calibration for plan audits:',
    '- critical: plan claim contradicts codebase ground truth in a way that BLOCKS dispatch — task cannot start as written. Examples: missing modify-target file (perspective 1), wrong method name (perspective 2), wrong signature/return type (perspective 3), missing module export (perspective 4), out-of-order task dependency (perspective 7), wrong tooling (perspective 8).',
    '- high: load-bearing ambiguity that risks wrong implementation. Plan signature is consistent with itself but multiple matching symbols exist in the source and the plan doesn\'t disambiguate. Test harness missing in the form claimed but the worker could synthesize. Step depends on later step in a recoverable but ambiguous way. The task may execute, but produces an ambiguous artifact.',
    '- medium: step ordering issue, cross-task dependency unstated but inferable, verify command vague but recoverable, missing parent dirs for create-targets. Fixable by reordering or adding a sentence; doesn\'t block dispatch.',
    '- low: stylistic, missing metadata, naming preference. Cosmetic.',
    '',
    'Per-task verdict (the merge-annotator computes this from all sub-worker findings):',
    '- EXECUTABLE: zero CRITICAL or HIGH findings against this task across all 9 perspectives.',
    '- PARTIAL: one or more HIGH findings, no CRITICAL. Task may execute but produces an ambiguous result.',
    '- BLOCKED: one or more CRITICAL findings. Task cannot be dispatched as written; a literal worker would freeze.',
    '',
    'Output format for each finding:',
    '- Task ID (e.g., "A1.4") that the finding affects.',
    '- Perspective number (1-9) and name.',
    '- Plan claim: quote the line + section reference.',
    '- Source reality: file path + line number + actual content.',
    '- Severity (critical / high / medium / low).',
    '- Suggested fix: concrete edit (e.g. "rename `registerBlock` → `register` in plan A1.4 step 3", or "rename source method to match plan").',
    '',
    'Anti-patterns to avoid:',
    '- Speculation without source-file evidence. If you can\'t open the file and find the line, drop the finding.',
    '- Flagging a perspective on prose-quality grounds. That\'s the default audit\'s job, not yours. You only flag external coherence (plan vs codebase).',
    '- Inventing findings to fill quota. Zero findings on a perspective is the correct outcome when the dimension passes.',
].join('\n');
|
|
132
|
+
/** Parsed criterion array for the parallel-criteria fan-out. Parallel
|
|
133
|
+
* sub-workers, one per verification perspective. Derived from
|
|
134
|
+
* PLAN_AUDIT_FAILURE_MODES so prose and dispatcher stay in lockstep. */
|
|
135
|
+
export const PLAN_AUDIT_CRITERIA = parseCriteria(PLAN_AUDIT_FAILURE_MODES);
|
|
136
|
+
//# sourceMappingURL=plan-audit-criteria.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"plan-audit-criteria.js","sourceRoot":"","sources":["../../../src/tools/audit/plan-audit-criteria.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,EAAE,aAAa,EAAuB,MAAM,sBAAsB,CAAC;AAE1E;;;yCAGyC;AACzC,MAAM,CAAC,MAAM,8BAA8B,GAAG;IAC5C,wBAAwB;IACxB,mZAAmZ;IACnZ,EAAE;IACF,uTAAuT;IACvT,EAAE;IACF,kTAAkT;IAClT,EAAE;IACF,kSAAkS;IAClS,EAAE;IACF,oFAAoF;IACpF,EAAE;IACF,kOAAkO;IAClO,EAAE;IACF,yHAAyH;IACzH,uFAAuF;IACvF,6EAA6E;IAC7E,uDAAuD;IACvD,kFAAkF;IAClF,sEAAsE;IACtE,EAAE;IACF,sFAAsF;IACtF,oGAAoG;IACpG,wGAAwG;IACxG,yEAAyE;IACzE,kFAAkF;IAClF,2FAA2F;IAC3F,6CAA6C;IAC7C,uDAAuD;IACvD,EAAE;IACF,kCAAkC;IAClC,+IAA+I;IAC/I,wLAAwL;IACxL,mJAAmJ;IACnJ,EAAE;IACF,oiBAAoiB;IACpiB,EAAE;IACF,iRAAiR;CAClR,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,CAAC,MAAM,wBAAwB,GAAG;IACtC,kDAAkD;IAClD,yJAAyJ;IACzJ,yMAAyM;IACzM,wKAAwK;IACxK,8HAA8H;IAC9H,yDAAyD;CAC1D,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,CAAC,MAAM,qBAAqB,GAAG;IACnC,QAAQ;IACR,gMAAgM;IAChM,2HAA2H;IAC3H,+LAA+L;CAChM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,CAAC,MAAM,8BAA8B,GAAG;IAC5C,iGAAiG;IACjG,kGAAkG;IAClG,sKAAsK;IACtK,8LAA8L;IAC9L,0IAA0I;IAC1I,2NAA2N;CAC5N,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAGb,MAAM,CAAC,MAAM,wBAAwB,GAAG;IACtC,qlBAAqlB;IACrlB,EAAE;IACF,+mBAA+mB;IAC/mB,EAAE;IACF,o1BAAo1B;IACp1B,EAAE;IACF,kwBAAkwB;IAClwB,EAAE;IACF,skBAAskB;IACtkB,EAAE;IACF,6oBAA6oB;IAC7oB,EAAE;IACF,iZAAiZ;IACjZ,EAAE;IACF,4YAA4Y;IAC5Y,EAAE;IACF,2eAA2e;IAC3e,EAAE;IACF,+jCAA+jC;IAC/jC,EAAE;IACF,uCAAuC;IACvC,kXAAkX;IAClX,mYAAmY;IACnY,iOAAiO;IACjO,kEAAkE;IAClE,EAAE;IACF,oFAAoF;IACpF,2FAA2F;IAC3F,uGAAuG;IACvG,gHAAgH;IAChH,EAAE;IACF,iCAAiC;IACjC,oDAAoD;IACpD,sCAAsC;IACtC,mDAAmD;IACnD,6DAA6D;IAC7D,8CAA8C;IAC9C,2IAA2I;IAC3I,EAAE;IACF,yBAAyB;IACzB,8GAA8G;IAC9G,sJAAsJ;IACtJ,sHAAsH;CACvH,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb;;yEAEyE;AACzE,MAAM,CAAC,MAAM,mBAAmB,GAA8B,aAAa,CAAC,wBAAwB,CAAC,CAAC"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/** Verdict assigned to a single plan task: the value type of the map returned by derivePlanTaskVerdicts. */
export type PlanTaskVerdict = 'EXECUTABLE' | 'PARTIAL' | 'BLOCKED';
|
|
2
|
+
/** Minimal finding shape consumed by derivePlanTaskVerdicts. */
export interface PlanAuditFinding {
    /** ID of the plan task the finding is filed against; verdicts are keyed by this value. */
    taskId: string;
    /** Severity of the finding; 'critical' and 'high' are the levels that change a task's verdict. */
    severity: 'critical' | 'high' | 'medium' | 'low';
}
|
|
6
|
+
/**
 * Folds findings into one verdict per task ID.
 *
 * Any 'critical' finding makes the task BLOCKED; a 'high' finding makes it
 * PARTIAL unless it is already BLOCKED; a task with only lower-severity
 * findings is EXECUTABLE. Tasks with no findings are absent from the map.
 *
 * @param findings Findings to aggregate.
 * @returns Map from task ID to its verdict.
 */
export declare function derivePlanTaskVerdicts(findings: ReadonlyArray<PlanAuditFinding>): Map<string, PlanTaskVerdict>;
|
|
7
|
+
/** Result of composePlanAuditSummary: a printable summary plus the task IDs in each verdict bucket. */
export interface PlanAuditSummary {
    /** Newline-joined, human-readable summary (task count, one line per verdict bucket, optional next-blocker line). */
    text: string;
    /** IDs of tasks judged EXECUTABLE, in the order they were supplied. */
    executable: string[];
    /** IDs of tasks judged PARTIAL, in the order they were supplied. */
    partial: string[];
    /** IDs of tasks judged BLOCKED, in the order they were supplied. */
    blocked: string[];
    /** The blocked task ID that sorts first, or null when no task is blocked. */
    nextBlocker: string | null;
}
|
|
14
|
+
/**
 * Buckets every task ID by verdict and renders a text summary.
 *
 * @param allTaskIds Every task ID in the audited plan; IDs missing from `verdicts` count as EXECUTABLE.
 * @param verdicts Per-task verdicts, e.g. the output of derivePlanTaskVerdicts.
 * @returns The bucketed IDs, the rendered text, and the first blocked task ID (or null).
 */
export declare function composePlanAuditSummary(allTaskIds: ReadonlyArray<string>, verdicts: ReadonlyMap<string, PlanTaskVerdict>): PlanAuditSummary;
|
|
15
|
+
//# sourceMappingURL=plan-audit-verdict.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"plan-audit-verdict.d.ts","sourceRoot":"","sources":["../../../src/tools/audit/plan-audit-verdict.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,eAAe,GAAG,YAAY,GAAG,SAAS,GAAG,SAAS,CAAC;AAEnE,MAAM,WAAW,gBAAgB;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,UAAU,GAAG,MAAM,GAAG,QAAQ,GAAG,KAAK,CAAC;CAClD;AAED,wBAAgB,sBAAsB,CACpC,QAAQ,EAAE,aAAa,CAAC,gBAAgB,CAAC,GACxC,GAAG,CAAC,MAAM,EAAE,eAAe,CAAC,CAa9B;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;CAC5B;AAED,wBAAgB,uBAAuB,CACrC,UAAU,EAAE,aAAa,CAAC,MAAM,CAAC,EACjC,QAAQ,EAAE,WAAW,CAAC,MAAM,EAAE,eAAe,CAAC,GAC7C,gBAAgB,CAsBlB"}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
 * Collapse a flat list of findings into one verdict per task.
 *
 * Severity escalates and never de-escalates:
 *   - 'critical' always marks the task BLOCKED;
 *   - 'high' marks it PARTIAL unless it is already BLOCKED;
 *   - anything else records EXECUTABLE only if the task has no verdict yet.
 *
 * @param findings Iterable of `{ taskId, severity }` records.
 * @returns Map from taskId to 'EXECUTABLE' | 'PARTIAL' | 'BLOCKED', keyed in
 *          order of each task's first finding.
 */
export function derivePlanTaskVerdicts(findings) {
    const verdictByTask = new Map();
    for (const { taskId, severity } of findings) {
        const current = verdictByTask.get(taskId);
        switch (severity) {
            case 'critical':
                verdictByTask.set(taskId, 'BLOCKED');
                break;
            case 'high':
                // A high finding must not soften an earlier critical one.
                if (current === 'BLOCKED') {
                    break;
                }
                verdictByTask.set(taskId, 'PARTIAL');
                break;
            default:
                // medium / low: only registers the task if nothing is recorded yet.
                if (current === undefined) {
                    verdictByTask.set(taskId, 'EXECUTABLE');
                }
        }
    }
    return verdictByTask;
}
|
|
18
|
+
/**
 * Order two task IDs so that embedded digit runs compare by numeric value
 * ("A1.2" before "A1.10"). All other characters compare by UTF-16 code unit,
 * which is what the default Array.prototype.sort does.
 */
function compareTaskIdsNaturally(a, b) {
    const isDigit = (ch) => ch >= '0' && ch <= '9';
    let i = 0;
    let j = 0;
    while (i < a.length && j < b.length) {
        if (isDigit(a[i]) && isDigit(b[j])) {
            let endA = i;
            while (endA < a.length && isDigit(a[endA]))
                endA++;
            let endB = j;
            while (endB < b.length && isDigit(b[endB]))
                endB++;
            // Drop leading zeros so length comparison is a valid magnitude check.
            const numA = a.slice(i, endA).replace(/^0+(?=\d)/, '');
            const numB = b.slice(j, endB).replace(/^0+(?=\d)/, '');
            if (numA.length !== numB.length)
                return numA.length < numB.length ? -1 : 1;
            if (numA !== numB)
                return numA < numB ? -1 : 1;
            i = endA;
            j = endB;
            continue;
        }
        if (a[i] !== b[j])
            return a[i] < b[j] ? -1 : 1;
        i++;
        j++;
    }
    const leftA = a.length - i;
    const leftB = b.length - j;
    if (leftA !== leftB)
        return leftA < leftB ? -1 : 1;
    // Numerically equal but textually different (e.g. "A01" vs "A1"):
    // fall back to plain code-unit order so the result stays deterministic.
    return a < b ? -1 : a > b ? 1 : 0;
}
/**
 * Bucket every task by verdict and render a human-readable summary.
 *
 * Tasks that have no entry in `verdicts` are treated as EXECUTABLE. The
 * bucket arrays keep the order of `allTaskIds`. `nextBlocker` is the blocked
 * task that comes first in natural ID order (digit runs compared
 * numerically), so "A1.2" is reported ahead of "A1.10"; a plain lexicographic
 * sort would pick "A1.10".
 *
 * @param allTaskIds Every task ID in the audited plan.
 * @param verdicts Map from task ID to 'EXECUTABLE' | 'PARTIAL' | 'BLOCKED'.
 * @returns `{ text, executable, partial, blocked, nextBlocker }`; `nextBlocker`
 *          is null when nothing is blocked.
 */
export function composePlanAuditSummary(allTaskIds, verdicts) {
    const executable = [];
    const partial = [];
    const blocked = [];
    for (const id of allTaskIds) {
        const v = verdicts.get(id) ?? 'EXECUTABLE';
        if (v === 'BLOCKED')
            blocked.push(id);
        else if (v === 'PARTIAL')
            partial.push(id);
        else
            executable.push(id);
    }
    // Sort a copy: the returned `blocked` array must stay in plan order.
    const sortedBlocked = [...blocked].sort(compareTaskIdsNaturally);
    const nextBlocker = sortedBlocked[0] ?? null;
    const lines = [
        `${allTaskIds.length} tasks audited:`,
        ` EXECUTABLE: ${executable.length}${executable.length ? ` (${executable.join(', ')})` : ''}`,
        ` PARTIAL: ${partial.length}${partial.length ? ` (${partial.join(', ')})` : ''}`,
        ` BLOCKED: ${blocked.length}${blocked.length ? ` (${blocked.join(', ')})` : ''}`,
    ];
    if (nextBlocker) {
        lines.push('', `Next blocker: ${nextBlocker}`);
    }
    return { text: lines.join('\n'), executable, partial, blocked, nextBlocker };
}
|
|
44
|
+
//# sourceMappingURL=plan-audit-verdict.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"plan-audit-verdict.js","sourceRoot":"","sources":["../../../src/tools/audit/plan-audit-verdict.ts"],"names":[],"mappings":"AAOA,MAAM,UAAU,sBAAsB,CACpC,QAAyC;IAEzC,MAAM,GAAG,GAAG,IAAI,GAAG,EAA2B,CAAC;IAC/C,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QAChC,IAAI,CAAC,CAAC,QAAQ,KAAK,UAAU,EAAE,CAAC;YAC9B,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;QAC/B,CAAC;aAAM,IAAI,CAAC,CAAC,QAAQ,KAAK,MAAM,EAAE,CAAC;YACjC,IAAI,KAAK,KAAK,SAAS;gBAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;QACxD,CAAC;aAAM,IAAI,CAAC,KAAK,EAAE,CAAC;YAClB,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAUD,MAAM,UAAU,uBAAuB,CACrC,UAAiC,EACjC,QAA8C;IAE9C,MAAM,UAAU,GAAa,EAAE,CAAC;IAChC,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,KAAK,MAAM,EAAE,IAAI,UAAU,EAAE,CAAC;QAC5B,MAAM,CAAC,GAAG,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,YAAY,CAAC;QAC3C,IAAI,CAAC,KAAK,SAAS;YAAE,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;aACjC,IAAI,CAAC,KAAK,SAAS;YAAE,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;;YACtC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC3B,CAAC;IACD,MAAM,aAAa,GAAG,CAAC,GAAG,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;IAC1C,MAAM,WAAW,GAAG,aAAa,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;IAC7C,MAAM,KAAK,GAAG;QACZ,GAAG,UAAU,CAAC,MAAM,iBAAiB;QACrC,iBAAiB,UAAU,CAAC,MAAM,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QAC7F,iBAAiB,OAAO,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QACpF,iBAAiB,OAAO,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;KACrF,CAAC;IACF,IAAI,WAAW,EAAE,CAAC;QAChB,KAAK,CAAC,IAAI,CAAC,EAAE,EAAE,iBAAiB,WAAW,EAAE,CAAC,CAAC;IACjD,CAAC;IACD,OAAO,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,UAAU,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,CAAC;AAC/E,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../../src/tools/audit/schema.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAMxB,eAAO,MAAM,WAAW
|
|
1
|
+
{"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../../src/tools/audit/schema.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAMxB,eAAO,MAAM,WAAW;;;;;;;;;;iBAevB,CAAC;AAEF,MAAM,MAAM,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,WAAW,CAAC,CAAC;AAEhD,eAAO,MAAM,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iBAA8B,CAAC;AAExD,MAAM,MAAM,MAAM,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,YAAY,CAAC,CAAC"}
|
|
@@ -6,13 +6,16 @@ import { buildOutputEnvelopeSchema } from '../shared-output.js';
|
|
|
6
6
|
// cross-package coupling.
|
|
7
7
|
export const inputSchema = z.object({
|
|
8
8
|
document: z.string().optional().describe('Inline document content to audit'),
|
|
9
|
-
auditType: z.enum(['default', 'security', 'performance'])
|
|
9
|
+
auditType: z.enum(['default', 'security', 'performance', 'plan'])
|
|
10
10
|
.default('default')
|
|
11
|
-
.describe('Audit focus. `default` is the comprehensive sweep — recommended for specs,
|
|
11
|
+
.describe('Audit focus. `default` is the comprehensive prose-coherence sweep — recommended for specs, designs, recommendation docs, post-mortems (the requirement / what-we-want-to-do prose). `security` / `performance` narrow the lens to that one dimension (threat models, scaling designs). `plan` is for code-execution PLANS being audited against a real codebase: pass the plan file as the single filePaths entry; workers grep the codebase under cwd to verify every named symbol / path / signature / import. Use `default` to check spec quality; use `plan` to check whether a plan can actually dispatch.'),
|
|
12
12
|
filePaths: z.array(z.string()).optional()
|
|
13
|
-
.describe('Files the sub-agent should focus on. Multiple files are processed in parallel.'),
|
|
13
|
+
.describe('Files the sub-agent should focus on. Multiple files are processed in parallel. For auditType=plan, MUST contain exactly one entry — the plan markdown.'),
|
|
14
14
|
contextBlockIds: z.array(z.string()).optional()
|
|
15
15
|
.describe('IDs from register_context_block to prepend to prompt. Use for delta audits, diff-scoped reviews, or shared specs.'),
|
|
16
|
+
}).refine((input) => input.auditType !== 'plan' || (Array.isArray(input.filePaths) && input.filePaths.length === 1), {
|
|
17
|
+
message: "Plan audit takes exactly one filePath (the plan markdown). The worker discovers and verifies source files itself via its tool surface — do not pre-list source files.",
|
|
18
|
+
path: ['filePaths'],
|
|
16
19
|
});
|
|
17
20
|
export const outputSchema = buildOutputEnvelopeSchema();
|
|
18
21
|
//# sourceMappingURL=schema.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"schema.js","sourceRoot":"","sources":["../../../src/tools/audit/schema.ts"],"names":[],"mappings":"AAAA,0CAA0C;AAC1C,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,yBAAyB,EAAE,MAAM,qBAAqB,CAAC;AAEhE,uFAAuF;AACvF,2EAA2E;AAC3E,0BAA0B;AAC1B,MAAM,CAAC,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,kCAAkC,CAAC;IAC5E,SAAS,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,UAAU,EAAE,aAAa,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"schema.js","sourceRoot":"","sources":["../../../src/tools/audit/schema.ts"],"names":[],"mappings":"AAAA,0CAA0C;AAC1C,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,yBAAyB,EAAE,MAAM,qBAAqB,CAAC;AAEhE,uFAAuF;AACvF,2EAA2E;AAC3E,0BAA0B;AAC1B,MAAM,CAAC,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,kCAAkC,CAAC;IAC5E,SAAS,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,CAAC,CAAC;SAC9D,OAAO,CAAC,SAAS,CAAC;SAClB,QAAQ,CAAC,ilBAAilB,CAAC;IAC9lB,SAAS,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE;SACtC,QAAQ,CAAC,wJAAwJ,CAAC;IACrK,eAAe,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE;SAC5C,QAAQ,CAAC,mHAAmH,CAAC;CACjI,CAAC,CAAC,MAAM,CACP,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,SAAS,KAAK,MAAM,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,KAAK,CAAC,SAAS,CAAC,MAAM,KAAK,CAAC,CAAC,EACzG;IACE,OAAO,EAAE,uKAAuK;IAChL,IAAI,EAAE,CAAC,WAAW,CAAC;CACpB,CACF,CAAC;AAIF,MAAM,CAAC,MAAM,YAAY,GAAG,yBAAyB,EAAE,CAAC"}
|
|
@@ -11,6 +11,9 @@ export interface ToolAuditBrief {
|
|
|
11
11
|
hasContextBlocks: boolean;
|
|
12
12
|
contextBlockIds?: string[];
|
|
13
13
|
perFilePath?: string;
|
|
14
|
+
/** A12: pass-through of input.auditType so buildTaskSpec can stamp it
|
|
15
|
+
* onto the TaskSpec for the dispatcher to read. */
|
|
16
|
+
auditType?: 'default' | 'security' | 'performance' | 'plan';
|
|
14
17
|
}
|
|
15
18
|
export declare function auditBriefSlot(input: Input): ToolAuditBrief[];
|
|
16
19
|
export declare const toolConfig: ToolConfig<Input, ToolAuditBrief, AuditReport>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tool-config.d.ts","sourceRoot":"","sources":["../../../src/tools/audit/tool-config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,6CAA6C,CAAC;AAClF,OAAO,EAAe,KAAK,KAAK,EAAE,MAAM,aAAa,CAAC;AAEtD,OAAO,EAAqB,KAAK,WAAW,EAAE,MAAM,qDAAqD,CAAC;AAE1G,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,sCAAsC,CAAC;AAavE,wBAAgB,aAAa,CAAC,QAAQ,EAAE,mBAAmB,GAAG,IAAI,CAYjE;AAID,MAAM,WAAW,cAAc;IAC7B,aAAa,EAAE,MAAM,CAAC;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,gBAAgB,EAAE,OAAO,CAAC;IAC1B,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"tool-config.d.ts","sourceRoot":"","sources":["../../../src/tools/audit/tool-config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,6CAA6C,CAAC;AAClF,OAAO,EAAe,KAAK,KAAK,EAAE,MAAM,aAAa,CAAC;AAEtD,OAAO,EAAqB,KAAK,WAAW,EAAE,MAAM,qDAAqD,CAAC;AAE1G,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,sCAAsC,CAAC;AAavE,wBAAgB,aAAa,CAAC,QAAQ,EAAE,mBAAmB,GAAG,IAAI,CAYjE;AAID,MAAM,WAAW,cAAc;IAC7B,aAAa,EAAE,MAAM,CAAC;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,gBAAgB,EAAE,OAAO,CAAC;IAC1B,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;wDACoD;IACpD,SAAS,CAAC,EAAE,SAAS,GAAG,UAAU,GAAG,aAAa,GAAG,MAAM,CAAC;CAC7D;AA+CD,wBAAgB,cAAc,CAAC,KAAK,EAAE,KAAK,GAAG,cAAc,EAAE,CA4B7D;AAgFD,eAAO,MAAM,UAAU,EAAE,UAAU,CAAC,KAAK,EAAE,cAAc,EAAE,WAAW,CA0CrE,CAAC"}
|
|
@@ -35,6 +35,7 @@ const AUDIT_DONE_CONDITIONS = {
|
|
|
35
35
|
default: 'Comprehensive audit. Apply the full failure-mode taxonomy through the executability lens (the orientation block above). For prose artifacts (specs, plans, recommendation docs, designs, post-mortems, audits, briefs): emphasize RECOMMENDATION-COHERENCE, INTERNAL CONTRADICTION, ARGUMENT SOUNDNESS, COMPLETENESS AGAINST CONSTRAINTS, FIX ACTIONABILITY, DRIFT, and SCOPE-CREEP — i.e., would a literal-following worker who reads this artifact and follows it without judgment produce the right outcome? Are sections internally consistent? Does each recommendation actually solve its stated problem given the doc\'s own constraints? Sweep style/clarity issues only when they would cause a worker to misinterpret. For source code: logic errors, contract violations, off-by-one bugs, type mismatches, unhandled edge cases. Each finding has severity (critical/high/medium/low), location, and remediation.',
|
|
36
36
|
security: 'Narrow lens: security ONLY. Use this only when the caller specifically wants security findings and not general audit findings. For prose artifacts (threat models, security designs, auth specs): identify missing controls, ambiguous trust boundaries, undeclared attack surfaces, leaked-secret patterns in examples, recommendations that introduce new attack surface without mitigation, and threat-model gaps. For source code: injection, auth bypass, data exposure, OWASP top 10. Apply the full failure-mode taxonomy through the security lens. Skip non-security findings. Each finding has severity, location, and remediation.',
|
|
37
37
|
performance: 'Narrow lens: performance ONLY. Use this only when the caller specifically wants performance findings and not general audit findings. For prose artifacts (designs, scaling plans, latency-sensitive specs): identify unstated complexity, missing hot-path consideration, unbounded loops in proposed designs, omitted scaling story, recommendations that mandate work that does not scale, and missing latency/throughput targets. For source code: O(n²) loops, unnecessary allocations, missing caching, blocking I/O. Apply the full failure-mode taxonomy through the performance lens. Skip non-performance findings. Each finding has impact level, location, and fix recommendation.',
|
|
38
|
+
plan: 'PLAN-VS-CODEBASE EXECUTABILITY AUDIT. The single filePath you receive is a code-execution plan; the source files you verify against live under cwd and you discover them yourself by reading the plan\'s "Files: Modify:" / "Test:" / "Create:" blocks and `import` statements in code blocks. Apply the 8 verification perspectives (PATH EXISTENCE, SYMBOL EXISTENCE, SIGNATURE MATCH, IMPORT GRAPH, TEST HARNESS AVAILABILITY, STEP SEQUENCE WITHIN TASK, CROSS-TASK DEPENDENCIES, VERIFICATION COMMAND VALIDITY). For each task in the plan, the merge annotator computes a verdict: EXECUTABLE / PARTIAL / BLOCKED. Use read_file / grep / glob / list_files to ground every finding in real file:line evidence. Findings without source-side citations are speculation — drop them. Zero findings on a perspective is the EXPECTED outcome on a clean plan; do not invent findings to fill quota.',
|
|
38
39
|
};
|
|
39
40
|
const DELTA_AUDIT_SUFFIX = ' Perform a full audit (do not reduce thoroughness). Verify each prior finding as fixed or unfixed. Omit fixed prior findings from the main report. Include unfixed prior findings and new findings. End with a summary of which prior findings were resolved.';
|
|
40
41
|
function resolveAuditTypeText(auditType) {
|
|
@@ -68,6 +69,7 @@ export function auditBriefSlot(input) {
|
|
|
68
69
|
hasContextBlocks,
|
|
69
70
|
contextBlockIds: input.contextBlockIds,
|
|
70
71
|
perFilePath: fp,
|
|
72
|
+
auditType: input.auditType,
|
|
71
73
|
}));
|
|
72
74
|
}
|
|
73
75
|
return [{
|
|
@@ -77,6 +79,7 @@ export function auditBriefSlot(input) {
|
|
|
77
79
|
filePaths: validPaths,
|
|
78
80
|
hasContextBlocks,
|
|
79
81
|
contextBlockIds: input.contextBlockIds,
|
|
82
|
+
auditType: input.auditType,
|
|
80
83
|
}];
|
|
81
84
|
}
|
|
82
85
|
const FINDING_FORMAT_INSTRUCTIONS = [
|
|
@@ -185,6 +188,11 @@ export const toolConfig = {
|
|
|
185
188
|
contextBlockIds: brief.contextBlockIds,
|
|
186
189
|
filePaths: brief.filePaths.length > 0 ? brief.filePaths : undefined,
|
|
187
190
|
mainModel: ctx.mainModel,
|
|
191
|
+
// A12 (4.2.3+): plumb auditType to the dispatcher. The
|
|
192
|
+
// parallel-criteria router branches on `task.auditType === 'plan'`
|
|
193
|
+
// to use the plan-audit route spec instead of the default audit
|
|
194
|
+
// spec (different criteria, orientation, severity semantics).
|
|
195
|
+
auditType: brief.auditType,
|
|
188
196
|
};
|
|
189
197
|
},
|
|
190
198
|
reportSchema: auditReportSchema,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tool-config.js","sourceRoot":"","sources":["../../../src/tools/audit/tool-config.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAc,MAAM,aAAa,CAAC;AACtD,OAAO,EAAE,oBAAoB,EAAE,MAAM,gDAAgD,CAAC;AACtF,OAAO,EAAE,iBAAiB,EAAoB,MAAM,qDAAqD,CAAC;AAC1G,OAAO,EAAE,qBAAqB,EAAE,MAAM,6CAA6C,CAAC;AAGpF,OAAO,EAAE,uBAAuB,EAAE,MAAM,wBAAwB,CAAC;AACjE,OAAO,EAAE,eAAe,EAAE,MAAM,4CAA4C,CAAC;AAC7E,OAAO,EACL,yBAAyB,EACzB,mBAAmB,EACnB,gBAAgB,EAChB,yBAAyB,EACzB,uBAAuB,EACvB,2BAA2B,GAC5B,MAAM,2BAA2B,CAAC;AAEnC,MAAM,UAAU,aAAa,CAAC,QAA6B;IACzD,QAAQ,CAAC,QAAQ,CAAC;QAChB,SAAS,EAAE,OAAO;QAClB,UAAU,EAAE,MAAM;QAClB,QAAQ,EAAE,QAAQ;QAClB,OAAO,EAAE,MAAM;QACf,MAAM,EAAE,WAAW;QACnB,YAAY,EAAE,WAAW;QACzB,gBAAgB,EAAE,SAAS;QAC3B,oBAAoB,EAAE,KAAK;QAC3B,iBAAiB,EAAE,eAAe;KACnC,CAAC,CAAC;AACL,CAAC;
|
|
1
|
+
{"version":3,"file":"tool-config.js","sourceRoot":"","sources":["../../../src/tools/audit/tool-config.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAc,MAAM,aAAa,CAAC;AACtD,OAAO,EAAE,oBAAoB,EAAE,MAAM,gDAAgD,CAAC;AACtF,OAAO,EAAE,iBAAiB,EAAoB,MAAM,qDAAqD,CAAC;AAC1G,OAAO,EAAE,qBAAqB,EAAE,MAAM,6CAA6C,CAAC;AAGpF,OAAO,EAAE,uBAAuB,EAAE,MAAM,wBAAwB,CAAC;AACjE,OAAO,EAAE,eAAe,EAAE,MAAM,4CAA4C,CAAC;AAC7E,OAAO,EACL,yBAAyB,EACzB,mBAAmB,EACnB,gBAAgB,EAChB,yBAAyB,EACzB,uBAAuB,EACvB,2BAA2B,GAC5B,MAAM,2BAA2B,CAAC;AAEnC,MAAM,UAAU,aAAa,CAAC,QAA6B;IACzD,QAAQ,CAAC,QAAQ,CAAC;QAChB,SAAS,EAAE,OAAO;QAClB,UAAU,EAAE,MAAM;QAClB,QAAQ,EAAE,QAAQ;QAClB,OAAO,EAAE,MAAM;QACf,MAAM,EAAE,WAAW;QACnB,YAAY,EAAE,WAAW;QACzB,gBAAgB,EAAE,SAAS;QAC3B,oBAAoB,EAAE,KAAK;QAC3B,iBAAiB,EAAE,eAAe;KACnC,CAAC,CAAC;AACL,CAAC;AAiBD;;;;;;;;;;;;GAYG;AACH,MAAM,qBAAqB,GAA2B;IACpD,OAAO,EACL,+3BAA+3B;IACj4B,QAAQ,EACN,+mBAA+mB;IACjnB,WAAW,EACT,+pBAA+pB;IACjqB,IAAI,EACF,y2BAAy2B;CAC52B,CAAC;AAEF,MAAM,kBAAkB,GAAG,+PAA+P,CAAC;AAE3R,SAAS,oBAAoB,CAAC,SAAyC;IACrE,sEAAsE;IACtE,oEAAoE;IACpE,kEAAkE;IAClE,MAAM,CAAC,GAAG,SAAS,IAAI,SAAS,CAAC;IACjC,IAAI,CAAC,KAAK,SAAS;QAAE,OAAO,qGAAqG,CAAC;IAClI,OAAO,WAAW,CAAC,QAAQ,CAAC;AAC9B,CAAC;AAED,SAAS,oBAAoB,CAAC,SAAyC,EAAE,gBAAyB;IAChG,MAAM,CAAC,GAAG,SAAS,IAAI,SAAS,CAAC;IACjC,MAAM,IAAI,GAAG,qBAAqB,CAAC,CAAC,CAAC,IAAI,qBAAqB,CAAC,OAAO,CAAC;IACvE,OAAO,gBAAgB,CAAC,CAAC,CAAC,IAAI,GAAG,kBAAkB,CAAC,CAAC,CAAC,IAAI,CAAC;AAC7D,CAAC;AAED,SAAS,UAAU,CAAC,KAAyB;IAC3C,OAAO,KAAK,KAAK,SAAS,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC;AACxD,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,KAAY;IACzC,MAAM,gBAAgB,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,eAAe,CAAC,IAAI,KAAK,CAAC,eAAe,CAAC,MAAM,GAAG,CAAC,CAAC;IAClG,MAAM,aAAa,GAAG,oBAAoB,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IAC5D,MAAM,IAAI,GAAG,oBAAoB,CAAC,KAAK,CAAC,SAAS,EAAE,gBAAgB,CAAC,CAAC;IACrE,MAAM,UAAU,GAAG,CAAC,KAAK,CAAC,SAAS,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAE5E,0DAA0D;IAC1D,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,UAA
U,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QAC1D,OAAO,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC;YAC3B,aAAa;YACb,IAAI;YACJ,SAAS,EAAE,CAAC,EAAE,CAAC;YACf,gBAAgB;YAChB,eAAe,EAAE,KAAK,CAAC,eAAe;YACtC,WAAW,EAAE,EAAE;YACf,SAAS,EAAE,KAAK,CAAC,SAAS;SAC3B,CAAC,CAAC,CAAC;IACN,CAAC;IAED,OAAO,CAAC;YACN,aAAa;YACb,IAAI;YACJ,QAAQ,EAAE,KAAK,CAAC,QAAQ;YACxB,SAAS,EAAE,UAAU;YACrB,gBAAgB;YAChB,eAAe,EAAE,KAAK,CAAC,eAAe;YACtC,SAAS,EAAE,KAAK,CAAC,SAAS;SAC3B,CAAC,CAAC;AACL,CAAC;AAED,MAAM,2BAA2B,GAAG;IAClC,mEAAmE;IACnE,qEAAqE;IACrE,sEAAsE;IACtE,uEAAuE;IACvE,yBAAyB;IACzB,EAAE;IACF,kKAAkK;IAClK,EAAE;IACF,gCAAgC;IAChC,4CAA4C;IAC5C,yCAAyC;IACzC,oCAAoC;IACpC,2CAA2C;IAC3C,EAAE;IACF,gCAAgC;IAChC,iBAAiB;IACjB,OAAO;IACP,EAAE;IACF,QAAQ;IACR,8HAA8H;IAC9H,6GAA6G;IAC7G,mGAAmG;IACnG,EAAE;IACF,oEAAoE;IACpE,+DAA+D;IAC/D,6DAA6D;IAC7D,eAAe;IACf,EAAE;IACF,0EAA0E;IAC1E,wEAAwE;IACxE,wEAAwE;IACxE,kDAAkD;IAClD,uBAAuB;IACvB,EAAE;IACF,qEAAqE;IACrE,mEAAmE;IACnE,2BAA2B;IAC3B,EAAE;IACF,mBAAmB;IACnB,EAAE;IACF,gBAAgB;IAChB,EAAE;IACF,yBAAyB;CAC1B,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,wBAAwB,GAAG;IAC/B,qGAAqG;IACrG,6KAA6K;CAC9K,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,SAAS,oBAAoB,CAAC,SAAmB;IAC/C,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IACtC,OAAO,kCAAkC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;AACrF,CAAC;AAED,SAAS,WAAW,CAAC,KAAqB;IACxC,MAAM,KAAK,GAAa,CAAC,aAAa,KAAK,CAAC,aAAa,UAAU,CAAC,CAAC;IAErE,IAAI,KAAK,CAAC,WAAW,EAAE,CAAC;QACtB,KAAK,CAAC,IAAI,CAAC,kCAAkC,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC;IACpE,CAAC;SAAM,CAAC;QACN,IAAI,KAAK,CAAC,QAAQ;YAAE,KAAK,CAAC,IAAI,CAAC,gBAAgB,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC;QACjE,MAAM,WAAW,GAAG,oBAAoB,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;QAC1D,IAAI,WAAW;YAAE,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAC3C,CAAC;IAED,mEAAmE;IACnE,qEAAqE;IACrE,gEAAgE;IAChE,IAAI,KAAK,CAAC,gBAAgB,EAAE,CAAC;QAC3B,KAAK,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;IACvC,CAAC;IACD,KAAK,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;IAExC,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC5B,CAAC;AAED,MAAM,CAAC,MAAM,UAAU,GAAmD;
IACxE,IAAI,EAAE,OAAO;IACb,QAAQ,EAAE,WAAW;IACrB,SAAS,EAAE,SAAS;IACpB,SAAS,EAAE,cAAc;IACzB,aAAa,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE;QAC5B,oEAAoE;QACpE,iEAAiE;QACjE,iEAAiE;QACjE,gCAAgC;QAChC,MAAM,WAAW,GAAa,CAAC,aAAa,KAAK,CAAC,aAAa,UAAU,CAAC,CAAC;QAC3E,IAAI,KAAK,CAAC,QAAQ;YAAE,WAAW,CAAC,IAAI,CAAC,gBAAgB,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC;QACvE,IAAI,KAAK,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC/B,WAAW,CAAC,IAAI,CAAC,kBAAkB,KAAK,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACtF,CAAC;QACD,OAAO;YACL,MAAM,EAAE,WAAW,CAAC,KAAK,CAAC;YAC1B,cAAc,EAAE,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC;YACxC,SAAS,EAAE,SAAS;YACpB,YAAY,EAAE,cAAc;YAC5B,kBAAkB,EAAE,KAAK;YACzB,IAAI,EAAE,KAAK,CAAC,IAAI;YAChB,KAAK,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,KAAK,IAAI,MAAM;YAC3C,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,SAAS,IAAI,uBAAuB;YACpE,UAAU,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,UAAU,IAAI,EAAE;YACjD,aAAa,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,IAAI,UAAU;YAC/D,GAAG,EAAE,GAAG,CAAC,cAAc,EAAE,GAAG,IAAI,GAAG,CAAC,GAAG;YACvC,eAAe,EAAE,KAAK,CAAC,eAAe;YACtC,SAAS,EAAE,KAAK,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS;YACnE,SAAS,EAAE,GAAG,CAAC,SAAS;YACxB,uDAAuD;YACvD,mEAAmE;YACnE,gEAAgE;YAChE,8DAA8D;YAC9D,SAAS,EAAE,KAAK,CAAC,SAAS;SACf,CAAC;IAChB,CAAC;IACD,YAAY,EAAE,iBAAiB;IAC/B,gBAAgB,EAAE,qBAAqB;IACvC,eAAe,EAAE;QACf,SAAS,EAAE,oBAAoB;KAChC;CACF,CAAC"}
|
|
@@ -1,62 +1,46 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Delegate
|
|
2
|
+
* Delegate worker criteria — 4.3.0 pipeline-redesign mindset.
|
|
3
3
|
*
|
|
4
|
-
* DELEGATE'S PURPOSE — read this before adding categories.
|
|
5
4
|
* mma-delegate is the generic dispatcher for ad-hoc implementation
|
|
6
|
-
* tasks.
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
-
*
|
|
17
|
-
*
|
|
18
|
-
*
|
|
19
|
-
*
|
|
20
|
-
*
|
|
21
|
-
*
|
|
22
|
-
* spec + quality + diff review applies. The spec the spec-reviewer
|
|
23
|
-
* checks against is the BRIEF (prompt + done), not your interpretation
|
|
24
|
-
* of it. The quality-reviewer checks safety / correctness / style.
|
|
5
|
+
* tasks. Caller hands you a `prompt` (and optionally a `done` acceptance
|
|
6
|
+
* criterion, `filePaths`, `verifyCommand`); your output is a diff.
|
|
7
|
+
*
|
|
8
|
+
* Pipeline mindset (different from earlier versions):
|
|
9
|
+
* - This is a SINGLE-PASS pipeline. There are NO rework rounds for you.
|
|
10
|
+
* - After your turn, a SPEC reviewer (complex tier, full editor tools)
|
|
11
|
+
* runs ONCE — it doesn't ask you to fix; it fixes inline itself.
|
|
12
|
+
* - Then a QUALITY reviewer (complex tier, full editor tools) runs ONCE
|
|
13
|
+
* for safety/correctness — same thing: fixes inline, doesn't ask you.
|
|
14
|
+
* - Then an annotator scores overall completion and the commit gate fires
|
|
15
|
+
* if the score is high enough.
|
|
16
|
+
*
|
|
17
|
+
* What this means for you: do your best ONE pass. You don't need to
|
|
18
|
+
* second-guess minor things — the reviewer will catch and fix them.
|
|
19
|
+
* Don't over-think; don't restart-loop; don't bail on uncertainty. The
|
|
20
|
+
* pipeline has a safety net BUT only one round of it.
|
|
25
21
|
*/
|
|
26
22
|
/**
|
|
27
|
-
*
|
|
28
|
-
*
|
|
29
|
-
* Without an explicit orientation, workers default to "implement
|
|
30
|
-
* something good" — which produces over-implementation (SCOPE CREEP)
|
|
31
|
-
* and under-implementation (SILENT PARTIAL FIX). With this orientation,
|
|
32
|
-
* the worker calibrates against the reviewer's standard: minimal +
|
|
33
|
-
* complete, the brief is the contract.
|
|
23
|
+
* Orientation — "smallest complete change" framing.
|
|
34
24
|
*/
|
|
35
25
|
export declare const DELEGATE_PURPOSE_ORIENTATION: string;
|
|
36
|
-
/**
|
|
37
|
-
* The scope rule for delegate.
|
|
38
|
-
*
|
|
39
|
-
* Replaces the prior one-liner with a concrete contract about what
|
|
40
|
-
* is in scope, what is off-limits, and what to do at the boundary.
|
|
41
|
-
*/
|
|
42
26
|
export declare const DELEGATE_SCOPE_RULE: string;
|
|
43
27
|
/**
|
|
44
|
-
*
|
|
45
|
-
*
|
|
46
|
-
*
|
|
47
|
-
* (
|
|
48
|
-
* below are the specific patterns reviewers raise as merge-blockers.
|
|
28
|
+
* Top-4 failure modes — calibrated from observed reviewer rejections.
|
|
29
|
+
* Dropped from the original 9: WRONG FILE TARGET (subsumed by scope
|
|
30
|
+
* rule), CROSS-CUTTING DAMAGE, CONVENTION DRIFT, SPEC OVERREACH,
|
|
31
|
+
* UNDOCUMENTED ASSUMPTION (low signal, high noise for cheap models).
|
|
49
32
|
*/
|
|
50
33
|
export declare const DELEGATE_FAILURE_MODES: string;
|
|
51
34
|
/**
|
|
52
|
-
* Completeness reminder.
|
|
53
|
-
*
|
|
54
|
-
* The shared SEVERITY_LADDER does not apply to write tools. The
|
|
55
|
-
* counter-balance for delegate is opposite to read-only tools: the
|
|
56
|
-
* typical failure is OVER-IMPLEMENTATION (scope creep) and UNDER-
|
|
57
|
-
* IMPLEMENTATION (silent partial fix), often in the same task. This
|
|
58
|
-
* block tells the worker the load-bearing constraint is "minimal AND
|
|
59
|
-
* complete simultaneously".
|
|
35
|
+
* Completeness reminder — brief-vs-diff walk only. Worked example
|
|
36
|
+
* dropped (cheap models can apply the rule directly without it).
|
|
60
37
|
*/
|
|
61
38
|
export declare const COMPLETENESS_REMINDER_DELEGATE: string;
|
|
39
|
+
/**
|
|
40
|
+
* Turn budget — calibration block. Same rationale as execute-plan's:
|
|
41
|
+
* cheap models default to "be thorough" and treat each turn as
|
|
42
|
+
* "re-verify by re-reading", which becomes a discovery loop. This
|
|
43
|
+
* block tells them to trust prior reads and edit confidently.
|
|
44
|
+
*/
|
|
45
|
+
export declare const TURN_BUDGET_DELEGATE: string;
|
|
62
46
|
//# sourceMappingURL=implementer-criteria.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"implementer-criteria.d.ts","sourceRoot":"","sources":["../../../src/tools/delegate/implementer-criteria.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"implementer-criteria.d.ts","sourceRoot":"","sources":["../../../src/tools/delegate/implementer-criteria.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH;;GAEG;AACH,eAAO,MAAM,4BAA4B,QAW7B,CAAC;AAEb,eAAO,MAAM,mBAAmB,QAMpB,CAAC;AAEb;;;;;GAKG;AACH,eAAO,MAAM,sBAAsB,QAOvB,CAAC;AAEb;;;GAGG;AACH,eAAO,MAAM,8BAA8B,QAU/B,CAAC;AAEb;;;;;GAKG;AACH,eAAO,MAAM,oBAAoB,QAMrB,CAAC"}
|