@mmerterden/multi-agent-pipeline 10.7.3 → 10.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +73 -2
- package/docs/adr/0001-three-model-triage.md +2 -2
- package/docs/adr/0007-multi-tool-adapter-framework.md +1 -1
- package/docs/adr/README.md +2 -2
- package/docs/architecture.md +14 -14
- package/docs/features.md +35 -22
- package/docs/performance.md +3 -3
- package/index.js +3 -7
- package/install/templates/copilot-instructions.md +2 -2
- package/package.json +2 -5
- package/pipeline/agents/dev-critic.md +1 -1
- package/pipeline/claude-md-template.md +1 -1
- package/pipeline/commands/multi-agent/dev-autopilot.md +1 -1
- package/pipeline/commands/multi-agent/finish.md +2 -2
- package/pipeline/commands/multi-agent/help.md +12 -12
- package/pipeline/commands/multi-agent/local.md +1 -1
- package/pipeline/commands/multi-agent/refs/features/dev-critic.md +1 -1
- package/pipeline/commands/multi-agent/refs/features/model-fallback.md +7 -3
- package/pipeline/commands/multi-agent/refs/features/verify-by-test.md +41 -0
- package/pipeline/commands/multi-agent/refs/knowledge.md +1 -1
- package/pipeline/commands/multi-agent/refs/phases/log-format.md +11 -1
- package/pipeline/commands/multi-agent/refs/phases/modes.md +1 -1
- package/pipeline/commands/multi-agent/refs/phases/operations.md +15 -2
- package/pipeline/commands/multi-agent/refs/phases/phase-1-analysis.md +2 -2
- package/pipeline/commands/multi-agent/refs/phases/phase-2-planning.md +2 -2
- package/pipeline/commands/multi-agent/refs/phases/phase-3-dev.md +3 -1
- package/pipeline/commands/multi-agent/refs/phases/phase-4-review.md +51 -19
- package/pipeline/commands/multi-agent/refs/progress-contract.md +1 -1
- package/pipeline/commands/multi-agent/refs/rules.md +1 -0
- package/pipeline/commands/multi-agent/refs/tracker-contract.md +1 -2
- package/pipeline/commands/multi-agent/resume.md +7 -4
- package/pipeline/commands/multi-agent/review.md +41 -9
- package/pipeline/commands/multi-agent/sync.md +3 -3
- package/pipeline/commands/multi-agent.md +7 -7
- package/pipeline/schemas/agent-state.schema.json +1 -1
- package/pipeline/schemas/diff-risk.schema.json +5 -4
- package/pipeline/schemas/prefs.schema.json +38 -3
- package/pipeline/schemas/reviewer-output.schema.json +1 -1
- package/pipeline/schemas/triage-output.schema.json +37 -6
- package/pipeline/scripts/README.md +3 -2
- package/pipeline/scripts/cost-budget-check.mjs +1 -1
- package/pipeline/scripts/cost-table.json +7 -0
- package/pipeline/scripts/diff-risk-score.mjs +11 -1
- package/pipeline/scripts/fixtures/diff-risk-test-removal.diff +40 -0
- package/pipeline/scripts/fixtures/install-layout.tsv +5 -5
- package/pipeline/scripts/smoke-diff-risk.sh +30 -1
- package/pipeline/scripts/smoke-handoff-contract.sh +92 -0
- package/pipeline/scripts/smoke-verify-by-test.sh +148 -0
- package/pipeline/scripts/uninstall.mjs +53 -57
- package/pipeline/scripts/validate-diff-risk.mjs +2 -1
- package/pipeline/scripts/validate-triage.mjs +31 -2
- package/pipeline/skills/shared/core/multi-agent/SKILL.md +11 -11
- package/pipeline/skills/shared/core/multi-agent-dev-autopilot/SKILL.md +1 -1
- package/pipeline/skills/shared/core/multi-agent-finish/SKILL.md +1 -1
- package/pipeline/skills/shared/core/multi-agent-help/SKILL.md +8 -8
- package/pipeline/skills/shared/core/multi-agent-review/SKILL.md +5 -5
- package/pipeline/skills/shared/core/multi-agent-sync/SKILL.md +7 -5
- package/pipeline/scripts/smoke-readme-counts.sh +0 -120
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
{
|
|
2
2
|
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
3
|
"$id": "https://github.com/mmerterden/multi-agent-pipeline/pipeline/schemas/diff-risk.schema.json",
|
|
4
|
-
"version": "1.
|
|
4
|
+
"version": "1.1.0",
|
|
5
5
|
"title": "Multi-Agent Pipeline - Phase 4 diff risk score",
|
|
6
|
-
"description": "Output contract for diff-risk-score.mjs. Heuristic, deterministic, no LLM. Produced before Phase 4 Step 2 to give reviewer prompts a priority ordering - never used as a gate.",
|
|
6
|
+
"description": "Output contract for diff-risk-score.mjs. Heuristic, deterministic, no LLM. Produced before Phase 4 Step 2 to give reviewer prompts a priority ordering - never used as a gate. v1.1.0 adds the test_lines_removed signal (immutable-test backstop: a test file whose diff removes more lines than it adds).",
|
|
7
7
|
"type": "object",
|
|
8
8
|
"additionalProperties": false,
|
|
9
9
|
"required": ["schemaVersion", "task", "totals", "files"],
|
|
10
10
|
"properties": {
|
|
11
11
|
"schemaVersion": {
|
|
12
12
|
"type": "string",
|
|
13
|
-
"const": "1.
|
|
13
|
+
"const": "1.1.0"
|
|
14
14
|
},
|
|
15
15
|
"task": {
|
|
16
16
|
"type": "object",
|
|
@@ -63,7 +63,8 @@
|
|
|
63
63
|
"no_test_change",
|
|
64
64
|
"complexity_delta",
|
|
65
65
|
"ui_critical",
|
|
66
|
-
"migration"
|
|
66
|
+
"migration",
|
|
67
|
+
"test_lines_removed"
|
|
67
68
|
]
|
|
68
69
|
},
|
|
69
70
|
"weight": { "type": "number" },
|
|
@@ -701,6 +701,41 @@
|
|
|
701
701
|
"default": false,
|
|
702
702
|
"description": "v6.1.0+ \u2014 Phase 4 Step 2.5 rebuttal round. When reviewers disagree (mixed blocker/approved verdict), each reviewer is re-prompted with the others' opposing arguments for one additional round before triage. Lifts signal quality on ambiguous findings at ~1\u00d7 Step 2 token cost. Off by default \u2014 flip for security-critical or release-branch reviews."
|
|
703
703
|
},
|
|
704
|
+
"verifyByTest": {
|
|
705
|
+
"type": "object",
|
|
706
|
+
"additionalProperties": false,
|
|
707
|
+
"description": "v10.8+ - Phase 4 Step 3.7 verify-by-test. When enabled, accepted BLOCKING findings are empirically validated before the Phase 3 rework loop: one verifier agent writes a minimal repro test per finding and runs only that test. Confirmed findings hand their failing test to Phase 3 as the RED step; non-reproducible findings are downgraded to deferred under evidence-gate. Only blocking findings are ever verified (fixed behavior, not a knob). Adds one model call plus up to maxFindings single-test runs per iteration with accepted blockers; default off. Flip on for security-critical work, release branches, or repos with noisy reviewers. Full spec: refs/features/verify-by-test.md.",
|
|
708
|
+
"properties": {
|
|
709
|
+
"enabled": {
|
|
710
|
+
"type": "boolean",
|
|
711
|
+
"default": false,
|
|
712
|
+
"description": "Master switch."
|
|
713
|
+
},
|
|
714
|
+
"maxFindings": {
|
|
715
|
+
"type": "integer",
|
|
716
|
+
"minimum": 1,
|
|
717
|
+
"maximum": 10,
|
|
718
|
+
"default": 3,
|
|
719
|
+
"description": "Max accepted blocking findings verified per review iteration. Findings beyond the cap keep their judgment-only verdict."
|
|
720
|
+
},
|
|
721
|
+
"model": {
|
|
722
|
+
"type": "string",
|
|
723
|
+
"enum": [
|
|
724
|
+
"sonnet",
|
|
725
|
+
"opus"
|
|
726
|
+
],
|
|
727
|
+
"default": "sonnet",
|
|
728
|
+
"description": "Verifier agent model. Writing a minimal repro test is mechanical work; Sonnet is the cost-sane default."
|
|
729
|
+
},
|
|
730
|
+
"stepTimeoutSec": {
|
|
731
|
+
"type": "integer",
|
|
732
|
+
"minimum": 60,
|
|
733
|
+
"maximum": 1800,
|
|
734
|
+
"default": 600,
|
|
735
|
+
"description": "Wall-clock budget for the whole Step 3.7 pass. On breach, remaining findings keep judgment-only verdicts and the pipeline proceeds (never blocks)."
|
|
736
|
+
}
|
|
737
|
+
}
|
|
738
|
+
},
|
|
704
739
|
"review": {
|
|
705
740
|
"type": "object",
|
|
706
741
|
"additionalProperties": false,
|
|
@@ -831,9 +866,9 @@
|
|
|
831
866
|
},
|
|
832
867
|
"pricingModel": {
|
|
833
868
|
"type": "string",
|
|
834
|
-
"enum": ["opus", "sonnet", "haiku"],
|
|
835
|
-
"default": "
|
|
836
|
-
"description": "Which cost-table.json rate to price accumulated tokens at. Defaults to
|
|
869
|
+
"enum": ["fable", "opus", "sonnet", "haiku"],
|
|
870
|
+
"default": "fable",
|
|
871
|
+
"description": "Which cost-table.json rate to price accumulated tokens at. Defaults to fable (the top tier since v10.6.0) for a deliberately conservative (upper-bound) estimate, so the ceiling trips early rather than late."
|
|
837
872
|
}
|
|
838
873
|
}
|
|
839
874
|
},
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
},
|
|
20
20
|
"reviewer": {
|
|
21
21
|
"type": "string",
|
|
22
|
-
"description": "Model label for this output (e.g. 'opus', 'sonnet', 'gpt'). Present once the parallel reviewer outputs are merged into the Phase 4 array so triage/consensus can attribute each finding to its source. Optional on a single reviewer's raw pre-merge output."
|
|
22
|
+
"description": "Model label for this output (e.g. 'fable', 'opus', 'sonnet', 'gpt'). Present once the parallel reviewer outputs are merged into the Phase 4 array so triage/consensus can attribute each finding to its source. Optional on a single reviewer's raw pre-merge output."
|
|
23
23
|
}
|
|
24
24
|
},
|
|
25
25
|
"$defs": {
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
{
|
|
2
2
|
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
3
|
"$id": "https://github.com/mmerterden/multi-agent-pipeline/pipeline/schemas/triage-output.schema.json",
|
|
4
|
-
"version": "3.
|
|
4
|
+
"version": "3.2.0",
|
|
5
5
|
"title": "Multi-Agent Pipeline - Phase 4 triage output",
|
|
6
|
-
"description": "Contract for the Opus triage agent's JSON output in Phase 4 Step 3. Triage consumes merged reviewer findings and splits them into accepted/deferred/rejected. Only `accepted` blocking/important items trigger Phase 3 rework. v3.1.0 adds the optional `consensus` block so triage can surface reviewer-agreement risk (false consensus among same-base-model reviewers) instead of silently merging.",
|
|
6
|
+
"description": "Contract for the Opus triage agent's JSON output in Phase 4 Step 3. Triage consumes merged reviewer findings and splits them into accepted/deferred/rejected. Only `accepted` blocking/important items trigger Phase 3 rework. v3.1.0 adds the optional `consensus` block so triage can surface reviewer-agreement risk (false consensus among same-base-model reviewers) instead of silently merging. v3.2.0 adds the optional per-finding `verification` block written by Phase 4 Step 3.7 (verify-by-test): the empirical repro-test outcome for accepted blocking findings.",
|
|
7
7
|
"type": "object",
|
|
8
8
|
"additionalProperties": false,
|
|
9
9
|
"required": ["accepted", "deferred", "rejected", "approved"],
|
|
@@ -74,8 +74,8 @@
|
|
|
74
74
|
},
|
|
75
75
|
"reviewer": {
|
|
76
76
|
"type": "string",
|
|
77
|
-
"enum": ["opus", "sonnet"],
|
|
78
|
-
"description": "Which reviewer produced the raw finding. Haiku was removed in v2.1.0."
|
|
77
|
+
"enum": ["fable", "opus", "sonnet", "gpt"],
|
|
78
|
+
"description": "Which reviewer produced the raw finding. Claude Code Reviewer 1 is fable (opus when fallback engages); Copilot CLI adds gpt. Haiku was removed in v2.1.0."
|
|
79
79
|
},
|
|
80
80
|
"consensus": {
|
|
81
81
|
"type": "object",
|
|
@@ -114,6 +114,35 @@
|
|
|
114
114
|
}
|
|
115
115
|
}
|
|
116
116
|
},
|
|
117
|
+
"verification": {
|
|
118
|
+
"type": "object",
|
|
119
|
+
"additionalProperties": false,
|
|
120
|
+
"description": "v3.2.0 verify-by-test outcome (Phase 4 Step 3.7, opt-in via prefs.global.verifyByTest). confirmed = repro test failed as the finding predicts (finding stands, test kept as the Phase 3 RED test); not-reproduced = repro test passed under evidence-gate (finding downgraded to deferred); inconclusive = compile error / timeout / not unit-testable (judgment verdict stands).",
|
|
121
|
+
"required": ["result"],
|
|
122
|
+
"properties": {
|
|
123
|
+
"result": {
|
|
124
|
+
"type": "string",
|
|
125
|
+
"enum": ["confirmed", "not-reproduced", "inconclusive"]
|
|
126
|
+
},
|
|
127
|
+
"testRef": {
|
|
128
|
+
"type": "string",
|
|
129
|
+
"minLength": 1,
|
|
130
|
+
"description": "Single-test reference, e.g. 'AuthTests/LoginTests/testExpiredTokenRejected' or 'tests/test_auth.py::test_expired_token'."
|
|
131
|
+
},
|
|
132
|
+
"evidencePath": {
|
|
133
|
+
"type": "string",
|
|
134
|
+
"minLength": 1,
|
|
135
|
+
"description": "Path to the test-run log verified by evidence-gate.mjs, e.g. '.pipeline/verify-1.test.log'."
|
|
136
|
+
},
|
|
137
|
+
"note": { "type": "string" }
|
|
138
|
+
},
|
|
139
|
+
"if": {
|
|
140
|
+
"properties": { "result": { "enum": ["confirmed", "not-reproduced"] } }
|
|
141
|
+
},
|
|
142
|
+
"then": {
|
|
143
|
+
"required": ["result", "testRef", "evidencePath"]
|
|
144
|
+
}
|
|
145
|
+
},
|
|
117
146
|
"rawFinding": {
|
|
118
147
|
"type": "object",
|
|
119
148
|
"additionalProperties": false,
|
|
@@ -124,7 +153,8 @@
|
|
|
124
153
|
"line": { "type": "integer", "minimum": 0 },
|
|
125
154
|
"issue": { "type": "string", "minLength": 4 },
|
|
126
155
|
"fix": { "type": "string" },
|
|
127
|
-
"reviewer": { "$ref": "#/$defs/reviewer" }
|
|
156
|
+
"reviewer": { "$ref": "#/$defs/reviewer" },
|
|
157
|
+
"verification": { "$ref": "#/$defs/verification" }
|
|
128
158
|
}
|
|
129
159
|
},
|
|
130
160
|
"acceptedFinding": {
|
|
@@ -144,7 +174,8 @@
|
|
|
144
174
|
"type": "string",
|
|
145
175
|
"minLength": 4,
|
|
146
176
|
"description": "Concrete change the dev agent must make. Required for accepted items so Phase 3 re-entry has actionable direction."
|
|
147
|
-
}
|
|
177
|
+
},
|
|
178
|
+
"verification": { "$ref": "#/$defs/verification" }
|
|
148
179
|
}
|
|
149
180
|
}
|
|
150
181
|
]
|
|
@@ -22,6 +22,8 @@ Validate contracts. Each emits `══ <name> smoke: N passed, M failed ══`
|
|
|
22
22
|
- `smoke-phase-6-multi.sh` - Phase 6 multi-repo commit/PR cross-linking
|
|
23
23
|
- `smoke-phase-banner.sh` + `smoke-phase-tracker.sh` - Phase UI output contracts
|
|
24
24
|
- `smoke-phase4-triage.sh` - Phase 4 reviewer → triage flow
|
|
25
|
+
- `smoke-verify-by-test.sh` - Phase 4 Step 3.7 verify-by-test contract (v10.8.0)
|
|
26
|
+
- `smoke-handoff-contract.sh` - phase-boundary structured handoff + handoff-first resume (v10.8.0)
|
|
25
27
|
|
|
26
28
|
### Schema + state
|
|
27
29
|
- `smoke-schema-validation.sh` - all JSON schemas validate
|
|
@@ -64,12 +66,11 @@ Installed into `~/.claude/scripts/` and invoked by settings.json hook configurat
|
|
|
64
66
|
- `pre-push-check.sh` - runs before `git push` (smoke-cross-cli-behavior + smoke-personal-data)
|
|
65
67
|
- `output-quality-check.sh` - runs after PR body / Jira comment generation (newline / HTML entity guard)
|
|
66
68
|
|
|
67
|
-
## Runtime helpers
|
|
69
|
+
## Runtime helpers
|
|
68
70
|
Shell scripts invoked during pipeline execution.
|
|
69
71
|
|
|
70
72
|
- `phase-banner.sh` - renders phase headers
|
|
71
73
|
- `phase-tracker.sh` - live tracker state + tokens accumulation + render
|
|
72
|
-
- `stack-swap.sh` - stack detection + skill set swap
|
|
73
74
|
- `keychain-save.sh` - store PAT in macOS Keychain
|
|
74
75
|
- `audit-log.sh` + `audit-log-rotate.sh` - opt-in audit trail
|
|
75
76
|
- `log-metric.sh` - opt-in metric capture
|
|
@@ -66,7 +66,7 @@ if (flags.help || flags.h) {
|
|
|
66
66
|
}
|
|
67
67
|
|
|
68
68
|
// --- resolve config: prefs first, CLI overrides -----------------------------
|
|
69
|
-
const cfg = { enabled: false, maxUsd: 5.0, warnPct: 80, onExceed: "warn", pricingModel: "
|
|
69
|
+
const cfg = { enabled: false, maxUsd: 5.0, warnPct: 80, onExceed: "warn", pricingModel: "fable" };
|
|
70
70
|
|
|
71
71
|
if (flags.prefs) {
|
|
72
72
|
if (!existsSync(flags.prefs)) die(`prefs file not found: ${flags.prefs}`);
|
|
@@ -2,6 +2,13 @@
|
|
|
2
2
|
"_readme": "Per-model unit prices in USD per million tokens. Source: Anthropic public pricing (verified 2026-04-21). Update when Anthropic publishes new tiers. Unknown models render USD as ' - ' and emit a footnote - never block PR-body generation. cacheReadPerMtok is the discounted rate for prompt-cache hits (~10% of inPerMtok); the renderer prices a phase's tokens_cached at this rate when the tracker records it, so resume/cache reuse is visible in the ledger.",
|
|
3
3
|
"schemaVersion": "1.1.0",
|
|
4
4
|
"prices": {
|
|
5
|
+
"fable": {
|
|
6
|
+
"inPerMtok": 10.0,
|
|
7
|
+
"outPerMtok": 50.0,
|
|
8
|
+
"cacheReadPerMtok": 1.0,
|
|
9
|
+
"modelId": "claude-fable-5",
|
|
10
|
+
"note": "Top tier (restored v10.6.0) - architects, Reviewer 1, triage. Verified against Anthropic pricing 2026-07-02."
|
|
11
|
+
},
|
|
5
12
|
"opus": {
|
|
6
13
|
"inPerMtok": 5.0,
|
|
7
14
|
"outPerMtok": 25.0,
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
* complexity_delta - added if/guard/case/switch/while count w=1.5
|
|
16
16
|
* ui_critical - *View.swift / *Screen.kt / Configuration w=1.5
|
|
17
17
|
* migration - DB schema / migration path w=4.0
|
|
18
|
+
* test_lines_removed - test file shrinks (removed > added) w=3.0
|
|
18
19
|
*
|
|
19
20
|
* Inputs:
|
|
20
21
|
* --base <ref> Base ref. Default: origin/main, fallback: main
|
|
@@ -275,6 +276,15 @@ function buildRow(stat, addedLines, allChangedPaths) {
|
|
|
275
276
|
}
|
|
276
277
|
}
|
|
277
278
|
|
|
279
|
+
// Test-lines-removed: a test-classified file whose diff removes more lines
|
|
280
|
+
// than it adds. Shrinking tests is the classic get-to-green shortcut the
|
|
281
|
+
// immutable-test rule forbids (refs/rules.md); surface it to reviewers.
|
|
282
|
+
if (isTestPath(path) && stat.removed > stat.added) {
|
|
283
|
+
const w = 3.0;
|
|
284
|
+
signals.push({ name: "test_lines_removed", weight: w, value: stat.removed - stat.added });
|
|
285
|
+
score += 12 * w;
|
|
286
|
+
}
|
|
287
|
+
|
|
278
288
|
return {
|
|
279
289
|
path,
|
|
280
290
|
score: Math.round(score * 100) / 100,
|
|
@@ -306,7 +316,7 @@ function main() {
|
|
|
306
316
|
};
|
|
307
317
|
|
|
308
318
|
const out = {
|
|
309
|
-
schemaVersion: "1.
|
|
319
|
+
schemaVersion: "1.1.0",
|
|
310
320
|
task: {
|
|
311
321
|
id: TASK_ID,
|
|
312
322
|
base: BASE || "(diff-file)",
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
diff --git a/MyAppTests/LoginViewModelTests.swift b/MyAppTests/LoginViewModelTests.swift
|
|
2
|
+
index 1111111..2222222 100644
|
|
3
|
+
--- a/MyAppTests/LoginViewModelTests.swift
|
|
4
|
+
+++ b/MyAppTests/LoginViewModelTests.swift
|
|
5
|
+
@@ -10,30 +10,20 @@ final class LoginViewModelTests: XCTestCase {
|
|
6
|
+
func testLoginWithValidCredentials_Succeeds() {
|
|
7
|
+
let sut = LoginViewModel(service: MockAuthService())
|
|
8
|
+
+ sut.retryPolicy = .none
|
|
9
|
+
sut.login(email: "user@example.com", password: "correct")
|
|
10
|
+
+ XCTAssertTrue(sut.isAuthenticated)
|
|
11
|
+
}
|
|
12
|
+
-
|
|
13
|
+
- func testLoginWithInvalidEmail_ShowsError() {
|
|
14
|
+
- let sut = LoginViewModel(service: MockAuthService())
|
|
15
|
+
- sut.login(email: "not-an-email", password: "irrelevant")
|
|
16
|
+
- XCTAssertEqual(sut.errorMessage, "Invalid email")
|
|
17
|
+
- }
|
|
18
|
+
-
|
|
19
|
+
- func testLoginWithExpiredToken_Rejects() {
|
|
20
|
+
- let sut = LoginViewModel(service: MockAuthService(tokenState: .expired))
|
|
21
|
+
- sut.login(email: "user@example.com", password: "correct")
|
|
22
|
+
- XCTAssertFalse(sut.isAuthenticated)
|
|
23
|
+
- }
|
|
24
|
+
-
|
|
25
|
+
- func testLogout_ClearsSession() {
|
|
26
|
+
- let sut = LoginViewModel(service: MockAuthService())
|
|
27
|
+
- sut.logout()
|
|
28
|
+
- XCTAssertNil(sut.session)
|
|
29
|
+
- }
|
|
30
|
+
}
|
|
31
|
+
diff --git a/MyApp/Sources/Auth/LoginViewModel.swift b/MyApp/Sources/Auth/LoginViewModel.swift
|
|
32
|
+
index 3333333..4444444 100644
|
|
33
|
+
--- a/MyApp/Sources/Auth/LoginViewModel.swift
|
|
34
|
+
+++ b/MyApp/Sources/Auth/LoginViewModel.swift
|
|
35
|
+
@@ -20,6 +20,8 @@ final class LoginViewModel {
|
|
36
|
+
func login(email: String, password: String) {
|
|
37
|
+
+ guard email.contains("@") else { return }
|
|
38
|
+
+ service.authenticate(email: email, password: password)
|
|
39
|
+
}
|
|
40
|
+
}
|
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
.claude/CLAUDE.md 1
|
|
2
2
|
.claude/agents 8
|
|
3
|
-
.claude/commands
|
|
3
|
+
.claude/commands 89
|
|
4
4
|
.claude/lib 23
|
|
5
5
|
.claude/multi-agent-preferences.json 1
|
|
6
6
|
.claude/rules 12
|
|
7
7
|
.claude/schemas 23
|
|
8
|
-
.claude/scripts
|
|
8
|
+
.claude/scripts 169
|
|
9
9
|
.claude/settings.json 1
|
|
10
|
-
.claude/skills
|
|
10
|
+
.claude/skills 560
|
|
11
11
|
.copilot/agents 8
|
|
12
12
|
.copilot/copilot-instructions.md 1
|
|
13
13
|
.copilot/lib 23
|
|
14
14
|
.copilot/schemas 23
|
|
15
|
-
.copilot/scripts
|
|
16
|
-
.copilot/skills
|
|
15
|
+
.copilot/scripts 169
|
|
16
|
+
.copilot/skills 596
|
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
# 8. phase-4-review.md ref doc declares Step 1.75 + diff-risk-score.mjs
|
|
13
13
|
# 9. code-reviewer.md agent template carries the priority-files placeholder
|
|
14
14
|
# 10. prefs.schema.json exposes diffRisk advisory toggle
|
|
15
|
+
# 11. test-removal fixture fires the test_lines_removed signal (v1.1.0)
|
|
15
16
|
#
|
|
16
17
|
# Exit 0 = all pass, 1 = any failure.
|
|
17
18
|
|
|
@@ -26,6 +27,7 @@ REVIEWER="$ROOT/pipeline/agents/code-reviewer.md"
|
|
|
26
27
|
PREFS="$ROOT/pipeline/schemas/prefs.schema.json"
|
|
27
28
|
FIX_IOS="$ROOT/pipeline/scripts/fixtures/diff-risk-ios.diff"
|
|
28
29
|
FIX_AND="$ROOT/pipeline/scripts/fixtures/diff-risk-android.diff"
|
|
30
|
+
FIX_TESTRM="$ROOT/pipeline/scripts/fixtures/diff-risk-test-removal.diff"
|
|
29
31
|
|
|
30
32
|
pass=0
|
|
31
33
|
fail=0
|
|
@@ -38,10 +40,11 @@ printf '→ smoke-diff-risk (v8.3.0): pre-review risk scoring contract\n'
|
|
|
38
40
|
[ -f "$SCHEMA" ] || { record_fail "schema missing: $SCHEMA"; exit 1; }
|
|
39
41
|
[ -f "$FIX_IOS" ] || { record_fail "fixture missing: $FIX_IOS"; exit 1; }
|
|
40
42
|
[ -f "$FIX_AND" ] || { record_fail "fixture missing: $FIX_AND"; exit 1; }
|
|
43
|
+
[ -f "$FIX_TESTRM" ] || { record_fail "fixture missing: $FIX_TESTRM"; exit 1; }
|
|
41
44
|
|
|
42
45
|
# --- 1: iOS fixture produces JSON ---
|
|
43
46
|
out_ios=$(node "$SCORE" --diff "$FIX_IOS" 2>/dev/null)
|
|
44
|
-
if jq -e '.schemaVersion == "1.
|
|
47
|
+
if jq -e '.schemaVersion == "1.1.0"' <<< "$out_ios" >/dev/null 2>&1; then
|
|
45
48
|
record_pass "iOS fixture renders schema-versioned JSON"
|
|
46
49
|
else
|
|
47
50
|
record_fail "iOS fixture JSON malformed or missing schemaVersion"
|
|
@@ -150,6 +153,32 @@ else
|
|
|
150
153
|
record_fail "prefs.schema.json missing global.diffRiskAdvisory"
|
|
151
154
|
fi
|
|
152
155
|
|
|
156
|
+
# --- 11: test_lines_removed signal fires on the test-removal fixture ---
|
|
157
|
+
out_testrm=$(node "$SCORE" --diff "$FIX_TESTRM" 2>/dev/null)
|
|
158
|
+
sig_value=$(jq -r '.files[] | select(.path == "MyAppTests/LoginViewModelTests.swift")
|
|
159
|
+
| .signals[] | select(.name == "test_lines_removed") | .value' <<< "$out_testrm")
|
|
160
|
+
if [ "$sig_value" = "16" ]; then
|
|
161
|
+
record_pass "test_lines_removed fires with value=16 (18 removed - 2 added)"
|
|
162
|
+
else
|
|
163
|
+
record_fail "test_lines_removed should fire with value=16, got: ${sig_value:-missing}"
|
|
164
|
+
fi
|
|
165
|
+
sig_on_source=$(jq -r '[.files[] | select(.path == "MyApp/Sources/Auth/LoginViewModel.swift")
|
|
166
|
+
| .signals[] | select(.name == "test_lines_removed")] | length' <<< "$out_testrm")
|
|
167
|
+
if [ "$sig_on_source" = "0" ]; then
|
|
168
|
+
record_pass "test_lines_removed does not fire on source files"
|
|
169
|
+
else
|
|
170
|
+
record_fail "test_lines_removed must only fire on test-classified paths"
|
|
171
|
+
fi
|
|
172
|
+
set +e
|
|
173
|
+
echo "$out_testrm" | node "$VALIDATE" - >/dev/null 2>&1
|
|
174
|
+
rc_testrm=$?
|
|
175
|
+
set -e
|
|
176
|
+
if [ "$rc_testrm" -eq 0 ]; then
|
|
177
|
+
record_pass "validator accepts output carrying test_lines_removed"
|
|
178
|
+
else
|
|
179
|
+
record_fail "validator rejected test_lines_removed output (rc=$rc_testrm)"
|
|
180
|
+
fi
|
|
181
|
+
|
|
153
182
|
# --- Summary ---
|
|
154
183
|
total=$((pass + fail))
|
|
155
184
|
printf '\n→ smoke-diff-risk: %d/%d passed\n' "$pass" "$total"
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# smoke-handoff-contract.sh
|
|
3
|
+
#
|
|
4
|
+
# Verifies the v10.8.0 structured-handoff contract (fresh-context re-entry):
|
|
5
|
+
# 1. operations.md documents the Handoff block with all 5 required lines
|
|
6
|
+
# 2. operations.md compaction trigger re-reads state AND the latest handoff
|
|
7
|
+
# 3. log-format.md documents the Handoff section in the canonical log shape
|
|
8
|
+
# 4. resume.md Step 3 reads the latest handoff FIRST with pre-v10.8 fallback
|
|
9
|
+
#
|
|
10
|
+
# Exit 0 = all pass, 1 = any failure.
|
|
11
|
+
|
|
12
|
+
set -euo pipefail
|
|
13
|
+
|
|
14
|
+
ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
|
|
15
|
+
OPS="$ROOT/pipeline/commands/multi-agent/refs/phases/operations.md"
|
|
16
|
+
LOGFMT="$ROOT/pipeline/commands/multi-agent/refs/phases/log-format.md"
|
|
17
|
+
RESUME="$ROOT/pipeline/commands/multi-agent/resume.md"
|
|
18
|
+
|
|
19
|
+
pass=0
|
|
20
|
+
fail=0
|
|
21
|
+
failures=()
|
|
22
|
+
record_pass() { pass=$((pass + 1)); printf ' \033[0;32mPASS\033[0m %s\n' "$1"; }
|
|
23
|
+
record_fail() { fail=$((fail + 1)); failures+=("$1"); printf ' \033[0;31mFAIL\033[0m %s\n' "$1"; }
|
|
24
|
+
|
|
25
|
+
printf '→ smoke-handoff-contract: structured handoff (fresh-context re-entry)\n'
|
|
26
|
+
|
|
27
|
+
# 1. operations.md documents the Handoff block with the 5 required lines
|
|
28
|
+
if [ ! -f "$OPS" ]; then
|
|
29
|
+
record_fail "operations.md missing"
|
|
30
|
+
else
|
|
31
|
+
if grep -qF "Handoff block (v10.8.0)" "$OPS"; then
|
|
32
|
+
record_pass "operations.md documents the Handoff block"
|
|
33
|
+
else
|
|
34
|
+
record_fail "operations.md missing 'Handoff block (v10.8.0)' spec"
|
|
35
|
+
fi
|
|
36
|
+
for line in "- Done:" "- Remaining:" "- Decisions:" "- Open findings:" "- Next:"; do
|
|
37
|
+
if grep -qF -- "$line" "$OPS"; then
|
|
38
|
+
record_pass "operations.md handoff spec has '$line'"
|
|
39
|
+
else
|
|
40
|
+
record_fail "operations.md handoff spec missing '$line'"
|
|
41
|
+
fi
|
|
42
|
+
done
|
|
43
|
+
if grep -qF "no agent dispatch, no extra LLM call" "$OPS"; then
|
|
44
|
+
record_pass "operations.md states handoff is orchestrator-written (no LLM call)"
|
|
45
|
+
else
|
|
46
|
+
record_fail "operations.md must state the handoff costs no LLM call"
|
|
47
|
+
fi
|
|
48
|
+
fi
|
|
49
|
+
|
|
50
|
+
# 2. Compaction trigger re-reads state AND latest handoff
|
|
51
|
+
if grep -qE 'agent-state\.json.*AND the latest.*Handoff' "$OPS"; then
|
|
52
|
+
record_pass "compaction trigger re-reads state + latest handoff"
|
|
53
|
+
else
|
|
54
|
+
record_fail "operations.md compaction trigger must re-read agent-state.json AND the latest Handoff block"
|
|
55
|
+
fi
|
|
56
|
+
|
|
57
|
+
# 3. log-format.md documents the Handoff section
|
|
58
|
+
if grep -qF "## Handoff - end of Phase" "$LOGFMT"; then
|
|
59
|
+
record_pass "log-format.md documents the Handoff section"
|
|
60
|
+
else
|
|
61
|
+
record_fail "log-format.md missing the Handoff section"
|
|
62
|
+
fi
|
|
63
|
+
if grep -qF "LATEST block is authoritative" "$LOGFMT"; then
|
|
64
|
+
record_pass "log-format.md states latest-block-wins semantics"
|
|
65
|
+
else
|
|
66
|
+
record_fail "log-format.md must state the latest handoff block is authoritative"
|
|
67
|
+
fi
|
|
68
|
+
|
|
69
|
+
# 4. resume.md reads handoff first, with fallback for older logs
|
|
70
|
+
if grep -qE 'LATEST .?## Handoff.? block' "$RESUME"; then
|
|
71
|
+
record_pass "resume.md Step 3 reads the latest Handoff block first"
|
|
72
|
+
else
|
|
73
|
+
record_fail "resume.md Step 3 must read the latest Handoff block first"
|
|
74
|
+
fi
|
|
75
|
+
if grep -qiF "fall back to per-phase findings" "$RESUME"; then
|
|
76
|
+
record_pass "resume.md keeps the pre-v10.8 per-phase fallback"
|
|
77
|
+
else
|
|
78
|
+
record_fail "resume.md must keep the pre-v10.8 per-phase findings fallback"
|
|
79
|
+
fi
|
|
80
|
+
if grep -qF "trust state on mismatch" "$RESUME"; then
|
|
81
|
+
record_pass "resume.md defines state-wins conflict rule"
|
|
82
|
+
else
|
|
83
|
+
record_fail "resume.md must define the handoff-vs-state conflict rule (state wins)"
|
|
84
|
+
fi
|
|
85
|
+
|
|
86
|
+
printf '\n══ handoff-contract smoke: %d passed, %d failed ══\n' "$pass" "$fail"
|
|
87
|
+
if [ "$fail" -gt 0 ]; then
|
|
88
|
+
printf '\nFailures:\n'
|
|
89
|
+
for msg in "${failures[@]}"; do printf ' - %s\n' "$msg"; done
|
|
90
|
+
exit 1
|
|
91
|
+
fi
|
|
92
|
+
exit 0
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# smoke-verify-by-test.sh
|
|
3
|
+
#
|
|
4
|
+
# Verifies the Phase 4 Step 3.7 verify-by-test contract:
|
|
5
|
+
# 1. phase-4-review.md documents Step 3.7 with evidence-gate invocation + feature-doc pointer
|
|
6
|
+
# 2. refs/features/verify-by-test.md exists and covers the verdict table + red-test handoff
|
|
7
|
+
# 3. prefs.schema.json exposes global.verifyByTest.{enabled,maxFindings,model,stepTimeoutSec}
|
|
8
|
+
# 4. verifyByTest.enabled defaults to false (opt-in, no surprise cost)
|
|
9
|
+
# 5. triage-output.schema.json is v3.2.0 with the $defs.verification result enum
|
|
10
|
+
# 6. validate-triage.mjs accepts a valid `confirmed` verification and rejects bad ones
|
|
11
|
+
# 7. phase-3-dev.md documents the redTests rework re-entry
|
|
12
|
+
#
|
|
13
|
+
# Exit 0 = all pass, 1 = any failure.
|
|
14
|
+
|
|
15
|
+
set -euo pipefail
|
|
16
|
+
|
|
17
|
+
ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
|
|
18
|
+
PHASE4_DOC="$ROOT/pipeline/commands/multi-agent/refs/phases/phase-4-review.md"
|
|
19
|
+
PHASE3_DOC="$ROOT/pipeline/commands/multi-agent/refs/phases/phase-3-dev.md"
|
|
20
|
+
FEATURE_DOC="$ROOT/pipeline/commands/multi-agent/refs/features/verify-by-test.md"
|
|
21
|
+
PREFS_SCHEMA="$ROOT/pipeline/schemas/prefs.schema.json"
|
|
22
|
+
TRIAGE_SCHEMA="$ROOT/pipeline/schemas/triage-output.schema.json"
|
|
23
|
+
VALIDATOR="$ROOT/pipeline/scripts/validate-triage.mjs"
|
|
24
|
+
|
|
25
|
+
pass=0
|
|
26
|
+
fail=0
|
|
27
|
+
failures=()
|
|
28
|
+
# Count one passing check and print a green PASS line for it.
# Globals:   pass (incremented)
# Arguments: $1 - human-readable description of the check that passed
# Outputs:   one line on stdout
record_pass() {
  pass=$(( pass + 1 ))
  local label=$1
  printf ' \033[0;32mPASS\033[0m %s\n' "$label"
}
|
|
29
|
+
# Count one failing check, remember its description in the failures array,
# and print a red FAIL line for it.
# Globals:   fail (incremented), failures (appended to)
# Arguments: $1 - human-readable description of the check that failed
# Outputs:   one line on stdout
record_fail() {
  fail=$(( fail + 1 ))
  local label=$1
  failures+=("$label")
  printf ' \033[0;31mFAIL\033[0m %s\n' "$label"
}
|
|
30
|
+
|
|
31
|
+
printf '→ smoke-verify-by-test: Phase 4 Step 3.7 contract\n'
|
|
32
|
+
|
|
33
|
+
# 1. Phase 4 doc documents Step 3.7
|
|
34
|
+
if [ ! -f "$PHASE4_DOC" ]; then
|
|
35
|
+
record_fail "phase-4-review.md missing"
|
|
36
|
+
else
|
|
37
|
+
if grep -qF "3.7 Verify-by-test" "$PHASE4_DOC"; then
|
|
38
|
+
record_pass "phase-4-review.md documents Step 3.7"
|
|
39
|
+
else
|
|
40
|
+
record_fail "phase-4-review.md missing Step 3.7 section"
|
|
41
|
+
fi
|
|
42
|
+
if grep -qF "evidence-gate.mjs --claim test --status passed" "$PHASE4_DOC"; then
|
|
43
|
+
record_pass "Step 3.7 downgrade is evidence-gated"
|
|
44
|
+
else
|
|
45
|
+
record_fail "Step 3.7 must gate downgrades via evidence-gate.mjs --claim test --status passed"
|
|
46
|
+
fi
|
|
47
|
+
if grep -qF "refs/features/verify-by-test.md" "$PHASE4_DOC"; then
|
|
48
|
+
record_pass "phase-4-review.md points to the feature doc"
|
|
49
|
+
else
|
|
50
|
+
record_fail "phase-4-review.md must reference refs/features/verify-by-test.md"
|
|
51
|
+
fi
|
|
52
|
+
if grep -qF "review.verify_by_test" "$PHASE4_DOC"; then
|
|
53
|
+
record_pass "Step 3.7 emits review.verify_by_test telemetry"
|
|
54
|
+
else
|
|
55
|
+
record_fail "Step 3.7 must document the review.verify_by_test metric"
|
|
56
|
+
fi
|
|
57
|
+
fi
|
|
58
|
+
|
|
59
|
+
# 2. Feature doc exists with verdict + handoff coverage
|
|
60
|
+
if [ ! -f "$FEATURE_DOC" ]; then
|
|
61
|
+
record_fail "refs/features/verify-by-test.md missing"
|
|
62
|
+
else
|
|
63
|
+
for token in "not-reproduced" "inconclusive" "redTests" "Off by default"; do
|
|
64
|
+
if grep -qF "$token" "$FEATURE_DOC"; then
|
|
65
|
+
record_pass "feature doc covers '$token'"
|
|
66
|
+
else
|
|
67
|
+
record_fail "feature doc missing '$token'"
|
|
68
|
+
fi
|
|
69
|
+
done
|
|
70
|
+
fi
|
|
71
|
+
|
|
72
|
+
# 3. Prefs schema exposes verifyByTest knobs
|
|
73
|
+
for prop in enabled maxFindings model stepTimeoutSec; do
|
|
74
|
+
if jq -e ".properties.global.properties.verifyByTest.properties.${prop}" "$PREFS_SCHEMA" >/dev/null 2>&1; then
|
|
75
|
+
record_pass "prefs schema exposes verifyByTest.${prop}"
|
|
76
|
+
else
|
|
77
|
+
record_fail "prefs schema missing verifyByTest.${prop}"
|
|
78
|
+
fi
|
|
79
|
+
done
|
|
80
|
+
|
|
81
|
+
# 4. Off by default - preserves existing-user baseline
|
|
82
|
+
if jq -e '.properties.global.properties.verifyByTest.properties.enabled
|
|
83
|
+
| has("default") and .default == false' "$PREFS_SCHEMA" >/dev/null 2>&1; then
|
|
84
|
+
record_pass "verifyByTest.enabled defaults to false (opt-in)"
|
|
85
|
+
else
|
|
86
|
+
record_fail "verifyByTest.enabled must default to false"
|
|
87
|
+
fi
|
|
88
|
+
|
|
89
|
+
# 5. Triage schema version + verification enum
|
|
90
|
+
# Read the schema version without letting a jq failure (jq not installed,
# schema file missing or malformed) abort the script under `set -e`. On failure
# the value is left empty, so the version comparison that follows records it as
# a normal failed check and the summary is still printed.
schema_version=$(jq -r '.version // empty' "$TRIAGE_SCHEMA" 2>/dev/null) || schema_version=""
|
|
91
|
+
if [ "$schema_version" = "3.2.0" ]; then
|
|
92
|
+
record_pass "triage-output schema version is 3.2.0"
|
|
93
|
+
else
|
|
94
|
+
record_fail "triage-output schema version should be 3.2.0 (was: ${schema_version:-missing})"
|
|
95
|
+
fi
|
|
96
|
+
if jq -e '.["$defs"].verification.properties.result.enum
|
|
97
|
+
| (index("confirmed") != null
|
|
98
|
+
and index("not-reproduced") != null
|
|
99
|
+
and index("inconclusive") != null)' "$TRIAGE_SCHEMA" >/dev/null 2>&1; then
|
|
100
|
+
record_pass "schema verification.result enum complete"
|
|
101
|
+
else
|
|
102
|
+
record_fail "schema \$defs.verification.result enum must be confirmed/not-reproduced/inconclusive"
|
|
103
|
+
fi
|
|
104
|
+
|
|
105
|
+
# 6. Behavioral validator round-trips
|
|
106
|
+
valid_fixture='{"accepted":[{"severity":"blocking","file":"Sources/Auth/Login.swift","line":42,"issue":"expired token accepted as valid","fix":"reject tokens past expiry in validateToken()","reviewer":"fable","verification":{"result":"confirmed","testRef":"AuthTests/LoginTests/testExpiredTokenRejected","evidencePath":".pipeline/verify-1.test.log"}}],"deferred":[],"rejected":[],"approved":false}'
|
|
107
|
+
if printf '%s' "$valid_fixture" | node "$VALIDATOR" - >/dev/null 2>&1; then
|
|
108
|
+
record_pass "validator accepts confirmed verification with testRef+evidencePath"
|
|
109
|
+
else
|
|
110
|
+
record_fail "validator rejected a valid confirmed verification"
|
|
111
|
+
fi
|
|
112
|
+
|
|
113
|
+
bad_result_fixture='{"accepted":[{"severity":"blocking","file":"a.swift","line":1,"issue":"some issue here","fix":"do the fix","reviewer":"fable","verification":{"result":"maybe"}}],"deferred":[],"rejected":[],"approved":false}'
|
|
114
|
+
if printf '%s' "$bad_result_fixture" | node "$VALIDATOR" - >/dev/null 2>&1; then
|
|
115
|
+
record_fail "validator must reject verification.result 'maybe'"
|
|
116
|
+
else
|
|
117
|
+
record_pass "validator rejects bad verification.result"
|
|
118
|
+
fi
|
|
119
|
+
|
|
120
|
+
missing_ref_fixture='{"accepted":[{"severity":"blocking","file":"a.swift","line":1,"issue":"some issue here","fix":"do the fix","reviewer":"fable","verification":{"result":"confirmed"}}],"deferred":[],"rejected":[],"approved":false}'
|
|
121
|
+
if printf '%s' "$missing_ref_fixture" | node "$VALIDATOR" - >/dev/null 2>&1; then
|
|
122
|
+
record_fail "validator must reject confirmed verification without testRef/evidencePath"
|
|
123
|
+
else
|
|
124
|
+
record_pass "validator rejects confirmed verification lacking testRef/evidencePath"
|
|
125
|
+
fi
|
|
126
|
+
|
|
127
|
+
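# Reviewer enum parity: both fable (Claude Code default) and gpt (Copilot CLI) must be accepted.
# The fixture below exercises "gpt"; "fable" is already exercised by valid_fixture above.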
|
|
128
|
+
fable_fixture='{"accepted":[{"severity":"important","file":"a.swift","line":1,"issue":"some issue here","fix":"do the fix","reviewer":"gpt"}],"deferred":[],"rejected":[],"approved":true}'
|
|
129
|
+
if printf '%s' "$fable_fixture" | node "$VALIDATOR" - >/dev/null 2>&1; then
|
|
130
|
+
record_pass "validator accepts schema-allowed reviewers (fable/gpt)"
|
|
131
|
+
else
|
|
132
|
+
record_fail "validator must accept reviewer values fable and gpt (schema v3.1.0 parity)"
|
|
133
|
+
fi
|
|
134
|
+
|
|
135
|
+
# 7. Phase 3 doc documents the red-test rework re-entry
|
|
136
|
+
if grep -qF "verifyByTest.redTests" "$PHASE3_DOC"; then
|
|
137
|
+
record_pass "phase-3-dev.md documents redTests rework re-entry"
|
|
138
|
+
else
|
|
139
|
+
record_fail "phase-3-dev.md must document verifyByTest.redTests re-entry"
|
|
140
|
+
fi
|
|
141
|
+
|
|
142
|
+
printf '\n══ verify-by-test smoke: %d passed, %d failed ══\n' "$pass" "$fail"
|
|
143
|
+
if [ "$fail" -gt 0 ]; then
|
|
144
|
+
printf '\nFailures:\n'
|
|
145
|
+
for msg in "${failures[@]}"; do printf ' - %s\n' "$msg"; done
|
|
146
|
+
exit 1
|
|
147
|
+
fi
|
|
148
|
+
exit 0
|