multi-forge 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- forge/__init__.py +3 -0
- forge/_extensions/agents/.gitkeep +0 -0
- forge/_extensions/commands/.gitkeep +0 -0
- forge/_extensions/skills/analyze/SKILL.md +87 -0
- forge/_extensions/skills/challenge/SKILL.md +91 -0
- forge/_extensions/skills/consensus/SKILL.md +120 -0
- forge/_extensions/skills/consensus/resources/code_consensus_evaluation.md +94 -0
- forge/_extensions/skills/consensus/resources/consensus_evaluation.md +70 -0
- forge/_extensions/skills/consensus/resources/synthesis.md +101 -0
- forge/_extensions/skills/debate/SKILL.md +116 -0
- forge/_extensions/skills/debate/resources/code_debate_evaluation.md +101 -0
- forge/_extensions/skills/debate/resources/debate_evaluation.md +90 -0
- forge/_extensions/skills/panel/SKILL.md +141 -0
- forge/_extensions/skills/panel/resources/synthesis.md +103 -0
- forge/_extensions/skills/qa/SKILL.md +704 -0
- forge/_extensions/skills/qa/resources/checklist/0-enable.md +78 -0
- forge/_extensions/skills/qa/resources/checklist/1-preflight.md +24 -0
- forge/_extensions/skills/qa/resources/checklist/10-resume.md +143 -0
- forge/_extensions/skills/qa/resources/checklist/11-config.md +150 -0
- forge/_extensions/skills/qa/resources/checklist/12-search.md +58 -0
- forge/_extensions/skills/qa/resources/checklist/13-guard.md +237 -0
- forge/_extensions/skills/qa/resources/checklist/14-workflow.md +305 -0
- forge/_extensions/skills/qa/resources/checklist/15-skills.md +155 -0
- forge/_extensions/skills/qa/resources/checklist/16-handoff.md +224 -0
- forge/_extensions/skills/qa/resources/checklist/17-info.md +50 -0
- forge/_extensions/skills/qa/resources/checklist/18-disable.md +84 -0
- forge/_extensions/skills/qa/resources/checklist/19-uninstall.md +146 -0
- forge/_extensions/skills/qa/resources/checklist/2-extensions.md +188 -0
- forge/_extensions/skills/qa/resources/checklist/20-cleanup.md +36 -0
- forge/_extensions/skills/qa/resources/checklist/3-auth.md +234 -0
- forge/_extensions/skills/qa/resources/checklist/4-proxy.md +481 -0
- forge/_extensions/skills/qa/resources/checklist/5-session.md +541 -0
- forge/_extensions/skills/qa/resources/checklist/6-hooks.md +275 -0
- forge/_extensions/skills/qa/resources/checklist/7-costs.md +309 -0
- forge/_extensions/skills/qa/resources/checklist/8-status-line.md +174 -0
- forge/_extensions/skills/qa/resources/checklist/9-direct-commands.md +146 -0
- forge/_extensions/skills/qa/resources/checklist.md +103 -0
- forge/_extensions/skills/qa/resources/report-template.md +62 -0
- forge/_extensions/skills/qa/scripts/start-container.sh +529 -0
- forge/_extensions/skills/qa/scripts/walkthrough-state.py +1137 -0
- forge/_extensions/skills/review/SKILL.md +125 -0
- forge/_extensions/skills/review/references/claude-4.6.md +474 -0
- forge/_extensions/skills/review/references/claude-4.7.md +710 -0
- forge/_extensions/skills/review/references/gemini-3.1.md +546 -0
- forge/_extensions/skills/review/references/gpt-5.5.md +490 -0
- forge/_extensions/skills/review/references/skills-writing-guide.md +1588 -0
- forge/_extensions/skills/review/resources/code-anthropic.md +160 -0
- forge/_extensions/skills/review/resources/code-gemini.md +184 -0
- forge/_extensions/skills/review/resources/code-openai.md +203 -0
- forge/_extensions/skills/review/resources/code.md +160 -0
- forge/_extensions/skills/review-docs/SKILL.md +121 -0
- forge/_extensions/skills/review-docs/resources/docs-anthropic.md +170 -0
- forge/_extensions/skills/review-docs/resources/docs-gemini.md +204 -0
- forge/_extensions/skills/review-docs/resources/docs-openai.md +231 -0
- forge/_extensions/skills/review-docs/resources/docs.md +170 -0
- forge/_extensions/skills/smoke-test/SKILL.md +27 -0
- forge/_extensions/skills/smoke-test/scripts/smoke-test.sh +118 -0
- forge/_extensions/skills/understand/SKILL.md +148 -0
- forge/_extensions/skills/understand/resources/code-anthropic.md +163 -0
- forge/_extensions/skills/understand/resources/code-gemini.md +194 -0
- forge/_extensions/skills/understand/resources/code-openai.md +181 -0
- forge/_extensions/skills/understand/resources/code.md +163 -0
- forge/_extensions/skills/understand/resources/docs-anthropic.md +177 -0
- forge/_extensions/skills/understand/resources/docs-gemini.md +202 -0
- forge/_extensions/skills/understand/resources/docs-openai.md +191 -0
- forge/_extensions/skills/understand/resources/docs.md +177 -0
- forge/_extensions/skills/walkthrough/SKILL.md +599 -0
- forge/_extensions/skills/walkthrough/resources/checklist.md +765 -0
- forge/_extensions/skills/walkthrough/scripts/run-in-repo.sh +118 -0
- forge/_extensions/skills/walkthrough/scripts/setup-test-repo.sh +198 -0
- forge/_extensions/skills/walkthrough/scripts/walkthrough-state.py +1137 -0
- forge/backend/__init__.py +174 -0
- forge/backend/adapters/__init__.py +38 -0
- forge/backend/adapters/litellm.py +158 -0
- forge/backend/creation.py +89 -0
- forge/backend/registry.py +178 -0
- forge/cli/__init__.py +16 -0
- forge/cli/auth.py +483 -0
- forge/cli/backend.py +298 -0
- forge/cli/claude.py +411 -0
- forge/cli/config_cmd.py +303 -0
- forge/cli/extensions.py +1001 -0
- forge/cli/gc.py +165 -0
- forge/cli/guard.py +1018 -0
- forge/cli/guards.py +106 -0
- forge/cli/handoff.py +110 -0
- forge/cli/hooks/__init__.py +36 -0
- forge/cli/hooks/_group.py +20 -0
- forge/cli/hooks/_helpers.py +149 -0
- forge/cli/hooks/commands.py +1677 -0
- forge/cli/hooks/direct_commands.py +1304 -0
- forge/cli/hooks/install.py +232 -0
- forge/cli/hooks/policy.py +151 -0
- forge/cli/hooks/read_hygiene.py +74 -0
- forge/cli/hooks/verification.py +370 -0
- forge/cli/logs.py +406 -0
- forge/cli/main.py +292 -0
- forge/cli/proxy.py +1821 -0
- forge/cli/proxy_costs.py +313 -0
- forge/cli/search.py +416 -0
- forge/cli/session.py +892 -0
- forge/cli/session_addendum.py +81 -0
- forge/cli/session_fork.py +750 -0
- forge/cli/session_handoff.py +141 -0
- forge/cli/session_lifecycle.py +2053 -0
- forge/cli/session_manage.py +1336 -0
- forge/cli/session_memory.py +201 -0
- forge/cli/status_line.py +1398 -0
- forge/cli/workflow.py +1964 -0
- forge/config/__init__.py +110 -0
- forge/config/dataclass_utils.py +88 -0
- forge/config/defaults/__init__.py +0 -0
- forge/config/defaults/backends/__init__.py +0 -0
- forge/config/defaults/backends/litellm.yaml +196 -0
- forge/config/defaults/templates/__init__.py +0 -0
- forge/config/defaults/templates/litellm-anthropic-local.yaml +33 -0
- forge/config/defaults/templates/litellm-anthropic.yaml +24 -0
- forge/config/defaults/templates/litellm-gemini-flash-local.yaml +37 -0
- forge/config/defaults/templates/litellm-gemini-local.yaml +32 -0
- forge/config/defaults/templates/litellm-gemini-test.yaml +34 -0
- forge/config/defaults/templates/litellm-gemini.yaml +21 -0
- forge/config/defaults/templates/litellm-openai-codex-local.yaml +36 -0
- forge/config/defaults/templates/litellm-openai-local.yaml +38 -0
- forge/config/defaults/templates/litellm-openai.yaml +28 -0
- forge/config/defaults/templates/openrouter-anthropic.yaml +23 -0
- forge/config/defaults/templates/openrouter-deepseek.yaml +26 -0
- forge/config/defaults/templates/openrouter-gemini-flash.yaml +26 -0
- forge/config/defaults/templates/openrouter-gemini.yaml +23 -0
- forge/config/defaults/templates/openrouter-glm.yaml +23 -0
- forge/config/defaults/templates/openrouter-kimi.yaml +30 -0
- forge/config/defaults/templates/openrouter-minimax.yaml +26 -0
- forge/config/defaults/templates/openrouter-openai-codex.yaml +23 -0
- forge/config/defaults/templates/openrouter-openai.yaml +28 -0
- forge/config/defaults/templates/openrouter-qwen.yaml +25 -0
- forge/config/loader.py +675 -0
- forge/config/schema.py +448 -0
- forge/core/__init__.py +5 -0
- forge/core/auth/__init__.py +67 -0
- forge/core/auth/capabilities.py +219 -0
- forge/core/auth/credentials_file.py +244 -0
- forge/core/auth/protocols.py +18 -0
- forge/core/auth/secrets.py +243 -0
- forge/core/auth/template_secrets.py +112 -0
- forge/core/data/__init__.py +5 -0
- forge/core/data/model_catalog.yaml +1522 -0
- forge/core/data/pricing.yaml +140 -0
- forge/core/data/system_prompt_addendums/__init__.py +0 -0
- forge/core/data/system_prompt_addendums/gemini.md +330 -0
- forge/core/data/system_prompt_addendums/openai.md +328 -0
- forge/core/llm/__init__.py +231 -0
- forge/core/llm/clients/__init__.py +14 -0
- forge/core/llm/clients/base.py +115 -0
- forge/core/llm/clients/litellm.py +619 -0
- forge/core/llm/clients/openai_compat.py +244 -0
- forge/core/llm/clients/openrouter.py +234 -0
- forge/core/llm/credentials.py +439 -0
- forge/core/llm/detection.py +86 -0
- forge/core/llm/errors.py +44 -0
- forge/core/llm/protocols.py +80 -0
- forge/core/llm/types.py +176 -0
- forge/core/logging.py +146 -0
- forge/core/models/__init__.py +91 -0
- forge/core/models/catalog.py +467 -0
- forge/core/models/pricing.py +165 -0
- forge/core/models/types.py +167 -0
- forge/core/naming.py +212 -0
- forge/core/ops/__init__.py +73 -0
- forge/core/ops/context.py +141 -0
- forge/core/ops/gc.py +802 -0
- forge/core/ops/proxy.py +146 -0
- forge/core/ops/resolution.py +135 -0
- forge/core/ops/session.py +344 -0
- forge/core/ops/session_context.py +548 -0
- forge/core/paths.py +38 -0
- forge/core/process.py +54 -0
- forge/core/reactive/__init__.py +38 -0
- forge/core/reactive/cost_tracking.py +300 -0
- forge/core/reactive/env.py +180 -0
- forge/core/reactive/proxy.py +78 -0
- forge/core/reactive/routing.py +622 -0
- forge/core/reactive/session_runner.py +185 -0
- forge/core/reactive/structured_output.py +62 -0
- forge/core/reactive/tagger.py +94 -0
- forge/core/reactive/throttle.py +132 -0
- forge/core/state/__init__.py +59 -0
- forge/core/state/exceptions.py +59 -0
- forge/core/state/io.py +140 -0
- forge/core/state/lock.py +99 -0
- forge/core/state/timestamps.py +60 -0
- forge/core/transcript.py +78 -0
- forge/core/typing_helpers.py +24 -0
- forge/core/workqueue/__init__.py +67 -0
- forge/core/workqueue/queue.py +552 -0
- forge/core/workqueue/types.py +63 -0
- forge/guard/__init__.py +26 -0
- forge/guard/deterministic/__init__.py +26 -0
- forge/guard/deterministic/base.py +158 -0
- forge/guard/deterministic/coding_standards.py +256 -0
- forge/guard/deterministic/registry.py +148 -0
- forge/guard/deterministic/tdd.py +171 -0
- forge/guard/engine.py +216 -0
- forge/guard/protocols.py +91 -0
- forge/guard/queries.py +96 -0
- forge/guard/semantic/__init__.py +34 -0
- forge/guard/semantic/promotion.py +18 -0
- forge/guard/semantic/supervisor.py +813 -0
- forge/guard/semantic/verdict.py +183 -0
- forge/guard/store.py +124 -0
- forge/guard/team/__init__.py +6 -0
- forge/guard/team/config.py +24 -0
- forge/guard/team/handlers.py +209 -0
- forge/guard/team/prompts.py +41 -0
- forge/guard/types.py +125 -0
- forge/guard/workflow/__init__.py +17 -0
- forge/guard/workflow/branches.py +67 -0
- forge/guard/workflow/config.py +63 -0
- forge/guard/workflow/divergence.py +113 -0
- forge/guard/workflow/policy.py +87 -0
- forge/guard/workflow/stages.py +205 -0
- forge/install/__init__.py +55 -0
- forge/install/cli.py +281 -0
- forge/install/exceptions.py +163 -0
- forge/install/hooks.py +109 -0
- forge/install/installer.py +1037 -0
- forge/install/models.py +321 -0
- forge/install/preset.py +272 -0
- forge/install/settings_merge.py +831 -0
- forge/install/tracking.py +238 -0
- forge/install/version.py +141 -0
- forge/proxy/__init__.py +0 -0
- forge/proxy/base_client.py +181 -0
- forge/proxy/client_adapter.py +476 -0
- forge/proxy/client_factory.py +531 -0
- forge/proxy/converters.py +1206 -0
- forge/proxy/cost_logger.py +132 -0
- forge/proxy/cost_tracker.py +242 -0
- forge/proxy/data_models.py +338 -0
- forge/proxy/error_hints.py +92 -0
- forge/proxy/metrics.py +222 -0
- forge/proxy/model_spec.py +158 -0
- forge/proxy/proxies.py +333 -0
- forge/proxy/proxy_identity.py +134 -0
- forge/proxy/proxy_orchestrator.py +1018 -0
- forge/proxy/proxy_startup.py +54 -0
- forge/proxy/server.py +1561 -0
- forge/proxy/utils.py +537 -0
- forge/review/__init__.py +6 -0
- forge/review/adversarial.py +111 -0
- forge/review/consensus.py +236 -0
- forge/review/engine.py +356 -0
- forge/review/models.py +437 -0
- forge/review/resources/__init__.py +5 -0
- forge/review/resources/codereview-performance.md +85 -0
- forge/review/resources/codereview-quick.md +75 -0
- forge/review/resources/codereview-security.md +92 -0
- forge/review/resources/codereview.md +85 -0
- forge/review/resources/docreview-quick.md +75 -0
- forge/review/resources/docreview.md +86 -0
- forge/review/resources/thinkdeep.md +89 -0
- forge/review/routing.py +368 -0
- forge/review/synthesis.py +73 -0
- forge/runtime_config.py +438 -0
- forge/search/__init__.py +55 -0
- forge/search/bm25_store.py +264 -0
- forge/search/content_store.py +197 -0
- forge/search/engine.py +352 -0
- forge/search/exceptions.py +51 -0
- forge/search/extractor.py +234 -0
- forge/search/index_state.py +295 -0
- forge/search/store.py +215 -0
- forge/search/tokenizer.py +24 -0
- forge/session/__init__.py +130 -0
- forge/session/active.py +339 -0
- forge/session/artifacts.py +202 -0
- forge/session/claude/__init__.py +50 -0
- forge/session/claude/cleanup.py +105 -0
- forge/session/claude/invoke.py +236 -0
- forge/session/claude/paths.py +200 -0
- forge/session/cleanup.py +216 -0
- forge/session/config.py +34 -0
- forge/session/direct_model.py +107 -0
- forge/session/effective.py +169 -0
- forge/session/exceptions.py +255 -0
- forge/session/handoff.py +881 -0
- forge/session/handoff_agent.py +544 -0
- forge/session/hooks/__init__.py +35 -0
- forge/session/hooks/models.py +73 -0
- forge/session/hooks/session_start.py +507 -0
- forge/session/identity.py +84 -0
- forge/session/index.py +553 -0
- forge/session/manager.py +1506 -0
- forge/session/models.py +572 -0
- forge/session/overrides.py +344 -0
- forge/session/plan_resolution.py +286 -0
- forge/session/prev_sessions.py +128 -0
- forge/session/store.py +431 -0
- forge/session/validation.py +47 -0
- forge/session/worktree/__init__.py +65 -0
- forge/session/worktree/cleanup.py +262 -0
- forge/session/worktree/config_copy.py +203 -0
- forge/session/worktree/create.py +332 -0
- forge/sidecar/__init__.py +29 -0
- forge/sidecar/container.py +161 -0
- forge/sidecar/docker.py +86 -0
- forge/sidecar/secrets.py +19 -0
- multi_forge-0.2.0.dist-info/METADATA +242 -0
- multi_forge-0.2.0.dist-info/RECORD +311 -0
- multi_forge-0.2.0.dist-info/WHEEL +4 -0
- multi_forge-0.2.0.dist-info/entry_points.txt +2 -0
- multi_forge-0.2.0.dist-info/licenses/LICENSE +203 -0
- multi_forge-0.2.0.dist-info/licenses/NOTICE +14 -0
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
<!-- prereq: 0.3, 2.1, 4.2 -->
|
|
2
|
+
|
|
3
|
+
## 14. Multi-Model Review (`forge workflow`)
|
|
4
|
+
|
|
5
|
+
Validates the workflow runners and the skill architecture.
|
|
6
|
+
|
|
7
|
+
- `forge workflow panel` is the fan-out runner CLI (supports `--code`, `--context`, and `--check`).
|
|
8
|
+
- `/forge:analyze` is a skill that calls `forge workflow analyze` (N=1 model).
|
|
9
|
+
- `/forge:debate` is a skill that calls `forge workflow debate` (supports `--code` for code evaluation).
|
|
10
|
+
- This section uses `$FORGE_QA_WORKFLOW_MODELS` (set by `start-container.sh` per provider profile). Workflow proxy
|
|
11
|
+
aliases are created in 4.2.
|
|
12
|
+
- Omitting `--models` uses all configured defaults (from `forge workflow list-models`).
|
|
13
|
+
- Workflow workers require `claude` on PATH in the environment running `forge workflow`; proxies choose model routing,
|
|
14
|
+
but workers still execute through local `claude -p`.
|
|
15
|
+
|
|
16
|
+
### 14.1 List Available Workflow Models
|
|
17
|
+
|
|
18
|
+
<!-- auto -->
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
forge workflow list-models
|
|
22
|
+
forge workflow list-models --json
|
|
23
|
+
forge workflow list-models --available
|
|
24
|
+
|
|
25
|
+
# Verify structured model metadata used by routing/preflight.
|
|
26
|
+
forge workflow list-models --json \
|
|
27
|
+
| jq -e 'map(has("name") and has("model_id") and has("family") and has("provider_refs") and has("preferred_proxy") and has("status") and has("reason")) | all'
|
|
28
|
+
|
|
29
|
+
# `--available` JSON should include only ready models.
|
|
30
|
+
forge workflow list-models --available --json \
|
|
31
|
+
| jq -e 'all(.status == "ready")'
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
- [ ] Groups models by primary credential and shows `[configured]` / `[not configured]`
|
|
35
|
+
- [ ] Shows model name, description, and status (`ready`/`unavailable`/`error`)
|
|
36
|
+
- [ ] `--json` outputs a structured JSON array with `name`, `model_id`, `family`, `provider_refs`, `preferred_proxy`,
|
|
37
|
+
`status`, and `reason`
|
|
38
|
+
- [ ] `--available` filters to ready models only
|
|
39
|
+
|
|
40
|
+
### 14.2 `forge workflow panel`
|
|
41
|
+
|
|
42
|
+
<!-- prereq: 14.1 -->
|
|
43
|
+
|
|
44
|
+
<!-- auto -->
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
forge workflow panel docs/ --models $FORGE_QA_WORKFLOW_MODELS --json
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
- [ ] Returns structured JSON output
|
|
51
|
+
- [ ] JSON includes `resolved_models` with actual model refs and proxy/template routing for each worker
|
|
52
|
+
- [ ] `--context blind` is the default (no `--resume` passed to workers)
|
|
53
|
+
|
|
54
|
+
### 14.3 `forge workflow panel --check`
|
|
55
|
+
|
|
56
|
+
<!-- prereq: 14.1 -->
|
|
57
|
+
|
|
58
|
+
<!-- auto -->
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
# Policy gate mode (structured verdict + exit code)
|
|
62
|
+
forge workflow panel -p "Check for security issues" --models $FORGE_QA_WORKFLOW_MODELS --check
|
|
63
|
+
echo "Exit code: $?"
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
- [ ] Returns structured JSON verdict
|
|
67
|
+
- [ ] Exit code 0 = pass, 1 = findings
|
|
68
|
+
|
|
69
|
+
### 14.4 `forge workflow panel --code`
|
|
70
|
+
|
|
71
|
+
<!-- prereq: 14.1 -->
|
|
72
|
+
|
|
73
|
+
<!-- auto -->
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
# Multi-model code review (uses bundled codereview resource)
|
|
77
|
+
forge workflow panel src/ --code --models $FORGE_QA_WORKFLOW_MODELS --json
|
|
78
|
+
|
|
79
|
+
# With --check mode
|
|
80
|
+
forge workflow panel src/ --code --models $FORGE_QA_WORKFLOW_MODELS --check
|
|
81
|
+
echo "Exit code: $?"
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
- [ ] Spawns multiple workers with codereview resource prompt
|
|
85
|
+
- [ ] Returns structured JSON output per worker
|
|
86
|
+
- [ ] `--check` mode: fail-closed -- every worker must succeed AND emit a parseable verdict
|
|
87
|
+
|
|
88
|
+
### 14.5 `forge workflow analyze`
|
|
89
|
+
|
|
90
|
+
<!-- prereq: 14.1 -->
|
|
91
|
+
|
|
92
|
+
<!-- auto -->
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
# Single-model deep analysis (N=1 fan-out with bundled thinkdeep resource)
|
|
96
|
+
forge workflow analyze -p "Analyze the architecture of this project" --models $FORGE_QA_WORKFLOW_MODEL_A --json
|
|
97
|
+
|
|
98
|
+
# With --check mode (exit 0=pass, 1=findings)
|
|
99
|
+
forge workflow analyze -p "Check for security issues" --models $FORGE_QA_WORKFLOW_MODEL_A --check
|
|
100
|
+
echo "Exit code: $?"
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
- [ ] Spawns a single worker with the bundled thinkdeep analysis resource prompt
|
|
104
|
+
- [ ] Returns structured JSON output
|
|
105
|
+
- [ ] JSON includes `resolved_models` with actual model ref and proxy/template routing for the worker
|
|
106
|
+
- [ ] `--check` mode returns exit code 0 (pass) or 1 (findings) along with a verdict
|
|
107
|
+
|
|
108
|
+
### 14.6 `forge workflow debate`
|
|
109
|
+
|
|
110
|
+
<!-- prereq: 14.1 -->
|
|
111
|
+
|
|
112
|
+
<!-- auto -->
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
# Adversarial debate with positional proposal
|
|
116
|
+
forge workflow debate "Should we rewrite the core in Rust?" --models $FORGE_QA_WORKFLOW_MODELS --json
|
|
117
|
+
|
|
118
|
+
# Gate mode (exit 0=pass, 1=fail). Debate is fail-closed: success without a parseable verdict = failure.
|
|
119
|
+
forge workflow debate "Should we adopt microservices?" --models $FORGE_QA_WORKFLOW_MODELS --check
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
- [ ] Spawns workers with stance injection (for/against/neutral)
|
|
123
|
+
- [ ] Mandatory blinding (workers don't see each other's output)
|
|
124
|
+
- [ ] JSON includes `resolved_models` with actual model refs and proxy/template routing for each worker
|
|
125
|
+
- [ ] Returns structured output with agreement/disagreement areas
|
|
126
|
+
|
|
127
|
+
### 14.7 `forge workflow debate --code`
|
|
128
|
+
|
|
129
|
+
<!-- prereq: 14.1 -->
|
|
130
|
+
|
|
131
|
+
<!-- auto -->
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
# Adversarial code evaluation (uses bundled code evaluation resource)
|
|
135
|
+
forge workflow debate src/ --code --models $FORGE_QA_WORKFLOW_MODELS --json
|
|
136
|
+
|
|
137
|
+
# With --check mode
|
|
138
|
+
forge workflow debate src/ --code --models $FORGE_QA_WORKFLOW_MODELS --check
|
|
139
|
+
echo "Exit code: $?"
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
- [ ] Spawns workers with code evaluation resource + stance injection
|
|
143
|
+
- [ ] Returns structured JSON output with code-specific findings per worker
|
|
144
|
+
- [ ] `--check` mode: fail-closed -- every worker must succeed AND emit a parseable verdict
|
|
145
|
+
|
|
146
|
+
### 14.8 `forge workflow consensus`
|
|
147
|
+
|
|
148
|
+
<!-- prereq: 14.1 -->
|
|
149
|
+
|
|
150
|
+
<!-- auto -->
|
|
151
|
+
|
|
152
|
+
```bash
|
|
153
|
+
# Two-round consensus with role-assigned workers (proposal mode)
|
|
154
|
+
forge workflow consensus "Should we adopt a microservices architecture?" --models $FORGE_QA_WORKFLOW_MODELS --json
|
|
155
|
+
|
|
156
|
+
# Gate mode (requires 'position' field, not 'verdict')
|
|
157
|
+
forge workflow consensus "Should we adopt event sourcing?" --models $FORGE_QA_WORKFLOW_MODELS --check
|
|
158
|
+
echo "Exit code: $?"
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
- [ ] Spawns workers with role injection (architecture/security/correctness)
|
|
162
|
+
- [ ] Two rounds: independent positions then reconciliation
|
|
163
|
+
- [ ] Mandatory blinding in both rounds (no `--resume` passed to workers)
|
|
164
|
+
- [ ] JSON includes `round1`, `round2`, `roles`, `role_map`, `reconciliation_brief`
|
|
165
|
+
- [ ] JSON includes `resolved_models` with actual model refs and proxy/template routing for each worker
|
|
166
|
+
- [ ] `--check` mode: requires `position` field (rejects legacy `passed`/`verdict`)
|
|
167
|
+
|
|
168
|
+
### 14.9 `forge workflow consensus --code`
|
|
169
|
+
|
|
170
|
+
<!-- prereq: 14.1 -->
|
|
171
|
+
|
|
172
|
+
<!-- auto -->
|
|
173
|
+
|
|
174
|
+
```bash
|
|
175
|
+
# Two-round code consensus (code mode uses architecture/security/maintainability)
|
|
176
|
+
forge workflow consensus src/ --code --models $FORGE_QA_WORKFLOW_MODELS --json
|
|
177
|
+
|
|
178
|
+
# With --check mode
|
|
179
|
+
forge workflow consensus src/ --code --models $FORGE_QA_WORKFLOW_MODELS --check
|
|
180
|
+
echo "Exit code: $?"
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
- [ ] Spawns workers with code-mode role cycle (architecture/security/maintainability)
|
|
184
|
+
- [ ] Returns structured JSON output with code-specific findings per worker per round
|
|
185
|
+
- [ ] `--check` mode: schema-strict -- only SUPPORT/SUPPORT_WITH_CONDITIONS pass
|
|
186
|
+
|
|
187
|
+
### 14.10 `/forge:debate` (Live Session)
|
|
188
|
+
|
|
189
|
+
<!-- prereq: 14.1 -->
|
|
190
|
+
|
|
191
|
+
<!-- human:guided -->
|
|
192
|
+
|
|
193
|
+
<!-- requires: api_key -->
|
|
194
|
+
|
|
195
|
+
Validate the real Claude-facing `/forge:debate` path, not a terminal fallback. This step passes only if Claude Code
|
|
196
|
+
accepts the slash command and actually executes the adversarial runner end to end.
|
|
197
|
+
|
|
198
|
+
If Session B is not already open, start Claude Code in the container shell first:
|
|
199
|
+
|
|
200
|
+
```
|
|
201
|
+
cd $FORGE_TEST_REPO
|
|
202
|
+
claude
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
Read `$FORGE_QA_WORKFLOW_MODELS` from the container environment and construct the fully expanded command for the user to
|
|
206
|
+
type in Session B:
|
|
207
|
+
|
|
208
|
+
```
|
|
209
|
+
/forge:debate --models <expanded FORGE_QA_WORKFLOW_MODELS> A startup with 5 developers has a working Python monolith serving 10k req/sec. They're hitting scaling issues. Should they rewrite the core in Rust?
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
Wait for Claude to finish. Do not replace this with `forge workflow debate` in the shell; that CLI surface is already
|
|
213
|
+
covered by `14.6`. If Claude only says `Command completed`, echoes the skill instructions back, or asks you to run the
|
|
214
|
+
commands manually, treat this step as a failure. If the command reports `claude CLI not found in PATH`, the slash
|
|
215
|
+
command was accepted but the nested workflow worker runtime is missing from Claude Code's Bash environment; this still fails the step.
|
|
216
|
+
|
|
217
|
+
- [ ] Slash command accepted in Claude Code (no unknown-skill or parsing error)
|
|
218
|
+
- [ ] Claude executes the skill itself (not just instruction injection / "Command completed")
|
|
219
|
+
- [ ] No workflow preflight error about `claude` missing from PATH
|
|
220
|
+
- [ ] Workers spawned with different stances (for/against/neutral)
|
|
221
|
+
- [ ] Synthesis produced with points of agreement AND disagreement
|
|
222
|
+
- [ ] Different perspectives visible in the final response
|
|
223
|
+
|
|
224
|
+
### 14.11 Workflow `--models` Filter
|
|
225
|
+
|
|
226
|
+
<!-- prereq: 14.1 -->
|
|
227
|
+
|
|
228
|
+
<!-- requires: api_key -->
|
|
229
|
+
|
|
230
|
+
<!-- auto -->
|
|
231
|
+
|
|
232
|
+
```bash
|
|
233
|
+
# Single model filter -- should limit to that model only
|
|
234
|
+
forge workflow panel docs/ --models $FORGE_QA_WORKFLOW_MODEL_B --json 2>&1 | jq '.results | keys | length'
|
|
235
|
+
|
|
236
|
+
echo "---"
|
|
237
|
+
|
|
238
|
+
# Multiple model filter (comma-separated)
|
|
239
|
+
forge workflow panel docs/ --models $FORGE_QA_WORKFLOW_MODELS --json 2>&1 | jq '{results: (.results | keys), successful: .successful, failed: .failed}'
|
|
240
|
+
|
|
241
|
+
echo "---"
|
|
242
|
+
|
|
243
|
+
# Verify result keys match the requested models
|
|
244
|
+
forge workflow panel docs/ --models $FORGE_QA_WORKFLOW_MODEL_B --json 2>&1 | jq '.results | keys'
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
- [ ] Single `--models` value produces 1 result key in `.results`
|
|
248
|
+
- [ ] Comma-separated `--models` produces one result per specified model (`.successful` count matches)
|
|
249
|
+
- [ ] Result keys in `.results` correspond to the requested model names
|
|
250
|
+
|
|
251
|
+
### 14.12 Workflow Routing, `--proxy`, and Preflight
|
|
252
|
+
|
|
253
|
+
<!-- prereq: 4.2, 14.1 -->
|
|
254
|
+
|
|
255
|
+
<!-- requires: api_key -->
|
|
256
|
+
|
|
257
|
+
<!-- auto -->
|
|
258
|
+
|
|
259
|
+
```bash
|
|
260
|
+
# Explicit proxy routing: one selected proxy handles this worker.
|
|
261
|
+
FORGE_DEBUG=1 forge workflow panel docs/ \
|
|
262
|
+
--models "$FORGE_QA_WORKFLOW_MODEL_A" \
|
|
263
|
+
--proxy "$FORGE_QA_OPENAI_PROXY" \
|
|
264
|
+
--json > /tmp/forge-workflow-via.json
|
|
265
|
+
|
|
266
|
+
jq '{results: (.results | keys), successful, failed}' /tmp/forge-workflow-via.json
|
|
267
|
+
|
|
268
|
+
echo "---"
|
|
269
|
+
|
|
270
|
+
# Human output should surface non-blocking routing advisories when they apply.
|
|
271
|
+
forge workflow analyze -p "Reply with READY only." \
|
|
272
|
+
--models "$FORGE_QA_WORKFLOW_MODEL_A" \
|
|
273
|
+
--proxy "$FORGE_QA_OPENAI_PROXY" 2>&1 | tee /tmp/forge-workflow-via-warning.txt
|
|
274
|
+
|
|
275
|
+
grep -E "Routing warning|tier overrides|Proxy tier mappings" /tmp/forge-workflow-via-warning.txt || true
|
|
276
|
+
|
|
277
|
+
echo "---"
|
|
278
|
+
|
|
279
|
+
# Routing decisions are logged for observability when logging is enabled.
|
|
280
|
+
latest_log="$(ls -t "$FORGE_HOME"/logs/cli/workflow.*.log 2>/dev/null | head -n 1)"
|
|
281
|
+
test -n "$latest_log" && grep "Routing decision: model=$FORGE_QA_WORKFLOW_MODEL_A" "$latest_log"
|
|
282
|
+
|
|
283
|
+
echo "---"
|
|
284
|
+
|
|
285
|
+
# Direct Anthropic workers fail fast when no Anthropic credential is available.
|
|
286
|
+
tmp_home="$(mktemp -d)"
|
|
287
|
+
env -u ANTHROPIC_API_KEY FORGE_HOME="$tmp_home" \
|
|
288
|
+
forge workflow analyze -p "This should not call the model." \
|
|
289
|
+
--models claude-opus-4.6 \
|
|
290
|
+
--json 2>&1 | tee /tmp/forge-workflow-direct-preflight.json
|
|
291
|
+
rm -rf "$tmp_home"
|
|
292
|
+
|
|
293
|
+
jq -e 'any(.preflight_errors[]; test("ANTHROPIC_API_KEY|anthropic"; "i"))' \
|
|
294
|
+
/tmp/forge-workflow-direct-preflight.json
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
- [ ] `--proxy` resolves a compatible selected proxy and the JSON output remains parseable
|
|
298
|
+
- [ ] Non-JSON workflow output prints a `Routing warning:` when `--proxy` selects a cross-family or live-advisory route
|
|
299
|
+
(same-family routes may have no warning)
|
|
300
|
+
- [ ] The latest CLI workflow log contains a consolidated `Routing decision:` line with model, source, proxy/template,
|
|
301
|
+
and model ref
|
|
302
|
+
- [ ] Direct Anthropic workflow workers fail during preflight with an actionable credential error when
|
|
303
|
+
`ANTHROPIC_API_KEY` is absent
|
|
304
|
+
|
|
305
|
+
---
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
<!-- prereq: 0.3, 2.1, 4.2, 5.1 -->
|
|
2
|
+
|
|
3
|
+
## 15. Skills (`/forge:review`, `/forge:understand`, `/forge:panel`, `/forge:consensus`)
|
|
4
|
+
|
|
5
|
+
Validates the user-facing skill invocation UX. Section 14 tested the underlying `forge workflow` CLI engine; this
|
|
6
|
+
section tests the skills that wrap it with auto-detection and model-aware resource selection.
|
|
7
|
+
|
|
8
|
+
The workflow-backed skills in this section depend on the workflow proxy aliases created in 4.2. In partial runs such as
|
|
9
|
+
`--from 15`, prereq auto-resolution should create those aliases before `/forge:panel` or `/forge:consensus` is checked.
|
|
10
|
+
|
|
11
|
+
### 15.1 `forge session context` CLI
|
|
12
|
+
|
|
13
|
+
<!-- auto -->
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
# Verify the session context command works (requires an active session)
|
|
17
|
+
forge session context test-session-1 --json
|
|
18
|
+
|
|
19
|
+
# Extract model family field
|
|
20
|
+
forge session context test-session-1 --field model_family
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
- [ ] Returns valid JSON containing `session_name`, `proxy`, `model_family`, `models`, and `policy`
|
|
24
|
+
- [ ] `--field model_family` returns a raw string (openai, gemini, or anthropic)
|
|
25
|
+
- [ ] Direct (no-proxy) session returns `model_family: "anthropic"`
|
|
26
|
+
|
|
27
|
+
### 15.2 `forge session context` with UUID
|
|
28
|
+
|
|
29
|
+
<!-- auto -->
|
|
30
|
+
|
|
31
|
+
<!-- prereq: 5.1 -->
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
# Get the Claude session UUID from the session manifest
|
|
35
|
+
UUID=$(cat .forge/sessions/test-session-1/forge.session.json | jq -r '.confirmed.claude_session_id // empty')
|
|
36
|
+
|
|
37
|
+
# If UUID exists, verify UUID-based resolution
|
|
38
|
+
if [ -n "$UUID" ]; then
|
|
39
|
+
forge session context "$UUID" --field session_name
|
|
40
|
+
echo "UUID_RESOLVED=true"
|
|
41
|
+
else
|
|
42
|
+
echo "UUID_RESOLVED=skip (no confirmed UUID yet)"
|
|
43
|
+
fi
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
- [ ] UUID resolves to the correct session name (or the check is skipped with `UUID_RESOLVED=skip` if no UUID has been confirmed yet)
|
|
47
|
+
|
|
48
|
+
### 15.3 `/forge:review` (Live Session)
|
|
49
|
+
|
|
50
|
+
<!-- human:guided -->
|
|
51
|
+
|
|
52
|
+
<!-- requires: api_key -->
|
|
53
|
+
|
|
54
|
+
In Session B (or a live Claude session in the container), invoke the review skill to verify resource selection.
|
|
55
|
+
|
|
56
|
+
1. In the container shell, launch Claude and invoke the review skill:
|
|
57
|
+
|
|
58
|
+
```
|
|
59
|
+
/forge:review src/
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
2. Verify that Claude:
|
|
63
|
+
- Loads a code review resource from `~/.claude/skills/review/resources/`
|
|
64
|
+
- Produces a structured code review with findings
|
|
65
|
+
|
|
66
|
+
Expected:
|
|
67
|
+
|
|
68
|
+
- [ ] Skill invocation accepted by Claude Code (no "skill not found" error)
|
|
69
|
+
- [ ] Review output includes file:line references and severity ratings
|
|
70
|
+
|
|
71
|
+
### 15.4 `/forge:understand` (Live Session)
|
|
72
|
+
|
|
73
|
+
<!-- human:guided -->
|
|
74
|
+
|
|
75
|
+
<!-- requires: api_key -->
|
|
76
|
+
|
|
77
|
+
In the same live Claude session, invoke the understand skill.
|
|
78
|
+
|
|
79
|
+
1. Invoke the understand skill:
|
|
80
|
+
|
|
81
|
+
```
|
|
82
|
+
/forge:understand src/main.py --depth quick
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
2. Verify that Claude:
|
|
86
|
+
- Reads the target file
|
|
87
|
+
- Produces a structured explanation
|
|
88
|
+
|
|
89
|
+
Expected:
|
|
90
|
+
|
|
91
|
+
- [ ] Skill invocation accepted
|
|
92
|
+
- [ ] Output includes Summary and Key Components sections
|
|
93
|
+
- [ ] Quick depth produces concise output (fewer than 500 words)
|
|
94
|
+
|
|
95
|
+
### 15.5 `/forge:panel` (Live Session)
|
|
96
|
+
|
|
97
|
+
<!-- human:guided -->
|
|
98
|
+
|
|
99
|
+
<!-- requires: api_key -->
|
|
100
|
+
|
|
101
|
+
In the same live Claude session, invoke the panel skill for a multi-model review.
|
|
102
|
+
|
|
103
|
+
1. Invoke the panel skill:
|
|
104
|
+
|
|
105
|
+
```
|
|
106
|
+
/forge:panel src/ --code
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
2. This fans out to multiple models. Verify that Claude:
|
|
110
|
+
- Calls `forge workflow panel` under the hood
|
|
111
|
+
- Collects results from multiple models
|
|
112
|
+
- Synthesizes findings
|
|
113
|
+
|
|
114
|
+
Expected:
|
|
115
|
+
|
|
116
|
+
- [ ] Skill invocation accepted (runs as forked subagent)
|
|
117
|
+
- [ ] Multi-model fan-out executes through configured workflow proxies
|
|
118
|
+
- [ ] Output names the actual resolved model ref and proxy/template used for each worker
|
|
119
|
+
- [ ] Synthesis includes consensus findings and unique insights
|
|
120
|
+
|
|
121
|
+
Failure cue: if output says the default model set is unusable because OpenRouter proxies are missing, or falls back to a
|
|
122
|
+
Claude-only panel, mark this step failed. That means 4.2 did not create the workflow proxy aliases for this run.
|
|
123
|
+
|
|
124
|
+
### 15.6 `/forge:consensus` (Live Session)
|
|
125
|
+
|
|
126
|
+
<!-- human:guided -->
|
|
127
|
+
|
|
128
|
+
<!-- requires: api_key -->
|
|
129
|
+
|
|
130
|
+
In the same live Claude session, invoke the consensus skill for a multi-model recommendation.
|
|
131
|
+
|
|
132
|
+
1. Invoke the consensus skill:
|
|
133
|
+
|
|
134
|
+
```
|
|
135
|
+
/forge:consensus Should we split the session manager into separate read and write modules?
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
2. This runs two rounds across multiple models. Verify that Claude:
|
|
139
|
+
- Calls `forge workflow consensus` under the hood
|
|
140
|
+
- Round 1 collects independent positions from role-assigned workers
|
|
141
|
+
- Round 2 produces reconciled recommendations
|
|
142
|
+
- Synthesizes into agreed/disputed/no-consensus sections
|
|
143
|
+
|
|
144
|
+
Expected:
|
|
145
|
+
|
|
146
|
+
- [ ] Skill invocation accepted (runs as forked subagent)
|
|
147
|
+
- [ ] Two-round execution visible in output (Round 1 positions + Round 2 reconciliation)
|
|
148
|
+
- [ ] Output names the actual resolved model ref and proxy/template used for each worker
|
|
149
|
+
- [ ] Synthesis distinguishes agreed recommendations from unresolved disagreements
|
|
150
|
+
- [ ] Roles visible in output (architecture, security, correctness)
|
|
151
|
+
|
|
152
|
+
Failure cue: if output says the default model set is unusable because OpenRouter proxies are missing, or falls back to
|
|
153
|
+
Claude-only workers, mark this step failed. That means 4.2 did not create the workflow proxy aliases for this run.
|
|
154
|
+
|
|
155
|
+
---
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
<!-- prereq: 0.3, 5.1, 10.1 -->
|
|
2
|
+
|
|
3
|
+
## 16. Handoff Agent
|
|
4
|
+
|
|
5
|
+
### 16.1 Configure Direct Handoff Targets
|
|
6
|
+
|
|
7
|
+
<!-- requires: api_key -->
|
|
8
|
+
|
|
9
|
+
<!-- auto -->
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
cd $FORGE_TEST_REPO
|
|
13
|
+
|
|
14
|
+
# Seed one real target doc. The CLI validates existence, so the intentionally
|
|
15
|
+
# missing doc used for runtime-skip coverage is injected below via raw override.
|
|
16
|
+
mkdir -p .forge/memory
|
|
17
|
+
cat > .forge/memory/debugging.md <<'EOF'
|
|
18
|
+
# Debugging Notes
|
|
19
|
+
EOF
|
|
20
|
+
|
|
21
|
+
# Enable handoff agent for the session and configure explicit designated docs.
|
|
22
|
+
forge session set memory.auto_update.enabled true --session test-session-1
|
|
23
|
+
forge session set memory.auto_update.min_turns 1 --session test-session-1
|
|
24
|
+
forge session set memory.auto_update.mode augment --session test-session-1
|
|
25
|
+
forge session set memory.designated_docs '[]' --session test-session-1
|
|
26
|
+
forge session memory add-doc .forge/memory/debugging.md --strategy debugging --session test-session-1
|
|
27
|
+
forge session memory list-docs --json --session test-session-1 | jq -e '
|
|
28
|
+
length == 1
|
|
29
|
+
and any(.[]; .path == ".forge/memory/debugging.md" and .strategy == "debugging")
|
|
30
|
+
'
|
|
31
|
+
|
|
32
|
+
# Inject one missing doc directly to validate the agent's runtime skip path.
|
|
33
|
+
# This represents stale/manual config; `forge session memory add-doc` should reject it.
|
|
34
|
+
forge session set memory.designated_docs '[{"path":".forge/memory/debugging.md","strategy":"debugging","shadows":null},{"path":".forge/memory/patterns.md","strategy":"patterns","shadows":null}]' --session test-session-1
|
|
35
|
+
forge session memory list-docs --session test-session-1
|
|
36
|
+
forge session memory list-docs --json --session test-session-1 | jq -e '
|
|
37
|
+
length == 2
|
|
38
|
+
and any(.[]; .path == ".forge/memory/debugging.md" and .strategy == "debugging")
|
|
39
|
+
and any(.[]; .path == ".forge/memory/patterns.md" and .strategy == "patterns")
|
|
40
|
+
'
|
|
41
|
+
|
|
42
|
+
# Verify config
|
|
43
|
+
cat .forge/sessions/test-session-1/forge.session.json | jq '.overrides.memory'
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
- [ ] Handoff config written to session overrides
|
|
47
|
+
- [ ] `enabled`, `min_turns`, and `mode` values set
|
|
48
|
+
- [ ] `forge session memory add-doc/list-docs` configures an existing designated doc
|
|
49
|
+
- [ ] Raw override includes one missing designated doc for runtime skip coverage
|
|
50
|
+
- [ ] Config stores worktree-relative paths under `memory.designated_docs`
|
|
51
|
+
|
|
52
|
+
### 16.2 Run Handoff Manually (Direct Update)
|
|
53
|
+
|
|
54
|
+
<!-- prereq: 16.1 -->
|
|
55
|
+
|
|
56
|
+
<!-- requires: api_key -->
|
|
57
|
+
|
|
58
|
+
<!-- auto -->
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
cd $FORGE_TEST_REPO
|
|
62
|
+
|
|
63
|
+
# Create a deterministic transcript artifact with clear debugging takeaways.
|
|
64
|
+
mkdir -p .forge/artifacts/test-session-1/transcripts
|
|
65
|
+
cat > .forge/artifacts/test-session-1/transcripts/manual-handoff-direct.jsonl <<'EOF'
|
|
66
|
+
{"requestId":"handoff-1","message":{"role":"user","content":[{"type":"text","text":"Pytest failed with ModuleNotFoundError for tomlkit."}]}}
|
|
67
|
+
{"requestId":"handoff-1","message":{"role":"assistant","content":[{"type":"text","text":"The root cause was a missing dependency in the dev environment. Running uv sync fixed it."}]}}
|
|
68
|
+
{"requestId":"handoff-2","message":{"role":"user","content":[{"type":"text","text":"Please capture that debugging note for next time."}]}}
|
|
69
|
+
{"requestId":"handoff-2","message":{"role":"assistant","content":[{"type":"text","text":"Noted: if tests fail with ModuleNotFoundError for tomlkit, run uv sync before retrying."}]}}
|
|
70
|
+
EOF
|
|
71
|
+
|
|
72
|
+
BEFORE_LINES=$(wc -l < .forge/memory/debugging.md)
|
|
73
|
+
|
|
74
|
+
forge handoff run \
|
|
75
|
+
--session-name test-session-1 \
|
|
76
|
+
--worktree-path $FORGE_TEST_REPO \
|
|
77
|
+
--transcript-rel .forge/artifacts/test-session-1/transcripts/manual-handoff-direct.jsonl
|
|
78
|
+
|
|
79
|
+
AFTER_LINES=$(wc -l < .forge/memory/debugging.md)
|
|
80
|
+
|
|
81
|
+
echo "before=$BEFORE_LINES after=$AFTER_LINES"
|
|
82
|
+
cat .forge/memory/debugging.md
|
|
83
|
+
|
|
84
|
+
ls .forge/artifacts/test-session-1/handoff/review-*.md
|
|
85
|
+
forge session handoff show test-session-1 --latest
|
|
86
|
+
|
|
87
|
+
test "$AFTER_LINES" -gt "$BEFORE_LINES"
|
|
88
|
+
test ! -e .forge/memory/patterns.md
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
- [ ] Missing designated docs are skipped (not created)
|
|
92
|
+
- [ ] Worktree-relative doc paths resolve correctly in the test repo
|
|
93
|
+
- [ ] `forge handoff run` succeeds with the transcript artifact path provided
|
|
94
|
+
- [ ] Existing designated docs are updated with session takeaways
|
|
95
|
+
- [ ] Handoff agent stdout is persisted and visible via `forge session handoff show --latest`
|
|
96
|
+
|
|
97
|
+
### 16.3 Shadow Handoff (`suggested` + `shadows`)
|
|
98
|
+
|
|
99
|
+
<!-- prereq: 16.1 -->
|
|
100
|
+
|
|
101
|
+
<!-- requires: api_key -->
|
|
102
|
+
|
|
103
|
+
<!-- auto -->
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
cd $FORGE_TEST_REPO
|
|
107
|
+
|
|
108
|
+
# Create a shadow doc pair and switch designated_docs to the suggested/shadows surface.
|
|
109
|
+
mkdir -p .forge/memory docs
|
|
110
|
+
cat > docs/team-standards.md <<'EOF'
|
|
111
|
+
# Team Standards
|
|
112
|
+
|
|
113
|
+
- Prefer small, reviewable patches.
|
|
114
|
+
EOF
|
|
115
|
+
|
|
116
|
+
cat > .forge/memory/suggested_standards.md <<'EOF'
|
|
117
|
+
# Suggested Standards
|
|
118
|
+
EOF
|
|
119
|
+
|
|
120
|
+
forge session set memory.auto_update.mode augment --session test-session-1
|
|
121
|
+
forge session set memory.designated_docs '[]' --session test-session-1
|
|
122
|
+
forge session memory add-doc .forge/memory/suggested_standards.md \
|
|
123
|
+
--strategy suggested \
|
|
124
|
+
--shadows docs/team-standards.md \
|
|
125
|
+
--session test-session-1
|
|
126
|
+
forge session memory list-docs --session test-session-1
|
|
127
|
+
|
|
128
|
+
mkdir -p .forge/artifacts/test-session-1/transcripts
|
|
129
|
+
cat > .forge/artifacts/test-session-1/transcripts/manual-handoff-shadow.jsonl <<'EOF'
|
|
130
|
+
{"requestId":"shadow-1","message":{"role":"user","content":[{"type":"text","text":"We kept fixing bugs caused by giant mixed-purpose commits."}]}}
|
|
131
|
+
{"requestId":"shadow-1","message":{"role":"assistant","content":[{"type":"text","text":"A new standard could require small focused commits with clear intent."}]}}
|
|
132
|
+
{"requestId":"shadow-2","message":{"role":"user","content":[{"type":"text","text":"Please propose that as guidance rather than editing the official standards directly."}]}}
|
|
133
|
+
{"requestId":"shadow-2","message":{"role":"assistant","content":[{"type":"text","text":"I will suggest it in the shadow file for human review."}]}}
|
|
134
|
+
EOF
|
|
135
|
+
|
|
136
|
+
cp docs/team-standards.md /tmp/team-standards.before
|
|
137
|
+
SHADOW_BEFORE=$(wc -l < .forge/memory/suggested_standards.md)
|
|
138
|
+
|
|
139
|
+
forge handoff run \
|
|
140
|
+
--session-name test-session-1 \
|
|
141
|
+
--worktree-path $FORGE_TEST_REPO \
|
|
142
|
+
--transcript-rel .forge/artifacts/test-session-1/transcripts/manual-handoff-shadow.jsonl
|
|
143
|
+
|
|
144
|
+
SHADOW_AFTER=$(wc -l < .forge/memory/suggested_standards.md)
|
|
145
|
+
|
|
146
|
+
cat .forge/memory/suggested_standards.md
|
|
147
|
+
|
|
148
|
+
cmp -s docs/team-standards.md /tmp/team-standards.before
|
|
149
|
+
test "$SHADOW_AFTER" -gt "$SHADOW_BEFORE"
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
- [ ] `designated_docs` accepts `strategy: suggested` with `shadows`
|
|
153
|
+
- [ ] Handoff runs successfully against the shadow doc pair
|
|
154
|
+
- [ ] Shadow file gains proposed additions for later human review
|
|
155
|
+
- [ ] Official document is not edited in-place
|
|
156
|
+
|
|
157
|
+
### 16.4 Queued Handoff on Next CLI Startup
|
|
158
|
+
|
|
159
|
+
<!-- prereq: 16.1 -->
|
|
160
|
+
|
|
161
|
+
<!-- requires: api_key -->
|
|
162
|
+
|
|
163
|
+
<!-- auto -->
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
cd $FORGE_TEST_REPO
|
|
167
|
+
|
|
168
|
+
# Restore direct-update config for the queued-path test.
|
|
169
|
+
forge session set memory.auto_update.mode augment --session test-session-1
|
|
170
|
+
forge session set memory.designated_docs '[{"path":".forge/memory/debugging.md","strategy":"debugging","shadows":null},{"path":".forge/memory/patterns.md","strategy":"patterns","shadows":null}]' --session test-session-1
|
|
171
|
+
|
|
172
|
+
cat > .forge/memory/debugging.md <<'EOF'
|
|
173
|
+
# Debugging Notes
|
|
174
|
+
EOF
|
|
175
|
+
|
|
176
|
+
mkdir -p .forge/walkthrough
|
|
177
|
+
cat > .forge/walkthrough/handoff-queued-source.jsonl <<'EOF'
|
|
178
|
+
{"requestId":"queued-1","message":{"role":"user","content":[{"type":"text","text":"Ruff failed because generated fixtures were not formatted."}]}}
|
|
179
|
+
{"requestId":"queued-1","message":{"role":"assistant","content":[{"type":"text","text":"Running make format fixed the generated fixtures and the follow-up lint pass succeeded."}]}}
|
|
180
|
+
{"requestId":"queued-2","message":{"role":"user","content":[{"type":"text","text":"Please preserve that debugging note for the next session."}]}}
|
|
181
|
+
{"requestId":"queued-2","message":{"role":"assistant","content":[{"type":"text","text":"I will capture that this failure mode is resolved by re-running make format before lint."}]}}
|
|
182
|
+
EOF
|
|
183
|
+
|
|
184
|
+
SESSION_ID=$(cat .forge/sessions/test-session-1/forge.session.json | jq -r '.confirmed.claude_session_id')
|
|
185
|
+
BEFORE_LINES=$(wc -l < .forge/memory/debugging.md)
|
|
186
|
+
MARKER="${FORGE_HOME:-$HOME/.forge}/pending-work/handoff-${SESSION_ID}.json"
|
|
187
|
+
|
|
188
|
+
STOP_INPUT=$(jq -nc \
|
|
189
|
+
--arg sid "$SESSION_ID" \
|
|
190
|
+
--arg transcript ".forge/walkthrough/handoff-queued-source.jsonl" \
|
|
191
|
+
'{hook_event_name:"Stop",session_id:$sid,transcript_path:$transcript}')
|
|
192
|
+
|
|
193
|
+
STOP_OUTPUT=$(echo "$STOP_INPUT" | FORGE_SESSION=test-session-1 forge hook stop)
|
|
194
|
+
echo "$STOP_OUTPUT" | jq '.'
|
|
195
|
+
echo "$STOP_OUTPUT" | jq -e '.queued_handoff == true'
|
|
196
|
+
|
|
197
|
+
test -f "$MARKER"
|
|
198
|
+
|
|
199
|
+
# Any later Forge CLI startup should process the queued marker and spawn handoff in the background.
|
|
200
|
+
forge session list >/tmp/handoff-queue-trigger.log
|
|
201
|
+
|
|
202
|
+
for _ in $(seq 1 30); do
|
|
203
|
+
AFTER_LINES=$(wc -l < .forge/memory/debugging.md)
|
|
204
|
+
if [ ! -f "$MARKER" ] && [ "$AFTER_LINES" -gt "$BEFORE_LINES" ]; then
|
|
205
|
+
break
|
|
206
|
+
fi
|
|
207
|
+
sleep 1
|
|
208
|
+
done
|
|
209
|
+
|
|
210
|
+
AFTER_LINES=$(wc -l < .forge/memory/debugging.md)
|
|
211
|
+
echo "before=$BEFORE_LINES after=$AFTER_LINES marker=$MARKER"
|
|
212
|
+
cat .forge/memory/debugging.md
|
|
213
|
+
|
|
214
|
+
test ! -f "$MARKER"
|
|
215
|
+
test "$AFTER_LINES" -gt "$BEFORE_LINES"
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
- [ ] Stop hook reports `queued_handoff: true`
|
|
219
|
+
- [ ] Handoff marker is created under `$FORGE_HOME/pending-work/` (defaults to `~/.forge/pending-work/` when `FORGE_HOME` is unset)
|
|
220
|
+
- [ ] A later Forge CLI startup processes the queued handoff automatically
|
|
221
|
+
- [ ] Background handoff updates the designated doc without a direct `forge handoff run`
|
|
222
|
+
- [ ] Pending handoff marker is gone after processing completes
|
|
223
|
+
|
|
224
|
+
---
|