multi-forge 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (311) hide show
  1. forge/__init__.py +3 -0
  2. forge/_extensions/agents/.gitkeep +0 -0
  3. forge/_extensions/commands/.gitkeep +0 -0
  4. forge/_extensions/skills/analyze/SKILL.md +87 -0
  5. forge/_extensions/skills/challenge/SKILL.md +91 -0
  6. forge/_extensions/skills/consensus/SKILL.md +120 -0
  7. forge/_extensions/skills/consensus/resources/code_consensus_evaluation.md +94 -0
  8. forge/_extensions/skills/consensus/resources/consensus_evaluation.md +70 -0
  9. forge/_extensions/skills/consensus/resources/synthesis.md +101 -0
  10. forge/_extensions/skills/debate/SKILL.md +116 -0
  11. forge/_extensions/skills/debate/resources/code_debate_evaluation.md +101 -0
  12. forge/_extensions/skills/debate/resources/debate_evaluation.md +90 -0
  13. forge/_extensions/skills/panel/SKILL.md +141 -0
  14. forge/_extensions/skills/panel/resources/synthesis.md +103 -0
  15. forge/_extensions/skills/qa/SKILL.md +704 -0
  16. forge/_extensions/skills/qa/resources/checklist/0-enable.md +78 -0
  17. forge/_extensions/skills/qa/resources/checklist/1-preflight.md +24 -0
  18. forge/_extensions/skills/qa/resources/checklist/10-resume.md +143 -0
  19. forge/_extensions/skills/qa/resources/checklist/11-config.md +150 -0
  20. forge/_extensions/skills/qa/resources/checklist/12-search.md +58 -0
  21. forge/_extensions/skills/qa/resources/checklist/13-guard.md +237 -0
  22. forge/_extensions/skills/qa/resources/checklist/14-workflow.md +305 -0
  23. forge/_extensions/skills/qa/resources/checklist/15-skills.md +155 -0
  24. forge/_extensions/skills/qa/resources/checklist/16-handoff.md +224 -0
  25. forge/_extensions/skills/qa/resources/checklist/17-info.md +50 -0
  26. forge/_extensions/skills/qa/resources/checklist/18-disable.md +84 -0
  27. forge/_extensions/skills/qa/resources/checklist/19-uninstall.md +146 -0
  28. forge/_extensions/skills/qa/resources/checklist/2-extensions.md +188 -0
  29. forge/_extensions/skills/qa/resources/checklist/20-cleanup.md +36 -0
  30. forge/_extensions/skills/qa/resources/checklist/3-auth.md +234 -0
  31. forge/_extensions/skills/qa/resources/checklist/4-proxy.md +481 -0
  32. forge/_extensions/skills/qa/resources/checklist/5-session.md +541 -0
  33. forge/_extensions/skills/qa/resources/checklist/6-hooks.md +275 -0
  34. forge/_extensions/skills/qa/resources/checklist/7-costs.md +309 -0
  35. forge/_extensions/skills/qa/resources/checklist/8-status-line.md +174 -0
  36. forge/_extensions/skills/qa/resources/checklist/9-direct-commands.md +146 -0
  37. forge/_extensions/skills/qa/resources/checklist.md +103 -0
  38. forge/_extensions/skills/qa/resources/report-template.md +62 -0
  39. forge/_extensions/skills/qa/scripts/start-container.sh +529 -0
  40. forge/_extensions/skills/qa/scripts/walkthrough-state.py +1137 -0
  41. forge/_extensions/skills/review/SKILL.md +125 -0
  42. forge/_extensions/skills/review/references/claude-4.6.md +474 -0
  43. forge/_extensions/skills/review/references/claude-4.7.md +710 -0
  44. forge/_extensions/skills/review/references/gemini-3.1.md +546 -0
  45. forge/_extensions/skills/review/references/gpt-5.5.md +490 -0
  46. forge/_extensions/skills/review/references/skills-writing-guide.md +1588 -0
  47. forge/_extensions/skills/review/resources/code-anthropic.md +160 -0
  48. forge/_extensions/skills/review/resources/code-gemini.md +184 -0
  49. forge/_extensions/skills/review/resources/code-openai.md +203 -0
  50. forge/_extensions/skills/review/resources/code.md +160 -0
  51. forge/_extensions/skills/review-docs/SKILL.md +121 -0
  52. forge/_extensions/skills/review-docs/resources/docs-anthropic.md +170 -0
  53. forge/_extensions/skills/review-docs/resources/docs-gemini.md +204 -0
  54. forge/_extensions/skills/review-docs/resources/docs-openai.md +231 -0
  55. forge/_extensions/skills/review-docs/resources/docs.md +170 -0
  56. forge/_extensions/skills/smoke-test/SKILL.md +27 -0
  57. forge/_extensions/skills/smoke-test/scripts/smoke-test.sh +118 -0
  58. forge/_extensions/skills/understand/SKILL.md +148 -0
  59. forge/_extensions/skills/understand/resources/code-anthropic.md +163 -0
  60. forge/_extensions/skills/understand/resources/code-gemini.md +194 -0
  61. forge/_extensions/skills/understand/resources/code-openai.md +181 -0
  62. forge/_extensions/skills/understand/resources/code.md +163 -0
  63. forge/_extensions/skills/understand/resources/docs-anthropic.md +177 -0
  64. forge/_extensions/skills/understand/resources/docs-gemini.md +202 -0
  65. forge/_extensions/skills/understand/resources/docs-openai.md +191 -0
  66. forge/_extensions/skills/understand/resources/docs.md +177 -0
  67. forge/_extensions/skills/walkthrough/SKILL.md +599 -0
  68. forge/_extensions/skills/walkthrough/resources/checklist.md +765 -0
  69. forge/_extensions/skills/walkthrough/scripts/run-in-repo.sh +118 -0
  70. forge/_extensions/skills/walkthrough/scripts/setup-test-repo.sh +198 -0
  71. forge/_extensions/skills/walkthrough/scripts/walkthrough-state.py +1137 -0
  72. forge/backend/__init__.py +174 -0
  73. forge/backend/adapters/__init__.py +38 -0
  74. forge/backend/adapters/litellm.py +158 -0
  75. forge/backend/creation.py +89 -0
  76. forge/backend/registry.py +178 -0
  77. forge/cli/__init__.py +16 -0
  78. forge/cli/auth.py +483 -0
  79. forge/cli/backend.py +298 -0
  80. forge/cli/claude.py +411 -0
  81. forge/cli/config_cmd.py +303 -0
  82. forge/cli/extensions.py +1001 -0
  83. forge/cli/gc.py +165 -0
  84. forge/cli/guard.py +1018 -0
  85. forge/cli/guards.py +106 -0
  86. forge/cli/handoff.py +110 -0
  87. forge/cli/hooks/__init__.py +36 -0
  88. forge/cli/hooks/_group.py +20 -0
  89. forge/cli/hooks/_helpers.py +149 -0
  90. forge/cli/hooks/commands.py +1677 -0
  91. forge/cli/hooks/direct_commands.py +1304 -0
  92. forge/cli/hooks/install.py +232 -0
  93. forge/cli/hooks/policy.py +151 -0
  94. forge/cli/hooks/read_hygiene.py +74 -0
  95. forge/cli/hooks/verification.py +370 -0
  96. forge/cli/logs.py +406 -0
  97. forge/cli/main.py +292 -0
  98. forge/cli/proxy.py +1821 -0
  99. forge/cli/proxy_costs.py +313 -0
  100. forge/cli/search.py +416 -0
  101. forge/cli/session.py +892 -0
  102. forge/cli/session_addendum.py +81 -0
  103. forge/cli/session_fork.py +750 -0
  104. forge/cli/session_handoff.py +141 -0
  105. forge/cli/session_lifecycle.py +2053 -0
  106. forge/cli/session_manage.py +1336 -0
  107. forge/cli/session_memory.py +201 -0
  108. forge/cli/status_line.py +1398 -0
  109. forge/cli/workflow.py +1964 -0
  110. forge/config/__init__.py +110 -0
  111. forge/config/dataclass_utils.py +88 -0
  112. forge/config/defaults/__init__.py +0 -0
  113. forge/config/defaults/backends/__init__.py +0 -0
  114. forge/config/defaults/backends/litellm.yaml +196 -0
  115. forge/config/defaults/templates/__init__.py +0 -0
  116. forge/config/defaults/templates/litellm-anthropic-local.yaml +33 -0
  117. forge/config/defaults/templates/litellm-anthropic.yaml +24 -0
  118. forge/config/defaults/templates/litellm-gemini-flash-local.yaml +37 -0
  119. forge/config/defaults/templates/litellm-gemini-local.yaml +32 -0
  120. forge/config/defaults/templates/litellm-gemini-test.yaml +34 -0
  121. forge/config/defaults/templates/litellm-gemini.yaml +21 -0
  122. forge/config/defaults/templates/litellm-openai-codex-local.yaml +36 -0
  123. forge/config/defaults/templates/litellm-openai-local.yaml +38 -0
  124. forge/config/defaults/templates/litellm-openai.yaml +28 -0
  125. forge/config/defaults/templates/openrouter-anthropic.yaml +23 -0
  126. forge/config/defaults/templates/openrouter-deepseek.yaml +26 -0
  127. forge/config/defaults/templates/openrouter-gemini-flash.yaml +26 -0
  128. forge/config/defaults/templates/openrouter-gemini.yaml +23 -0
  129. forge/config/defaults/templates/openrouter-glm.yaml +23 -0
  130. forge/config/defaults/templates/openrouter-kimi.yaml +30 -0
  131. forge/config/defaults/templates/openrouter-minimax.yaml +26 -0
  132. forge/config/defaults/templates/openrouter-openai-codex.yaml +23 -0
  133. forge/config/defaults/templates/openrouter-openai.yaml +28 -0
  134. forge/config/defaults/templates/openrouter-qwen.yaml +25 -0
  135. forge/config/loader.py +675 -0
  136. forge/config/schema.py +448 -0
  137. forge/core/__init__.py +5 -0
  138. forge/core/auth/__init__.py +67 -0
  139. forge/core/auth/capabilities.py +219 -0
  140. forge/core/auth/credentials_file.py +244 -0
  141. forge/core/auth/protocols.py +18 -0
  142. forge/core/auth/secrets.py +243 -0
  143. forge/core/auth/template_secrets.py +112 -0
  144. forge/core/data/__init__.py +5 -0
  145. forge/core/data/model_catalog.yaml +1522 -0
  146. forge/core/data/pricing.yaml +140 -0
  147. forge/core/data/system_prompt_addendums/__init__.py +0 -0
  148. forge/core/data/system_prompt_addendums/gemini.md +330 -0
  149. forge/core/data/system_prompt_addendums/openai.md +328 -0
  150. forge/core/llm/__init__.py +231 -0
  151. forge/core/llm/clients/__init__.py +14 -0
  152. forge/core/llm/clients/base.py +115 -0
  153. forge/core/llm/clients/litellm.py +619 -0
  154. forge/core/llm/clients/openai_compat.py +244 -0
  155. forge/core/llm/clients/openrouter.py +234 -0
  156. forge/core/llm/credentials.py +439 -0
  157. forge/core/llm/detection.py +86 -0
  158. forge/core/llm/errors.py +44 -0
  159. forge/core/llm/protocols.py +80 -0
  160. forge/core/llm/types.py +176 -0
  161. forge/core/logging.py +146 -0
  162. forge/core/models/__init__.py +91 -0
  163. forge/core/models/catalog.py +467 -0
  164. forge/core/models/pricing.py +165 -0
  165. forge/core/models/types.py +167 -0
  166. forge/core/naming.py +212 -0
  167. forge/core/ops/__init__.py +73 -0
  168. forge/core/ops/context.py +141 -0
  169. forge/core/ops/gc.py +802 -0
  170. forge/core/ops/proxy.py +146 -0
  171. forge/core/ops/resolution.py +135 -0
  172. forge/core/ops/session.py +344 -0
  173. forge/core/ops/session_context.py +548 -0
  174. forge/core/paths.py +38 -0
  175. forge/core/process.py +54 -0
  176. forge/core/reactive/__init__.py +38 -0
  177. forge/core/reactive/cost_tracking.py +300 -0
  178. forge/core/reactive/env.py +180 -0
  179. forge/core/reactive/proxy.py +78 -0
  180. forge/core/reactive/routing.py +622 -0
  181. forge/core/reactive/session_runner.py +185 -0
  182. forge/core/reactive/structured_output.py +62 -0
  183. forge/core/reactive/tagger.py +94 -0
  184. forge/core/reactive/throttle.py +132 -0
  185. forge/core/state/__init__.py +59 -0
  186. forge/core/state/exceptions.py +59 -0
  187. forge/core/state/io.py +140 -0
  188. forge/core/state/lock.py +99 -0
  189. forge/core/state/timestamps.py +60 -0
  190. forge/core/transcript.py +78 -0
  191. forge/core/typing_helpers.py +24 -0
  192. forge/core/workqueue/__init__.py +67 -0
  193. forge/core/workqueue/queue.py +552 -0
  194. forge/core/workqueue/types.py +63 -0
  195. forge/guard/__init__.py +26 -0
  196. forge/guard/deterministic/__init__.py +26 -0
  197. forge/guard/deterministic/base.py +158 -0
  198. forge/guard/deterministic/coding_standards.py +256 -0
  199. forge/guard/deterministic/registry.py +148 -0
  200. forge/guard/deterministic/tdd.py +171 -0
  201. forge/guard/engine.py +216 -0
  202. forge/guard/protocols.py +91 -0
  203. forge/guard/queries.py +96 -0
  204. forge/guard/semantic/__init__.py +34 -0
  205. forge/guard/semantic/promotion.py +18 -0
  206. forge/guard/semantic/supervisor.py +813 -0
  207. forge/guard/semantic/verdict.py +183 -0
  208. forge/guard/store.py +124 -0
  209. forge/guard/team/__init__.py +6 -0
  210. forge/guard/team/config.py +24 -0
  211. forge/guard/team/handlers.py +209 -0
  212. forge/guard/team/prompts.py +41 -0
  213. forge/guard/types.py +125 -0
  214. forge/guard/workflow/__init__.py +17 -0
  215. forge/guard/workflow/branches.py +67 -0
  216. forge/guard/workflow/config.py +63 -0
  217. forge/guard/workflow/divergence.py +113 -0
  218. forge/guard/workflow/policy.py +87 -0
  219. forge/guard/workflow/stages.py +205 -0
  220. forge/install/__init__.py +55 -0
  221. forge/install/cli.py +281 -0
  222. forge/install/exceptions.py +163 -0
  223. forge/install/hooks.py +109 -0
  224. forge/install/installer.py +1037 -0
  225. forge/install/models.py +321 -0
  226. forge/install/preset.py +272 -0
  227. forge/install/settings_merge.py +831 -0
  228. forge/install/tracking.py +238 -0
  229. forge/install/version.py +141 -0
  230. forge/proxy/__init__.py +0 -0
  231. forge/proxy/base_client.py +181 -0
  232. forge/proxy/client_adapter.py +476 -0
  233. forge/proxy/client_factory.py +531 -0
  234. forge/proxy/converters.py +1206 -0
  235. forge/proxy/cost_logger.py +132 -0
  236. forge/proxy/cost_tracker.py +242 -0
  237. forge/proxy/data_models.py +338 -0
  238. forge/proxy/error_hints.py +92 -0
  239. forge/proxy/metrics.py +222 -0
  240. forge/proxy/model_spec.py +158 -0
  241. forge/proxy/proxies.py +333 -0
  242. forge/proxy/proxy_identity.py +134 -0
  243. forge/proxy/proxy_orchestrator.py +1018 -0
  244. forge/proxy/proxy_startup.py +54 -0
  245. forge/proxy/server.py +1561 -0
  246. forge/proxy/utils.py +537 -0
  247. forge/review/__init__.py +6 -0
  248. forge/review/adversarial.py +111 -0
  249. forge/review/consensus.py +236 -0
  250. forge/review/engine.py +356 -0
  251. forge/review/models.py +437 -0
  252. forge/review/resources/__init__.py +5 -0
  253. forge/review/resources/codereview-performance.md +85 -0
  254. forge/review/resources/codereview-quick.md +75 -0
  255. forge/review/resources/codereview-security.md +92 -0
  256. forge/review/resources/codereview.md +85 -0
  257. forge/review/resources/docreview-quick.md +75 -0
  258. forge/review/resources/docreview.md +86 -0
  259. forge/review/resources/thinkdeep.md +89 -0
  260. forge/review/routing.py +368 -0
  261. forge/review/synthesis.py +73 -0
  262. forge/runtime_config.py +438 -0
  263. forge/search/__init__.py +55 -0
  264. forge/search/bm25_store.py +264 -0
  265. forge/search/content_store.py +197 -0
  266. forge/search/engine.py +352 -0
  267. forge/search/exceptions.py +51 -0
  268. forge/search/extractor.py +234 -0
  269. forge/search/index_state.py +295 -0
  270. forge/search/store.py +215 -0
  271. forge/search/tokenizer.py +24 -0
  272. forge/session/__init__.py +130 -0
  273. forge/session/active.py +339 -0
  274. forge/session/artifacts.py +202 -0
  275. forge/session/claude/__init__.py +50 -0
  276. forge/session/claude/cleanup.py +105 -0
  277. forge/session/claude/invoke.py +236 -0
  278. forge/session/claude/paths.py +200 -0
  279. forge/session/cleanup.py +216 -0
  280. forge/session/config.py +34 -0
  281. forge/session/direct_model.py +107 -0
  282. forge/session/effective.py +169 -0
  283. forge/session/exceptions.py +255 -0
  284. forge/session/handoff.py +881 -0
  285. forge/session/handoff_agent.py +544 -0
  286. forge/session/hooks/__init__.py +35 -0
  287. forge/session/hooks/models.py +73 -0
  288. forge/session/hooks/session_start.py +507 -0
  289. forge/session/identity.py +84 -0
  290. forge/session/index.py +553 -0
  291. forge/session/manager.py +1506 -0
  292. forge/session/models.py +572 -0
  293. forge/session/overrides.py +344 -0
  294. forge/session/plan_resolution.py +286 -0
  295. forge/session/prev_sessions.py +128 -0
  296. forge/session/store.py +431 -0
  297. forge/session/validation.py +47 -0
  298. forge/session/worktree/__init__.py +65 -0
  299. forge/session/worktree/cleanup.py +262 -0
  300. forge/session/worktree/config_copy.py +203 -0
  301. forge/session/worktree/create.py +332 -0
  302. forge/sidecar/__init__.py +29 -0
  303. forge/sidecar/container.py +161 -0
  304. forge/sidecar/docker.py +86 -0
  305. forge/sidecar/secrets.py +19 -0
  306. multi_forge-0.2.0.dist-info/METADATA +242 -0
  307. multi_forge-0.2.0.dist-info/RECORD +311 -0
  308. multi_forge-0.2.0.dist-info/WHEEL +4 -0
  309. multi_forge-0.2.0.dist-info/entry_points.txt +2 -0
  310. multi_forge-0.2.0.dist-info/licenses/LICENSE +203 -0
  311. multi_forge-0.2.0.dist-info/licenses/NOTICE +14 -0
@@ -0,0 +1,599 @@
1
+ ---
2
+ name: forge:walkthrough
3
+ description: Interactive Forge verification walkthrough in a hermetic test environment. Use after installing or upgrading Forge to verify everything works.
4
+ disable-model-invocation: true
5
+ argument-hint: '[--setup-only] [--reset] [--report] [--sidecar]'
6
+ allowed-tools: Read, Bash, Glob # AskUserQuestion deliberately omitted — listing it triggers CC auto-approve bug (github.com/anthropics/claude-code/issues/29547). The tool remains available; omitting it preserves the interactive dialog.
7
+ ---
8
+
9
+ # Walkthrough
10
+
11
+ Interactive verification of Forge installation and features in an isolated test environment. Your real `~/.claude/` is
12
+ never touched.
13
+
14
+ ## Usage
15
+
16
+ ```
17
+ /forge:walkthrough Interactive walkthrough (default)
18
+ /forge:walkthrough --setup-only Create/reset test repo, then stop
19
+ /forge:walkthrough --reset Reset test repo to clean baseline
20
+ /forge:walkthrough --report Save run artifacts (report, state, logs, transcript)
21
+ /forge:walkthrough --sidecar Include sidecar section (requires Docker)
22
+ ```
23
+
24
+ ## Arguments
25
+
26
+ | Argument | Description |
27
+ | -------------- | ---------------------------------------------------------------------------------------------------------------------------- |
28
+ | `--setup-only` | Create or reset the test repo and generate env.sh, then stop. |
29
+ | `--reset` | Reset test repo to clean baseline before running. |
30
+ | `--report` | Save report, state, step logs, Forge debug logs, and transcript marker to a timestamped run directory after the walkthrough. |
31
+ | `--sidecar` | Include sidecar section (section 12). Requires Docker + sidecar image. |
32
+
33
+ ## Execution
34
+
35
+ Follow these steps in order. Do not skip steps.
36
+
37
+ ### Step 1: Parse Arguments and Route
38
+
39
+ Parse `$ARGUMENTS` to extract flags: `--setup-only`, `--reset`, `--report`, `--sidecar`. Track them as booleans
40
+ (`SETUP_ONLY`, `RESET`, `REPORT`, `SIDECAR`) for later phases.
41
+
42
+ **Greet the user:**
43
+
44
+ "I'll walk you through a functional verification of Forge in an isolated test environment. This is **Session A** — we
45
+ work together here. I run automated checks, you watch and ask questions. Later, I'll ask you to open a **Terminal** for
46
+ hands-on commands, and then launch **Session B** — a separate Claude Code instance where you experiment with Forge
47
+ features (hooks, status line, % commands) while I stay here to guide you. I'll install Forge extensions into a hermetic
48
+ sandbox, verify your real `~/.claude/` was not touched, then clean up."
49
+
50
+ If `--setup-only`: "I'll create the isolated test environment and stop -- no tests will run."
51
+
52
+ If `--report`: add "I'll also capture raw step output plus sandbox Forge debug logs and save them with the report when
53
+ we finish."
54
+
55
+ ### Step 2: Walkthrough Mode
56
+
57
+ The walkthrough is a **checklist-driven** interactive demo. You read `checklist.md` section by section, run commands
58
+ through `run-in-repo.sh`, and check assertions. The checklist defines what to run and check; you provide educational
59
+ narration and handle user interactions.
60
+
61
+ **Safety rule:** ALL `forge` CLI invocations MUST go through `run-in-repo.sh` -- even seemingly read-only ones like
62
+ `forge info` can write caches or state files to the real system. Only pure filesystem reads (`ls`, `cat`, `stat`,
63
+ `python3` for reading files, the Read tool) are safe to run directly. NEVER construct raw `forge` commands outside the
64
+ wrapper.
65
+
66
+ #### Phase 1: Setup
67
+
68
+ **Set the setup script** from the skill's own location:
69
+
70
+ ```bash
71
+ SETUP_SCRIPT="${CLAUDE_SKILL_DIR}/scripts/setup-test-repo.sh"
72
+ ```
73
+
74
+ **Handle special modes** before proceeding:
75
+
76
+ - `--setup-only`: run `bash "$SETUP_SCRIPT"` (add `--reset` if that flag is also set), print the env file path, and
77
+ stop. No checklist execution.
78
+ - `--reset` (without `--setup-only`): run `bash "$SETUP_SCRIPT" --reset`, then continue to the walkthrough.
79
+
80
+ **Set the scripts directory** from the skill's own location:
81
+
82
+ ```bash
83
+ SCRIPTS="${CLAUDE_SKILL_DIR}/scripts"
84
+ ```
85
+
86
+ **Resolve `$FORGE_TEST_REPO`**: use the env var if set, otherwise default to
87
+ `~/.forge/manual-testing/walkthrough/test-repo` (or `$FORGE_HOME/manual-testing/walkthrough/test-repo` if `FORGE_HOME`
88
+ is set).
89
+
90
+ **Check for stale install artifacts**: If `$FORGE_TEST_REPO/.claude/commands/` exists (leftover from a previous run),
91
+ ask the user: "Previous walkthrough artifacts detected. Reset the test repo?" If yes, run
92
+ `bash "$SETUP_SCRIPT" --reset`.
93
+
94
+ The setup script already scrubs walkthrough-derived volatile state on reruns (`.forge/artifacts/`,
95
+ `.forge/search-index/`, and `.forge-home/logs`), so a full reset is only needed when installed extensions or repo
96
+ contents drift.
97
+
98
+ **Ensure test repo exists** (for Phase 2 state init):
99
+
100
+ ```bash
101
+ # First run: create the test repo (Section 0.2 will re-run idempotently with tracked assertions)
102
+ # Re-run: setup script preserves the repo baseline and scrubs volatile walkthrough state
103
+ if [ ! -f "$FORGE_TEST_REPO/.forge-walkthrough-marker" ]; then
104
+ bash "$SETUP_SCRIPT"
105
+ fi
106
+ mkdir -p "$FORGE_TEST_REPO/.forge/walkthrough"
107
+ ```
108
+
109
+ **Resolve host-side walkthrough artifact paths**. These live outside the sandboxed `FORGE_HOME` used by
110
+ `run-in-repo.sh`, so reports from Session A stay under the user's normal manual-testing directory:
111
+
112
+ ```bash
113
+ WT_STATE_DIR_RAW="${FORGE_HOME:-$HOME/.forge}/manual-testing/walkthrough"
114
+ WT_STATE_DIR=$(python3 -c 'import os,sys; print(os.path.abspath(os.path.expanduser(os.path.expandvars(sys.argv[1]))))' "$WT_STATE_DIR_RAW")
115
+ WT_STEP_LOGS_DIR="$WT_STATE_DIR/logs"
116
+ WT_FORGE_LOG_SNAPSHOTS="$WT_STATE_DIR/forge-logs-snapshots"
117
+ ```
118
+
119
+ If `--report` was passed, clear any previous run-local step logs / snapshots before execution:
120
+
121
+ ```bash
122
+ if [ "$REPORT" = true ]; then
123
+ rm -rf "$WT_STEP_LOGS_DIR" "$WT_FORGE_LOG_SNAPSHOTS"
124
+ mkdir -p "$WT_STEP_LOGS_DIR" "$WT_FORGE_LOG_SNAPSHOTS"
125
+ fi
126
+ ```
127
+
128
+ #### Phase 1b: Docker Infrastructure Probe (only if `--sidecar`)
129
+
130
+ If `--sidecar` was passed, probe Docker availability before building the checklist index. If `--sidecar` was NOT passed,
131
+ skip this entirely (no Docker dependency for the default walkthrough).
132
+
133
+ ```bash
134
+ # 1. Resolve sidecar image from runtime config (respects user overrides)
135
+ SIDECAR_IMAGE=$(bash "$SCRIPTS/run-in-repo.sh" forge config show --raw 2>/dev/null \
136
+ | grep '^sidecar_image:' | awk '{print $2}')
137
+ SIDECAR_IMAGE="${SIDECAR_IMAGE:-forge-sidecar:latest}"
138
+ ```
139
+
140
+ Store `$SIDECAR_IMAGE` via `walkthrough-state.py "$CHECKLIST" var "$STATE_FILE" set SIDECAR_IMAGE <value>` (after the state file is initialized in Phase 2) for use in checklist variable
141
+ substitution.
142
+
143
+ ```bash
144
+ # 2. Probe Docker daemon + image
145
+ docker info --format '{{.ServerVersion}}' >/dev/null 2>&1 && \
146
+ docker image inspect "$SIDECAR_IMAGE" --format '{{.Id}}' >/dev/null 2>&1 && \
147
+ echo "true" || echo "false"
148
+ ```
149
+
150
+ Store result via `walkthrough-state.py "$CHECKLIST" var "$STATE_FILE" set INFRA_DOCKER <true|false>` (after the state file is initialized in Phase 2).
151
+
152
+ #### Phase 2: Build Checklist Index
153
+
154
+ **Set the walkthrough checklist** from the skill's own location:
155
+
156
+ ```bash
157
+ CHECKLIST="${CLAUDE_SKILL_DIR}/resources/checklist.md"
158
+ ```
159
+
160
+ Run the checklist parser to get the full structure:
161
+
162
+ ```bash
163
+ python3 "$SCRIPTS/walkthrough-state.py" "$CHECKLIST" index
164
+ ```
165
+
166
+ This returns JSON with all sections, subsections, annotations, and assertion counts. Store this as the checklist index.
167
+
168
+ Initialize progress tracking (always `--force` -- this is the start of a fresh run):
169
+
170
+ ```bash
171
+ python3 "$SCRIPTS/walkthrough-state.py" "$CHECKLIST" init --force "$FORGE_TEST_REPO/.forge/walkthrough/progress.json"
172
+ ```
173
+
174
+ Store the state file path as `$STATE_FILE` for Phase 3.
175
+
176
+ #### Phase 3: Execute Checklist (Main Loop)
177
+
178
+ For each subsection in the index, get its details:
179
+
180
+ ```bash
181
+ python3 "$SCRIPTS/walkthrough-state.py" "$CHECKLIST" step <N.X>
182
+ ```
183
+
184
+ This returns JSON with:
185
+
186
+ - `annotation` / `annotations`: step type(s)
187
+ - `prereqs`: prerequisite step/section IDs, if any
188
+ - `code_blocks`: list of `{code, runnable}` objects -- run entries where `runnable` is `true`; show others as
189
+ display-only
190
+ - `instructions`: prose for the user (human:guided items)
191
+ - `assertions`: list of assertion texts to verify
192
+ - `assertion_count`: number of assertions (deterministic -- do not count manually)
193
+ - `next`: ID of the next step (or null if last)
194
+
195
+ 01. **For each step**, call the parser to get its details. The parser handles all markdown parsing -- the agent never
196
+ reads raw checklist markdown during execution.
197
+
198
+ **Before presenting the step**, check prerequisites:
199
+
200
+ ```bash
201
+ python3 "$SCRIPTS/walkthrough-state.py" "$CHECKLIST" prereq-check "$STATE_FILE" <N.X>
202
+ ```
203
+
204
+ If `ok` is `false`, do **not** run or ask about the blocked step. Render it as skipped, include a short reason such
205
+ as `Skipped -- blocked by prereq: 7.1 (skipped)` or `Skipped -- blocked by prereq: 10.1 (not_run)`, record all its
206
+ assertions as `s` (skip), and continue. A skipped prerequisite is blocking; do **not** treat it as success.
207
+
208
+ 02. **Annotations** map to step types. Never show raw HTML comments in output.
209
+
210
+ | Annotation | Step type | Preamble |
211
+ | ------------------------ | ------------- | -------------------------------------------------------------- |
212
+ | `<!-- auto -->` | `[Automatic]` | "Automatic step -- sit back while I check a few things." |
213
+ | `<!-- human:confirm -->` | `[Review]` | "I'll run this and show you the output for review." |
214
+ | `<!-- human:guided -->` | `[Hands-on]` | "Your turn -- here's what to do in your Terminal / Session B." |
215
+
216
+ 03. **Step presentation format**: Every subsection follows a visual pattern so progress is easy to scan.
217
+
218
+ **Glue calls are silent.** The `walkthrough-state.py step`, `record`, and `var` calls between steps are bookkeeping.
219
+ Do NOT print commentary around them -- just call the tool and move on. The user should see a clean flow of steps
220
+ without JSON output or "now let me fetch the next step" narration.
221
+
222
+ **Step layout:**
223
+
224
+ ```
225
+ --- N.X Step Title [Type] -------------------------
226
+ <preamble from annotation table above>
227
+
228
+ <body: commands, output, or instructions>
229
+
230
+ Results:
231
+ ✔ First assertion passed
232
+ ✘ Second assertion FAILED: reason
233
+ o Third assertion skipped
234
+ ----------------------------------------------------
235
+ ```
236
+
237
+ **`[Hands-on]` body template** -- guided steps use a fixed inner layout so every run looks the same:
238
+
239
+ ````
240
+ --- N.X Step Title [Hands-on] -------------------------
241
+ Your turn -- here's what to do in your Terminal / Session B.
242
+
243
+ In your Terminal (or Session B):
244
+
245
+ 1. First action
246
+ ```
247
+
248
+ command-to-run
249
+
250
+ ```
251
+
252
+ 2. Second action
253
+ ```
254
+
255
+ another-command
256
+
257
+ ```
258
+
259
+ Expected:
260
+ - First assertion text from checklist
261
+ - Second assertion text from checklist
262
+
263
+ If something goes wrong: <failure cue from checklist, if any>
264
+
265
+ Review the instructions above, then answer below.
266
+
267
+
268
+
269
+ <AskUserQuestion>
270
+ ````
271
+
272
+ Rules for the template:
273
+
274
+ - **"In your Terminal:"** (or **"In Session B:"** for live Claude steps) -- always state where the user should run the step
275
+ - **Numbered steps** with flush-left code blocks -- no indentation so copy-paste has no leading spaces
276
+ - **"Expected:"** bullet list pulled from the checklist assertions -- tells the user what to look for
277
+ - **Failure cue** line only if the checklist includes one
278
+ - Never rephrase checklist instructions as prose -- copy the structure, fill in runtime values
279
+ - The buffer line and blank lines before AskUserQuestion are mandatory (rule 9)
280
+
281
+ **Section boundaries** appear between sections (not between steps within a section):
282
+
283
+ ```
284
+ Section N Complete: X/Y passed
285
+
286
+ <educational narration from narration table>
287
+
288
+ ====================================================
289
+
290
+ --- M.1 First Step [Type] -------------------------
291
+ ```
292
+
293
+ Use `---` (thin) for step boundaries, `===` (thick) as a single separator line between sections. This gives the user
294
+ a clear visual hierarchy: sections are major milestones, steps are work items within them.
295
+
296
+ Use ✔ for pass, ✘ for fail, o for skip. Each `- [ ]` line in the checklist = one result line. Include a brief note
297
+ in brackets when useful (e.g., `✔ run-in-repo.sh found [needed for sandbox isolation]`).
298
+
299
+ 04. **Handle by annotation type**:
300
+
301
+ | Annotation | Action |
302
+ | ------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
303
+ | `<!-- auto -->` | Run bash block (with variable substitution). Check assertions against output. Show results block. |
304
+ | `<!-- human:confirm -->` | Run bash block via wrapper, show output to user. Use AskUserQuestion: "Does this look correct?" (Pass / Fail / Skip). Show results block. |
305
+ | `<!-- human:guided -->` | Show instructions and bash snippet from the checklist. Do NOT run the bash block yourself. Use AskUserQuestion with context-appropriate framing (see rule 8). Show results block. |
306
+ | `<!-- requires: X -->` | Check infrastructure probe result for `X`. Skip if unavailable (see below). |
307
+ | No annotation | Treat as `<!-- human:confirm -->`. |
308
+
309
+ A subsection can have multiple annotations. Apply all that match. `requires` is checked first (skip before
310
+ attempting anything else).
311
+
312
+ **`requires:` parsing**: The parser returns annotations as raw strings (e.g., `"requires: docker"`). To handle them:
313
+
314
+ 1. Check `annotations[]` for any string starting with `requires:`.
315
+ 2. Extract the requirement name after the colon (e.g., `docker`).
316
+ 3. Look up `INFRA_<NAME>` (uppercased) via `walkthrough-state.py var get` (e.g., `INFRA_DOCKER`).
317
+ 4. If the value is `false` (or the variable doesn't exist), skip the subsection: show `[Skipped -- requires: X]` and
318
+ record all its assertions as `s` (skip).
319
+
320
+ The sidecar section (section 12) uses `<!-- requires: docker -->`. The `INFRA_DOCKER` variable is set in Phase 1b (only
321
+ when `--sidecar` is passed).
322
+
323
+ **`prereq:` handling**: Prerequisites are not step types; they come back in `prereqs[]` and are checked with the
324
+ `prereq-check` command above. The walkthrough uses them to skip Session B-dependent sections cleanly when Session B
325
+ was not launched, and to skip Search follow-up steps when the user chose not to exit Session B.
326
+
327
+ 05. **Variable substitution**: Replace these variables in bash blocks before running:
328
+
329
+ | Variable | Source |
330
+ | ------------------ | ------------------------------------------------- |
331
+ | `$SCRIPTS` | Resolved scripts directory (Phase 1) |
332
+ | `$SETUP_SCRIPT` | Resolved setup script path (Phase 1) |
333
+ | `$FORGE_TEST_REPO` | Resolved test repo path (Phase 1) |
334
+ | `$PROXY_ID` | Captured from section 6.1 proxy creation output |
335
+ | `$PROXY_BASE_URL` | Captured from section 6.1 proxy creation output |
336
+ | `$SIDECAR_IMAGE` | Resolved sidecar image name (Phase 1b, if probed) |
337
+
338
+ When a command outputs a proxy ID or base URL, persist it in the state file:
339
+
340
+ ```bash
341
+ python3 "$SCRIPTS/walkthrough-state.py" "$CHECKLIST" var "$STATE_FILE" set PROXY_ID <value>
342
+ ```
343
+
344
+ Retrieve with `var ... get PROXY_ID` when needed for substitution.
345
+
346
+ For blocks that start with `bash "$SCRIPTS/run-in-repo.sh"`, the wrapper handles CWD and env. For blocks without the
347
+ wrapper prefix (e.g., python3 mtime snapshots, `ls`, `test`), run directly -- these are read-only host operations.
348
+
349
+ 06. **Executing code blocks**: For each entry in the parser's `code_blocks` array where `runnable` is `true`, run `code`
350
+ as **one** Bash tool call. A single fenced block = one call, even if it spans multiple lines (e.g.,
351
+ `python3 -c "..."`). Entries where `runnable` is `false` are display-only snippets -- show them to the user in
352
+ `human:guided` steps but do not execute them.
353
+
354
+ **Default debug logging**: the walkthrough sandbox exports `FORGE_DEBUG=1` via `.forge/walkthrough/env.sh`, so Forge
355
+ commands write debug logs to `$FORGE_TEST_REPO/.forge-home/logs/...`.
356
+
357
+ **Before a block that contains `forge logs --clean`** and only when `--report` is enabled, snapshot the current
358
+ sandbox Forge logs so evidence survives the cleanup step:
359
+
360
+ ```bash
361
+ SNAP="$WT_FORGE_LOG_SNAPSHOTS/N.X/pre-clean"
362
+ rm -rf "$SNAP"
363
+ if [ -d "$FORGE_TEST_REPO/.forge-home/logs" ]; then
364
+ mkdir -p "$SNAP"
365
+ cp -R "$FORGE_TEST_REPO/.forge-home/logs/." "$SNAP"/
366
+ fi
367
+ ```
368
+
369
+ **When `--report` is enabled**, save raw command output to a per-step host-side log file:
370
+
371
+ ```bash
372
+ mkdir -p "$WT_STEP_LOGS_DIR"
373
+ cat > "$WT_STEP_LOGS_DIR/N.X.log" <<'EOF'
374
+ <raw output>
375
+ EOF
376
+ ```
377
+
378
+ **After classifying each step's assertions**, record results in the state file:
379
+
380
+ ```bash
381
+ python3 "$SCRIPTS/walkthrough-state.py" "$CHECKLIST" record "$STATE_FILE" <N.X> <results>
382
+ ```
383
+
384
+ Where `<results>` is comma-separated: `p` (pass), `f` (fail), `s` (skip) -- one per assertion. Example:
385
+ `record "$STATE_FILE" 6.1 p,p` for a step where both assertions passed. The output shows progress:
386
+ `6.1: 2/2 pass | Section 6: 2/7 | Overall: 27/51`.
387
+
388
+ 07. **Flag gate** -- If `--sidecar` was NOT passed, skip section 12 (Sidecar) entirely. Record all its assertions as `s`
389
+ (skip) and move directly to section 13 (Cleanup).
390
+
391
+ 08. **Gate rules** -- check after each section completes:
392
+
393
+ | If section fails... | Then... |
394
+ | ------------------- | --------------------------------- |
395
+ | 0 (Setup) | Stop. Setup is broken. |
396
+ | 2 (Install) | Skip Section 3 (can't verify). |
397
+ | 6 (Proxy/Session) | Skip Sections 7-11 (no proxy). |
398
+ | Any section | Section 13 (Cleanup) always runs. |
399
+
400
+ 09. **For `human:guided` items**: CRITICAL -- print the full instructions and bash snippet from the checklist **before**
401
+ calling AskUserQuestion. Do **not** end immediately on the last instruction line or code fence: Claude Code's dialog
402
+ overlays the bottom few terminal lines. After the real instructions, print one short disposable buffer line such as
403
+ `Review the instructions above, then answer below.` and then print **at least three blank lines** before calling
404
+ AskUserQuestion. Treat that buffer line and blank space as sacrificial padding. The user must see what to do BEFORE
405
+ being asked to confirm. The instructions appear in the step body between the opening preamble and the
406
+ AskUserQuestion call. If you put instructions after the question, the user sees only the question with no context.
407
+
408
+ **Match question framing and options to the step type:**
409
+
410
+ | Step asks user to... | Question style | Options |
411
+ | --------------------------------- | ------------------------------- | ---------------------------------- |
412
+ | Perform an action (open, launch) | "Have you [action]?" | Done / Skip / Stop walkthrough |
413
+ | Verify something (status, output) | "[Expected result] visible?" | Yes / No, something's wrong / Skip |
414
+ | Both (run command + check result) | "Did [expected result] appear?" | Yes / No, something's wrong / Skip |
415
+
416
+ Keep the AskUserQuestion prompt itself short enough to fit on one line when possible. Put detail in the printed
417
+ instructions, not in the dialog. Don't use "Done" as an answer to a yes/no question. "Did %help show commands?"
418
+ needs Yes/No, not Done.
419
+
420
+ The user acts in their Terminal window or Session B. If they choose "Stop walkthrough", skip all remaining sections
421
+ and go to Phase 4 (Summary).
422
+
423
+ **Do not invent Claude availability failures**: For guided steps that involve a live Claude Code session
424
+ (`forge claude start`, `forge session start`, Session B, status line checks, `%` commands, etc.), do **not**
425
+ recommend "Skip" merely because the agent cannot drive the TUI itself. Recommend "Skip" only when you have concrete
426
+ evidence that live Claude launching is unavailable:
427
+
428
+ - A direct probe fails, for example:
429
+
430
+ ```bash
431
+ command -v claude >/dev/null 2>&1
432
+ ```
433
+
434
+ - The user reports an actual launch failure such as `claude: command not found`.
435
+
436
+ If the current walkthrough already contains evidence that Claude launched successfully, treat live Claude as
437
+ available and continue guiding the user instead of steering them toward `Skip`.
438
+
439
+ 10. **Educational narration**: after each `## N.` section completes, print a brief explanation:
440
+
441
+ | After Section | Say |
442
+ | ------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
443
+ | 0 (Setup) | "Test repo ready. Your real `~/.claude/` timestamps are recorded as a baseline." |
444
+ | 1 (Terminal) | "You now have a sandboxed terminal. Commands there target the test repo, not your real system." |
445
+ | 2 (Install) | "Extensions installed. The wrapper enforced 4 safety gates before running the install." |
446
+ | 3 (Verify) | "Hooks, skills, commands all landed correctly. Pre-existing settings survived the install." |
447
+ | 4 (Untouched) | "Real system confirmed untouched -- all timestamps match the baseline." |
448
+ | 5 (CLI) | "You've seen the Forge CLI surface -- sessions, proxies, config, guard, all managed through `forge`." |
449
+ | 6 (Proxy/Session) | "Proxies route API calls; sessions track your workspace. Together they let you switch models without changing code." |
450
+ | 7 (Session B) | "A live Claude session with Forge hooks, status line, and % commands active." |
451
+ | 8 (% Commands) | "Direct commands let you control Forge from inside a Claude session without leaving the conversation." |
452
+ | 9 (Guard Policy) | "The guard enforces coding policies at tool boundaries. Deny messages include intent (why the policy exists) so models comply with the goal, not just the check." |
453
+ | 10 (Search) | "Search indexes your session transcripts for later retrieval. The BM25 engine works per-project -- no external service needed." |
454
+ | 11 (Session State) | "The session manifest captures intent (what you wanted), overrides (live changes), and confirmed (what hooks observed). Forking shows how sessions derive from each other." |
455
+ | 12 (Sidecar) | "Sidecar bundles proxy + Claude in Docker -- lifecycle coupling, port isolation, no host proxy needed." |
456
+ | 13 (Cleanup) | "Sandbox cleaned. Everything removed, real system still pristine." |
457
+
458
+ #### Common Mistakes (DON'T)
459
+
460
+ - **DON'T count assertions manually.** Use `walkthrough-state.py record` and `report` for all counting. LLMs get
461
+ arithmetic wrong.
462
+ - **DON'T combine multiple Bash commands in one call.** Run each `code_blocks` entry as a separate Bash call. Piped
463
+ multi-command blocks fail silently in the Bash tool.
464
+ - **DON'T put instructions after AskUserQuestion.** The user sees the question modal immediately -- anything you print
465
+ after it appears below their answer, not above the question. Print instructions BEFORE the tool call.
466
+ - **DO add a real visual buffer before AskUserQuestion.** Use a short sacrificial buffer line plus at least three blank
467
+ lines so the dialog covers padding, not the instructions or command snippet.
468
+ - **DON'T assume Claude Code is unavailable without evidence.** For `human:guided` live-session steps, only recommend
469
+ `Skip` after a real failed probe (`command -v claude`) or an actual user-reported launch error.
470
+ - **DON'T invent CLI commands.** Run ONLY commands from the checklist's `code_blocks`. If a command doesn't exist, the
471
+ walkthrough will show a confusing error.
472
+ - **DON'T use `$HOME` in Bash tool calls.** Use fully resolved absolute paths (e.g.,
473
+ `/Users/.../.forge/manual-testing/walkthrough/test-repo` not `$HOME/.forge/manual-testing/walkthrough/test-repo`). The
474
+ Bash tool's environment may not expand shell variables reliably.
475
+ - **DON'T run `forge` commands without the wrapper.** Even `forge info` can write caches. Use `run-in-repo.sh` for
476
+ everything except pure filesystem reads (Read tool, Glob tool, `python3`, `test`, `ls`).
477
+ - **DON'T modify files during the walkthrough.** This skill has Read, Bash, and Glob only -- no Write or Edit. The
478
+ walkthrough is verification, not modification.
479
+ - **DON'T ignore script failures.** If `walkthrough-state.py` exits with a non-zero code, STOP. The error message on
480
+ stderr tells you what went wrong (count mismatch, hash drift, corrupt state). Do not proceed with stale data.
481
+
482
+ #### Phase 4: Summary
483
+
484
+ Get the final report from the state file:
485
+
486
+ ```bash
487
+ python3 "$SCRIPTS/walkthrough-state.py" "$CHECKLIST" report "$STATE_FILE"
488
+ ```
489
+
490
+ This returns JSON with per-section pass/fail/skip counts, failures list, gaps, and totals. Render it as the results
491
+ table. The script provides all numbers -- do not count manually.
492
+
493
+ ```
494
+ Walkthrough Results
495
+ ====================================
496
+ Section Pass Fail Skip Expected
497
+ -----------------------------------------------------
498
+ 0. Setup 7 0 0 7
499
+ ...
500
+ -----------------------------------------------------
501
+ TOTAL N 0 0 N
502
+
503
+ Failures: (none)
504
+ Gaps: (none)
505
+ ====================================
506
+ ```
507
+
508
+ #### Phase 4b: Save Run Artifacts (`--report` only)
509
+
510
+ When `--report` is set, do not stop after printing the summary. Continue directly into artifact save.
511
+
512
+ ```bash
513
+ RUN_DIR="$WT_STATE_DIR/runs/$(date +%Y-%m-%d-%H%M%S)"
514
+ mkdir -p "$RUN_DIR"
515
+ ```
516
+
517
+ 1. Generate the final report with `walkthrough-state.py report` and write the rendered markdown to `$RUN_DIR/report.md`.
518
+
519
+ 2. Copy the state file:
520
+
521
+ ```bash
522
+ cp "$STATE_FILE" "$RUN_DIR/state.json"
523
+ ```
524
+
525
+ 3. Copy raw step logs when present:
526
+
527
+ ```bash
528
+ if [ -d "$WT_STEP_LOGS_DIR" ]; then
529
+ cp -R "$WT_STEP_LOGS_DIR" "$RUN_DIR/step-logs"
530
+ fi
531
+ ```
532
+
533
+ 4. Copy any pre-clean Forge log snapshots when present:
534
+
535
+ ```bash
536
+ if [ -d "$WT_FORGE_LOG_SNAPSHOTS" ]; then
537
+ cp -R "$WT_FORGE_LOG_SNAPSHOTS" "$RUN_DIR/forge-logs-snapshots"
538
+ fi
539
+ ```
540
+
541
+ 5. Copy the current sandbox Forge debug logs when present:
542
+
543
+ ```bash
544
+ if [ -d "$FORGE_TEST_REPO/.forge-home/logs" ]; then
545
+ mkdir -p "$RUN_DIR/forge-logs/final"
546
+ cp -R "$FORGE_TEST_REPO/.forge-home/logs/." "$RUN_DIR/forge-logs/final"
547
+ fi
548
+ ```
549
+
550
+ 6. Generate a transcript claim token and write the marker so only this walkthrough session can copy the transcript here
551
+ when it ends:
552
+
553
+ ```bash
554
+ WT_TRANSCRIPT_TOKEN="forge-walkthrough-transcript-token:$(python3 - <<'PY'
555
+ import uuid
556
+ print(uuid.uuid4())
557
+ PY
558
+ )"
559
+ python3 - <<'PY' "$RUN_DIR" "$WT_STATE_DIR/.pending-transcript" "$WT_TRANSCRIPT_TOKEN"
560
+ import json
561
+ import sys
562
+
563
+ run_dir, marker_path, token = sys.argv[1:4]
564
+ with open(marker_path, "w", encoding="utf-8") as handle:
565
+ json.dump({"run_dir": run_dir, "transcript_contains": token}, handle)
566
+ handle.write("\n")
567
+ PY
568
+ ```
569
+
570
+ Tell the user: "Walkthrough artifacts saved to `$RUN_DIR`. Forge step logs and debug logs were copied when present.
571
+ Transcript claim token: `$WT_TRANSCRIPT_TOKEN`. Transcript will be added when this walkthrough session ends."
572
+
573
+ Tip: "For a quick non-interactive check, use `/forge:smoke-test`. For the full QA checklist in Docker, use `/forge:qa`
574
+ (requires `forge extension enable --profile full`)."
575
+
576
+ ## Safety Model
577
+
578
+ | Tier | Scripts involved | What can go wrong | Mitigation |
579
+ | ----------- | ------------------------------- | --------------------------- | ----------------------------------------- |
580
+ | Walkthrough | `run-in-repo.sh` (agent-driven) | Install targets real system | 4 safety gates + agent mtime verification |
581
+
582
+ ### Safety Gates (run-in-repo.sh)
583
+
584
+ Every command in the walkthrough passes through a denylist check followed by four numbered gates:
585
+
586
+ 1. **Denylist** -- refuses to run when `FORGE_TEST_REPO` is empty or set to `/`, `$HOME`, `/Users`, `/tmp`, `/var`, etc.
587
+ 2. **Gate 1** -- env.sh exists (test repo not deleted)
588
+ 3. **Gate 2** -- marker file exists (this is actually a test repo)
589
+ 4. **Gate 3** -- FORGE_HOME isolation: FORGE_HOME points to `$FORGE_TEST_REPO/.forge-home` (not real `~/.forge/`)
590
+ 5. **Gate 4** -- structure check: `.forge/walkthrough/` and `CLAUDE.md` exist
591
+
592
+ Any gate failure = loud error message + exit 1. No silent fallthrough.
593
+
594
+ ## Tips
595
+
596
+ - **Quick check**: For a quick non-interactive health check, use `/forge:smoke-test`.
597
+ - **Full QA**: For the full QA checklist in Docker, use `/forge:qa` (requires `forge extension enable --profile full`).
598
+ - **Robustness principle**: The user should never see an error you could have avoided. If something is known to fail,
599
+ use the working alternative directly.