thehood 0.1.0-preview.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (274)
  1. package/CODE_OF_CONDUCT.md +21 -0
  2. package/CONTRIBUTING.md +58 -0
  3. package/LICENSE +21 -0
  4. package/PRIVACY.md +49 -0
  5. package/README.md +264 -0
  6. package/SECURITY.md +31 -0
  7. package/dist/bridges/chatgptWebBridge.d.ts +2 -0
  8. package/dist/bridges/chatgptWebBridge.js +981 -0
  9. package/dist/bridges/chatgptWebBridge.js.map +1 -0
  10. package/dist/cli/args.d.ts +9 -0
  11. package/dist/cli/args.js +82 -0
  12. package/dist/cli/args.js.map +1 -0
  13. package/dist/cli/format.d.ts +56 -0
  14. package/dist/cli/format.js +752 -0
  15. package/dist/cli/format.js.map +1 -0
  16. package/dist/cli/main.d.ts +2 -0
  17. package/dist/cli/main.js +996 -0
  18. package/dist/cli/main.js.map +1 -0
  19. package/dist/cli/mcpConfig.d.ts +36 -0
  20. package/dist/cli/mcpConfig.js +98 -0
  21. package/dist/cli/mcpConfig.js.map +1 -0
  22. package/dist/index.d.ts +37 -0
  23. package/dist/index.js +38 -0
  24. package/dist/index.js.map +1 -0
  25. package/dist/mcp/protocol.d.ts +44 -0
  26. package/dist/mcp/protocol.js +33 -0
  27. package/dist/mcp/protocol.js.map +1 -0
  28. package/dist/mcp/server.d.ts +1 -0
  29. package/dist/mcp/server.js +106 -0
  30. package/dist/mcp/server.js.map +1 -0
  31. package/dist/mcp/tools.d.ts +10 -0
  32. package/dist/mcp/tools.js +2200 -0
  33. package/dist/mcp/tools.js.map +1 -0
  34. package/dist/mcp/validation.d.ts +8 -0
  35. package/dist/mcp/validation.js +67 -0
  36. package/dist/mcp/validation.js.map +1 -0
  37. package/dist/providers/chatgptWeb.d.ts +2 -0
  38. package/dist/providers/chatgptWeb.js +26 -0
  39. package/dist/providers/chatgptWeb.js.map +1 -0
  40. package/dist/providers/claudeCode.d.ts +4 -0
  41. package/dist/providers/claudeCode.js +32 -0
  42. package/dist/providers/claudeCode.js.map +1 -0
  43. package/dist/providers/codexCli.d.ts +6 -0
  44. package/dist/providers/codexCli.js +25 -0
  45. package/dist/providers/codexCli.js.map +1 -0
  46. package/dist/providers/codexCliModels.d.ts +23 -0
  47. package/dist/providers/codexCliModels.js +147 -0
  48. package/dist/providers/codexCliModels.js.map +1 -0
  49. package/dist/providers/localCommand.d.ts +26 -0
  50. package/dist/providers/localCommand.js +614 -0
  51. package/dist/providers/localCommand.js.map +1 -0
  52. package/dist/providers/markdownPayload.d.ts +7 -0
  53. package/dist/providers/markdownPayload.js +29 -0
  54. package/dist/providers/markdownPayload.js.map +1 -0
  55. package/dist/providers/responseSchema.d.ts +3 -0
  56. package/dist/providers/responseSchema.js +187 -0
  57. package/dist/providers/responseSchema.js.map +1 -0
  58. package/dist/providers/router.d.ts +3 -0
  59. package/dist/providers/router.js +21 -0
  60. package/dist/providers/router.js.map +1 -0
  61. package/dist/providers/stub.d.ts +2 -0
  62. package/dist/providers/stub.js +177 -0
  63. package/dist/providers/stub.js.map +1 -0
  64. package/dist/providers/types.d.ts +37 -0
  65. package/dist/providers/types.js +2 -0
  66. package/dist/providers/types.js.map +1 -0
  67. package/dist/runtime/agentBoard.d.ts +79 -0
  68. package/dist/runtime/agentBoard.js +166 -0
  69. package/dist/runtime/agentBoard.js.map +1 -0
  70. package/dist/runtime/agentBoardArtifact.d.ts +9 -0
  71. package/dist/runtime/agentBoardArtifact.js +171 -0
  72. package/dist/runtime/agentBoardArtifact.js.map +1 -0
  73. package/dist/runtime/agentRunner.d.ts +17 -0
  74. package/dist/runtime/agentRunner.js +92 -0
  75. package/dist/runtime/agentRunner.js.map +1 -0
  76. package/dist/runtime/approvalInbox.d.ts +54 -0
  77. package/dist/runtime/approvalInbox.js +143 -0
  78. package/dist/runtime/approvalInbox.js.map +1 -0
  79. package/dist/runtime/approvalPolicy.d.ts +11 -0
  80. package/dist/runtime/approvalPolicy.js +58 -0
  81. package/dist/runtime/approvalPolicy.js.map +1 -0
  82. package/dist/runtime/artifacts.d.ts +23 -0
  83. package/dist/runtime/artifacts.js +48 -0
  84. package/dist/runtime/artifacts.js.map +1 -0
  85. package/dist/runtime/browserManager.d.ts +37 -0
  86. package/dist/runtime/browserManager.js +356 -0
  87. package/dist/runtime/browserManager.js.map +1 -0
  88. package/dist/runtime/canonicalMemory.d.ts +23 -0
  89. package/dist/runtime/canonicalMemory.js +134 -0
  90. package/dist/runtime/canonicalMemory.js.map +1 -0
  91. package/dist/runtime/chatGptPageReadiness.d.ts +16 -0
  92. package/dist/runtime/chatGptPageReadiness.js +74 -0
  93. package/dist/runtime/chatGptPageReadiness.js.map +1 -0
  94. package/dist/runtime/commandRunner.d.ts +18 -0
  95. package/dist/runtime/commandRunner.js +115 -0
  96. package/dist/runtime/commandRunner.js.map +1 -0
  97. package/dist/runtime/commandSafety.d.ts +7 -0
  98. package/dist/runtime/commandSafety.js +61 -0
  99. package/dist/runtime/commandSafety.js.map +1 -0
  100. package/dist/runtime/config.d.ts +10 -0
  101. package/dist/runtime/config.js +107 -0
  102. package/dist/runtime/config.js.map +1 -0
  103. package/dist/runtime/crewLanes.d.ts +2 -0
  104. package/dist/runtime/crewLanes.js +123 -0
  105. package/dist/runtime/crewLanes.js.map +1 -0
  106. package/dist/runtime/criticPolicy.d.ts +17 -0
  107. package/dist/runtime/criticPolicy.js +50 -0
  108. package/dist/runtime/criticPolicy.js.map +1 -0
  109. package/dist/runtime/defaults.d.ts +5 -0
  110. package/dist/runtime/defaults.js +100 -0
  111. package/dist/runtime/defaults.js.map +1 -0
  112. package/dist/runtime/directives.d.ts +3 -0
  113. package/dist/runtime/directives.js +218 -0
  114. package/dist/runtime/directives.js.map +1 -0
  115. package/dist/runtime/doctor.d.ts +36 -0
  116. package/dist/runtime/doctor.js +185 -0
  117. package/dist/runtime/doctor.js.map +1 -0
  118. package/dist/runtime/errors.d.ts +20 -0
  119. package/dist/runtime/errors.js +41 -0
  120. package/dist/runtime/errors.js.map +1 -0
  121. package/dist/runtime/externalTransfer.d.ts +20 -0
  122. package/dist/runtime/externalTransfer.js +156 -0
  123. package/dist/runtime/externalTransfer.js.map +1 -0
  124. package/dist/runtime/fanout.d.ts +64 -0
  125. package/dist/runtime/fanout.js +263 -0
  126. package/dist/runtime/fanout.js.map +1 -0
  127. package/dist/runtime/gitEvidence.d.ts +10 -0
  128. package/dist/runtime/gitEvidence.js +80 -0
  129. package/dist/runtime/gitEvidence.js.map +1 -0
  130. package/dist/runtime/handoffs.d.ts +32 -0
  131. package/dist/runtime/handoffs.js +100 -0
  132. package/dist/runtime/handoffs.js.map +1 -0
  133. package/dist/runtime/ids.d.ts +2 -0
  134. package/dist/runtime/ids.js +4 -0
  135. package/dist/runtime/ids.js.map +1 -0
  136. package/dist/runtime/localStateIgnore.d.ts +9 -0
  137. package/dist/runtime/localStateIgnore.js +98 -0
  138. package/dist/runtime/localStateIgnore.js.map +1 -0
  139. package/dist/runtime/loop.d.ts +14 -0
  140. package/dist/runtime/loop.js +1863 -0
  141. package/dist/runtime/loop.js.map +1 -0
  142. package/dist/runtime/loopRecommendation.d.ts +109 -0
  143. package/dist/runtime/loopRecommendation.js +566 -0
  144. package/dist/runtime/loopRecommendation.js.map +1 -0
  145. package/dist/runtime/loopResponsibilities.d.ts +2 -0
  146. package/dist/runtime/loopResponsibilities.js +395 -0
  147. package/dist/runtime/loopResponsibilities.js.map +1 -0
  148. package/dist/runtime/loopRunner.d.ts +28 -0
  149. package/dist/runtime/loopRunner.js +81 -0
  150. package/dist/runtime/loopRunner.js.map +1 -0
  151. package/dist/runtime/operatorNextActions.d.ts +2 -0
  152. package/dist/runtime/operatorNextActions.js +344 -0
  153. package/dist/runtime/operatorNextActions.js.map +1 -0
  154. package/dist/runtime/paths.d.ts +9 -0
  155. package/dist/runtime/paths.js +14 -0
  156. package/dist/runtime/paths.js.map +1 -0
  157. package/dist/runtime/permissions.d.ts +9 -0
  158. package/dist/runtime/permissions.js +73 -0
  159. package/dist/runtime/permissions.js.map +1 -0
  160. package/dist/runtime/progressPacket.d.ts +12 -0
  161. package/dist/runtime/progressPacket.js +512 -0
  162. package/dist/runtime/progressPacket.js.map +1 -0
  163. package/dist/runtime/protectedPaths.d.ts +6 -0
  164. package/dist/runtime/protectedPaths.js +48 -0
  165. package/dist/runtime/protectedPaths.js.map +1 -0
  166. package/dist/runtime/providers.d.ts +13 -0
  167. package/dist/runtime/providers.js +60 -0
  168. package/dist/runtime/providers.js.map +1 -0
  169. package/dist/runtime/reconciliation.d.ts +17 -0
  170. package/dist/runtime/reconciliation.js +283 -0
  171. package/dist/runtime/reconciliation.js.map +1 -0
  172. package/dist/runtime/redaction.d.ts +1 -0
  173. package/dist/runtime/redaction.js +5 -0
  174. package/dist/runtime/redaction.js.map +1 -0
  175. package/dist/runtime/remoteRepoContext.d.ts +77 -0
  176. package/dist/runtime/remoteRepoContext.js +316 -0
  177. package/dist/runtime/remoteRepoContext.js.map +1 -0
  178. package/dist/runtime/repoContext.d.ts +50 -0
  179. package/dist/runtime/repoContext.js +399 -0
  180. package/dist/runtime/repoContext.js.map +1 -0
  181. package/dist/runtime/repoGateway.d.ts +64 -0
  182. package/dist/runtime/repoGateway.js +308 -0
  183. package/dist/runtime/repoGateway.js.map +1 -0
  184. package/dist/runtime/responseContracts.d.ts +3 -0
  185. package/dist/runtime/responseContracts.js +86 -0
  186. package/dist/runtime/responseContracts.js.map +1 -0
  187. package/dist/runtime/reviewLanes.d.ts +2 -0
  188. package/dist/runtime/reviewLanes.js +343 -0
  189. package/dist/runtime/reviewLanes.js.map +1 -0
  190. package/dist/runtime/reviewRouting.d.ts +51 -0
  191. package/dist/runtime/reviewRouting.js +152 -0
  192. package/dist/runtime/reviewRouting.js.map +1 -0
  193. package/dist/runtime/revisionPacket.d.ts +38 -0
  194. package/dist/runtime/revisionPacket.js +144 -0
  195. package/dist/runtime/revisionPacket.js.map +1 -0
  196. package/dist/runtime/revisionTrail.d.ts +2 -0
  197. package/dist/runtime/revisionTrail.js +162 -0
  198. package/dist/runtime/revisionTrail.js.map +1 -0
  199. package/dist/runtime/role-assignment.d.ts +4 -0
  200. package/dist/runtime/role-assignment.js +21 -0
  201. package/dist/runtime/role-assignment.js.map +1 -0
  202. package/dist/runtime/roleRoster.d.ts +28 -0
  203. package/dist/runtime/roleRoster.js +96 -0
  204. package/dist/runtime/roleRoster.js.map +1 -0
  205. package/dist/runtime/runInsights.d.ts +121 -0
  206. package/dist/runtime/runInsights.js +305 -0
  207. package/dist/runtime/runInsights.js.map +1 -0
  208. package/dist/runtime/runMonitor.d.ts +33 -0
  209. package/dist/runtime/runMonitor.js +143 -0
  210. package/dist/runtime/runMonitor.js.map +1 -0
  211. package/dist/runtime/runtime.d.ts +15 -0
  212. package/dist/runtime/runtime.js +199 -0
  213. package/dist/runtime/runtime.js.map +1 -0
  214. package/dist/runtime/runtimeInfo.d.ts +9 -0
  215. package/dist/runtime/runtimeInfo.js +76 -0
  216. package/dist/runtime/runtimeInfo.js.map +1 -0
  217. package/dist/runtime/store.d.ts +4 -0
  218. package/dist/runtime/store.js +48 -0
  219. package/dist/runtime/store.js.map +1 -0
  220. package/dist/runtime/summons.d.ts +25 -0
  221. package/dist/runtime/summons.js +403 -0
  222. package/dist/runtime/summons.js.map +1 -0
  223. package/dist/runtime/teamPresets.d.ts +14 -0
  224. package/dist/runtime/teamPresets.js +153 -0
  225. package/dist/runtime/teamPresets.js.map +1 -0
  226. package/dist/runtime/types.d.ts +505 -0
  227. package/dist/runtime/types.js +28 -0
  228. package/dist/runtime/types.js.map +1 -0
  229. package/dist/runtime/validationCommands.d.ts +18 -0
  230. package/dist/runtime/validationCommands.js +106 -0
  231. package/dist/runtime/validationCommands.js.map +1 -0
  232. package/dist/tui/dashboard.d.ts +41 -0
  233. package/dist/tui/dashboard.js +1115 -0
  234. package/dist/tui/dashboard.js.map +1 -0
  235. package/docs/ARCHITECTURE.md +277 -0
  236. package/docs/CLI_SPEC.md +396 -0
  237. package/docs/CODEX_SETUP.md +288 -0
  238. package/docs/COMPLETION_CONTRACT.md +52 -0
  239. package/docs/CONTRIBUTOR_GUIDE.md +70 -0
  240. package/docs/DEMO.md +62 -0
  241. package/docs/GLOSSARY.md +46 -0
  242. package/docs/GOAL_LOOP_SCHEDULE.md +50 -0
  243. package/docs/KNOWN_LIMITATIONS.md +29 -0
  244. package/docs/LICENSING.md +21 -0
  245. package/docs/LOOP_RECIPES.md +290 -0
  246. package/docs/LOOP_SELECTION_UX.md +118 -0
  247. package/docs/MCP_SPEC.md +689 -0
  248. package/docs/MEMORY_AND_RECONCILIATION.md +222 -0
  249. package/docs/NPM_PUBLISHING.md +51 -0
  250. package/docs/OPEN_DECISIONS.md +81 -0
  251. package/docs/PROMPT_SCHEMAS.md +411 -0
  252. package/docs/PROVIDER_ADAPTERS.md +323 -0
  253. package/docs/PROVIDER_MATRIX.md +21 -0
  254. package/docs/PUBLIC_REPO_READINESS.md +49 -0
  255. package/docs/RESEARCH_NOTES.md +92 -0
  256. package/docs/ROADMAP.md +94 -0
  257. package/docs/ROLE_CONTRACTS.md +252 -0
  258. package/docs/RUNTIME_LOOP.md +240 -0
  259. package/docs/SECURITY_AND_PRIVACY.md +161 -0
  260. package/docs/TESTING_AND_VERIFICATION.md +180 -0
  261. package/docs/TRUST_MODEL.md +65 -0
  262. package/docs/decisions/0001-runtime-first-cli-and-mcp.md +23 -0
  263. package/docs/decisions/0002-provider-neutral-role-mapping.md +43 -0
  264. package/docs/decisions/0003-separate-implementation-and-verification.md +27 -0
  265. package/docs/product/README.md +14 -0
  266. package/docs/product/model-selection.md +88 -0
  267. package/docs/product/positioning.md +37 -0
  268. package/docs/product/pro-usage-modes.md +70 -0
  269. package/docs/product/roadmap.md +57 -0
  270. package/docs/product/role-policy.md +89 -0
  271. package/docs/product/runtime-invariants.md +44 -0
  272. package/docs/release/v0.1.0-preview.0.md +48 -0
  273. package/examples/stub-demo/README.md +25 -0
  274. package/package.json +55 -0
@@ -0,0 +1,180 @@
1
+ # Testing And Verification
2
+
3
+ Verification is an independent runtime phase, not an implementer self-report.
4
+
5
+ ## Core Rule
6
+
7
+ The implementer and verifier cannot be the same agent for the same task.
8
+
9
+ Implementers may run tests for feedback, but their results are not authoritative until the runtime captures logs and a separate verifier reviews them.
10
+
11
+ ## Responsibilities
12
+
13
+ ### Implementer
14
+
15
+ May:
16
+
17
+ - run local checks for feedback
18
+ - report commands it ran
19
+ - report unresolved risks
20
+
21
+ Must not:
22
+
23
+ - mark its own work accepted
24
+ - edit tests to make its implementation pass without explicit approval
25
+ - hide failing commands
26
+
27
+ ### Runtime
28
+
29
+ Must:
30
+
31
+ - run configured validation commands
32
+ - capture raw logs
33
+ - capture exit codes
34
+ - capture diffs before and after
35
+ - enforce protected path policies
36
+
37
+ Current implementation:
38
+
39
+ - `thehood exec <run-id> -- <command> [args...]` captures command logs as artifacts.
40
+ - `thehood evidence <run-id>` captures git status, git diff, and protected path matches.
41
+ - The verification phase discovers package validation scripts in the order `typecheck`, `test`, `lint`, `build`, runs the first available script through the runtime command runner, and attaches a validation summary artifact before verifier review.
42
+ - After validation evidence is captured, the runtime attaches a `review_routing` artifact that classifies implementation risk and records which subjective review lanes are required or skipped.
43
+ - Local command providers such as Codex CLI and Claude Code attach redacted stdout/stderr `log` artifacts plus compact `provider_invocation` artifacts with command, role, provider/model, workspace mode, sandbox or permission mode, exit code, timeout state, output lengths and refs, parse status, and isolated patch refs when present.
44
+ - Isolated implementer patches stop at an approval gate, then deterministic runtime integration applies the approved patch and writes an integration report before verifier review.
45
+ - Integrated patches that touch protected test, fixture, snapshot, or eval paths stop at a separate approval gate before verifier review.
46
+ - Completed runs attach a runtime-owned final report artifact with command, artifact, and approval refs.
47
+ - QA, verifier, or validation risk can cause the runtime to call a read-only critic and attach a `critic_trigger` artifact before continuing to the verifier or an approval gate.
48
+ - Fixable QA, critic, or verifier revision findings can cause the runtime to attach a `revision_packet` artifact and delegate a repair pass back to the implementer within the same max-iteration budget.
49
+ - Revision trails link each revision packet to its repair pass, post-repair validation evidence, and review responses when present; they are visibility metadata only.
50
+ - Completed runs, progress packets, status output, agent board snapshots, and optional dashboard artifact payloads include derived review lane metadata for verifier, runtime QA/validation, QA tester, and critic evidence when present. They also include loop responsibility schedules and product-facing crew lane trails that summarize planner, implementer, verifier, runtime QA, model-assisted QA tester, critic, reconciliation, integration, approval, and completion ownership from existing evidence. These lanes, schedules, board cards, and dashboard cards summarize existing runtime evidence; they do not schedule new work, grant tools, or replace verifier approval.
51
+ - MCP host responses are compact by default and should be tested as refs-only navigation surfaces. Full plans, logs, progress packets, and provider outputs must remain available through artifact reads instead of being mirrored into every MCP status or loop response.
52
+ - Plugin skills, dashboard cards, and Codex-native subagents are delegation or display helpers. Their output is advisory unless it is captured and routed through TheHood runtime evidence, approvals, and verifier policy.
53
+ - Runs fail closed before the next provider call once recorded provider responses reach `maxIterations`.
54
+
55
+ ### QA Tester
56
+
57
+ May:
58
+
59
+ - inspect runtime-captured evidence
60
+ - inspect diffs, plans, and provider artifacts
61
+ - identify missed cases and product risks
62
+ - recommend deterministic validation commands
63
+
64
+ Must not:
65
+
66
+ - edit files
67
+ - change tests
68
+ - claim a command passed unless the runtime captured it
69
+ - satisfy runtime QA/validation gates
70
+ - accept work on behalf of the verifier
71
+
72
+ ### Verifier
73
+
74
+ Must:
75
+
76
+ - inspect runtime-captured evidence
77
+ - classify failures
78
+ - compare output against acceptance criteria
79
+ - recommend approve, revise, abort, or ask_user
80
+
81
+ Must not:
82
+
83
+ - edit files
84
+ - update tests
85
+ - apply patches
86
+
87
+ ## Protected Test Paths
88
+
89
+ Default protected patterns:
90
+
91
+ ```yaml
92
+ protected_test_paths:
93
+ - "**/test/**"
94
+ - "**/tests/**"
95
+ - "**/*.spec.*"
96
+ - "**/*.test.*"
97
+ - "**/__snapshots__/**"
98
+ - "**/fixtures/**"
99
+ - "**/evals/**"
100
+ ```
101
+
102
+ Changes under protected paths are classified as `TEST_CHANGE`.
103
+
104
+ A `TEST_CHANGE` requires:
105
+
106
+ - explicit reason
107
+ - separate approval
108
+ - verifier review
109
+ - final user-visible mention
110
+
111
+ ## Verification Commands
112
+
113
+ The runtime should discover project commands from existing files before inventing commands.
114
+
115
+ Examples:
116
+
117
+ - `package.json`
118
+ - `pyproject.toml`
119
+ - `Cargo.toml`
120
+ - `Package.swift`
121
+ - `Makefile`
122
+ - CI configuration
123
+
124
+ The runtime should prefer existing project validation commands.
125
+
126
+ ## Review Routing
127
+
128
+ Deterministic validation is always required for implementation verification.
129
+
130
+ The current routing policy is conservative:
131
+
132
+ - Verifier review remains required for implementation runs when a verifier is assigned.
133
+ - Model-assisted QA is risk-gated. It runs for standard or high-risk implementation evidence and can be skipped for narrow docs/copy-only changes.
134
+ - Critic remains escalation-only and is triggered from QA, verifier, or validation evidence by runtime policy.
135
+ - Missing verifier assignment stops at an approval gate instead of silently completing.
136
+
137
+ The `review_routing` artifact records the risk tier, action, required lanes, skipped roles, compact signals, and reasons. It is display and orchestration evidence; it does not replace validation logs, QA output, verifier verdicts, or critic trigger artifacts.
138
+
139
+ ## Verdicts
140
+
141
+ | Verdict | Meaning |
142
+ | --- | --- |
143
+ | `approve` | Criteria satisfied, no blocking risk |
144
+ | `revise` | Fixable issue exists |
145
+ | `abort` | Task is unsafe, impossible, or outside scope |
146
+ | `ask_user` | Human decision required |
147
+
148
+ ## Evidence Format
149
+
150
+ Verifier output should reference evidence:
151
+
152
+ ```yaml
153
+ evidence:
154
+ - kind: test_log
155
+ ref: logs/run-123/npm-test.txt
156
+ finding: "npm test passed with exit code 0"
157
+ - kind: diff
158
+ ref: diffs/run-123/iteration-2.patch
159
+ finding: "Only src/export.ts changed"
160
+ ```
161
+
162
+ ## Anti-Patterns
163
+
164
+ - Same model edits and verifies.
165
+ - Verifier receives only the implementer's summary.
166
+ - Test files are silently modified.
167
+ - Final report says "tests passed" without command names and exit codes.
168
+ - Runtime applies an isolated patch before explicit approval.
169
+ - A same-run summon labeled `qa`, `review`, or `critique` is treated as satisfying a required verifier or QA gate.
170
+ - A same-run fan-out is treated as a scheduler, acceptance vote, or substitute for runtime validation.
171
+ - A model QA response is treated as proof that validation commands passed.
172
+ - A critic response is treated as proof that implementation is accepted or deterministic validation passed.
173
+ - A provider invocation artifact is treated as proof that code behavior is correct; it only proves the local agent adapter command ran and returned parseable or fallback output.
174
+ - A stale pre-revision verifier or validation result is treated as satisfying the post-repair review gate.
175
+ - A risk-routing decision is treated as proof that code behavior is correct without validation and verifier evidence.
176
+ - A crew lane trail is treated as proof that a gate was satisfied without reading the lane's artifact, event, or command evidence.
177
+ - An agent board card is treated as proof that a gate was satisfied without reading the card's artifact, event, handoff, or command evidence.
178
+ - An agent board dashboard card is treated as proof that a gate was satisfied without reading the underlying artifact, event, handoff, or command evidence.
179
+ - A Codex-native subagent appearing in the Subagents panel is treated as satisfying a TheHood runtime lane without runtime evidence and verifier review.
180
+ - A revision trail is treated as proof that the repair is correct without reading the post-repair validation and review refs.
@@ -0,0 +1,65 @@
1
+ # Trust Model
2
+
3
+ TheHood's trust model is deliberately simple:
4
+
5
+ - Models suggest.
6
+ - The runtime enforces.
7
+ - Users stay in control.
8
+
9
+ The runtime is the authority for state, permissions, approvals, artifacts, and verification. Provider sessions are disposable.
10
+
11
+ ## Runtime Authority
12
+
13
+ The runtime owns:
14
+
15
+ - run state
16
+ - role assignments
17
+ - approval gates
18
+ - provider directives
19
+ - provider response validation
20
+ - command execution metadata
21
+ - git evidence
22
+ - isolated patch capture and integration reports
23
+ - protected path classification
24
+ - final reports and progress packets
25
+
26
+ Models can summarize or recommend actions, but summaries do not replace runtime evidence.
27
+
28
+ ## Role Separation
29
+
30
+ The implementer and verifier must not be the same authority for the same task. Verifier, QA tester, critic, and researcher roles are read-only. Same-run summons and fan-outs are advisory sidecar evidence; they do not satisfy required verifier or runtime validation lanes.
31
+
32
+ ## Evidence
33
+
34
+ Runtime-captured evidence wins over model claims. A completed run should point to exact artifacts such as:
35
+
36
+ - command logs with cwd, args, duration, and exit code
37
+ - git status and diff snapshots
38
+ - provider invocation artifacts
39
+ - validation command artifacts
40
+ - review routing artifacts
41
+ - verifier or critic responses
42
+ - final reports and progress packets
43
+
44
+ ## External Transfers
45
+
46
+ Before local repo context, progress packets, or memory bodies cross a browser/API provider boundary, TheHood writes a transfer manifest. The manifest records destination, purpose, source refs, byte counts, risk class, bounded preview, and approval copy.
47
+
48
+ Refs-only GitHub connector context identifies remote coordinates instead of including local file excerpts, and it is selected only when the provider connector route is confirmed. It does not replace transfer manifests for local context bodies.
49
+
50
+ ## Fail-Closed Behavior
51
+
52
+ The runtime should stop instead of guessing when it sees:
53
+
54
+ - missing approval
55
+ - protected test/fixture/snapshot/eval changes
56
+ - secret-risk transfers
57
+ - unverified provider output
58
+ - schema-invalid model responses
59
+ - verifier `ask_user` or `abort`
60
+ - max iteration exhaustion
61
+ - dirty-checkout integration blockers
62
+
63
+ ## Public Repo Boundary
64
+
65
+ The public repository must not include credentials, browser profiles, provider transcripts, private run logs, local `.thehood` state, environment files, generated package archives, or real private repo data. Examples and demos must be synthetic.
@@ -0,0 +1,23 @@
1
+ # 0001: Runtime First With CLI And MCP
2
+
3
+ ## Status
4
+
5
+ Accepted
6
+
7
+ ## Context
8
+
9
+ TheHood needs to run agent loops from Codex and eventually a macOS menubar app. The orchestration logic must not live in a frontend or a single chat session.
10
+
11
+ ## Decision
12
+
13
+ Build a local runtime first. Expose it through a CLI and MCP server before building a macOS UI.
14
+
15
+ The CLI is the complete control plane. The MCP server lets Codex trigger runtime actions. The menubar app can later provide status, approvals, and quick triggers over the same runtime.
16
+
17
+ ## Consequences
18
+
19
+ - The system can run headless.
20
+ - Codex integration does not own orchestration safety.
21
+ - The menubar app remains thin.
22
+ - Runtime behavior is easier to test and audit.
23
+
@@ -0,0 +1,43 @@
1
+ # 0002: Provider Neutral Role Mapping
2
+
3
+ ## Status
4
+
5
+ Accepted
6
+
7
+ ## Context
8
+
9
+ Users should be able to choose which model performs which role. Codex is the default role owner for new repos, but when the user configures it, ChatGPT Pro may orchestrate one run while Claude Opus, Fable, or a model from another provider orchestrates the next.
10
+
11
+ ## Decision
12
+
13
+ TheHood will use provider-neutral role mapping.
14
+
15
+ Each role is assigned by provider and model:
16
+
17
+ ```yaml
18
+ roles:
19
+ orchestrator:
20
+ provider: codex-cli
21
+ model: default
22
+ implementer:
23
+ provider: codex-cli
24
+ model: default
25
+ qa:
26
+ provider: codex-cli
27
+ model: spark
28
+ verifier:
29
+ provider: codex-cli
30
+ model: spark
31
+ critic:
32
+ provider: codex-cli
33
+ model: spark
34
+ ```
35
+
36
+ Provider adapters normalize requests and responses into TheHood schemas.
37
+
38
+ ## Consequences
39
+
40
+ - GPT, Claude, Codex, Claude Code, and local models can collaborate.
41
+ - Provider-specific behavior is isolated.
42
+ - Role contracts stay stable even as providers change.
43
+ - Users can swap orchestrators or workers without changing runtime logic.
@@ -0,0 +1,27 @@
1
+ # 0003: Separate Implementation And Verification
2
+
3
+ ## Status
4
+
5
+ Accepted
6
+
7
+ ## Context
8
+
9
+ An agent that implements a change has an incentive to justify its own work. It may overlook failures or modify tests to make the result pass.
10
+
11
+ ## Decision
12
+
13
+ The implementer and verifier must not be the same agent for the same task.
14
+
15
+ Implementers may run tests for feedback, but the authoritative verification phase belongs to the runtime and a separate verifier.
16
+
17
+ The verifier has no edit tools.
18
+
19
+ Changes to tests, fixtures, snapshots, or evaluation files are protected and require explicit classification and approval.
20
+
21
+ ## Consequences
22
+
23
+ - The system does not let an implementer grade its own work.
24
+ - Test changes become visible and reviewable.
25
+ - Runtime-captured logs become the evidence base.
26
+ - Verification can be assigned to a different model family for stronger review.
27
+
@@ -0,0 +1,14 @@
1
+ # Product Strategy
2
+
3
+ TheHood is a deterministic local runtime that turns Codex into a governed multi-model workbench. The runtime owns loop control, permissions, evidence, approvals, artifacts, and role separation. Users can assign GPT, Claude, Codex, API, or local models to the roles that fit the work.
4
+
5
+ The product promise is not hidden Pro usage or one preferred model family. The promise is governed execution with deliberate model choice: Codex as the default workbench, Claude as an independent second judge or preferred alternate worker, and ChatGPT Pro as visible strategic judgment when the stakes justify it.
6
+
7
+ ## Product Docs
8
+
9
+ - [Positioning](positioning.md)
10
+ - [Agent Usage Modes](pro-usage-modes.md)
11
+ - [Model Selection](model-selection.md)
12
+ - [Role Policy](role-policy.md)
13
+ - [Runtime Invariants](runtime-invariants.md)
14
+ - [Product Roadmap](roadmap.md)
@@ -0,0 +1,88 @@
1
+ # Model Selection
2
+
3
+ TheHood lets users assign a provider and model to each runtime role. Codex stays the operating surface, but the loop can use GPT, Claude, Codex, API, or local models according to the user's role map.
4
+
5
+ Every assignment uses:
6
+
7
+ ```text
8
+ provider:model
9
+ ```
10
+
11
+ Examples:
12
+
13
+ ```bash
14
+ thehood roles set orchestrator chatgpt-web:chatgpt-pro --repo .
15
+ thehood roles set planner claude-code:opus --repo .
16
+ thehood roles set implementer claude-code:sonnet --repo .
17
+ thehood roles set qa codex-cli:spark --repo .
18
+ thehood roles set verifier claude-code:sonnet --repo .
19
+ thehood roles set critic claude-code:fable --repo .
20
+ ```
21
+
22
+ ## Selection Rules
23
+
24
+ - `codex-cli` discovers live model slugs from `codex debug models`.
25
+ - `claude-code` supports known aliases such as `sonnet`, `opus`, `haiku`, `mythos`, and `fable`, and passes explicit non-default names through to the local Claude CLI.
26
+ - `chatgpt-web` supports `chatgpt-pro` and `configured`; the user must select and confirm the intended ChatGPT model in the browser bridge path.
27
+ - `openai-api` and `anthropic-api` are future API providers. Their configs expose `configured` model slots, but the adapters are not implemented yet.
28
+ - `configured` means "use the provider's configured/default local selection" for local CLI providers.
29
+
30
+ The runtime should not hardcode a final model menu. Model catalogs change faster than TheHood releases. Built-in aliases are convenience labels; custom names remain valid when the user's configured provider supports them.
31
+
32
+ ## Common Role Maps
33
+
34
+ Codex default:
35
+
36
+ ```text
37
+ orchestrator: codex-cli:default
38
+ implementer: codex-cli:default
39
+ qa: codex-cli:spark
40
+ verifier: codex-cli:spark
41
+ critic: codex-cli:spark
42
+ ```
43
+
44
+ Claude second judge:
45
+
46
+ ```text
47
+ orchestrator: codex-cli:default
48
+ implementer: codex-cli:default
49
+ qa: codex-cli:spark
50
+ verifier: codex-cli:spark
51
+ critic: claude-code:sonnet
52
+ ```
53
+
54
+ Spark plus Sonnet:
55
+
56
+ ```text
57
+ orchestrator: codex-cli:default
58
+ implementer: codex-cli:spark
59
+ qa: codex-cli:spark
60
+ verifier: claude-code:sonnet
61
+ critic: claude-code:sonnet
62
+ ```
63
+
64
+ Claude builder:
65
+
66
+ ```text
67
+ orchestrator: codex-cli:default
68
+ implementer: claude-code:sonnet
69
+ qa: codex-cli:spark
70
+ verifier: codex-cli:spark
71
+ critic: codex-cli:spark
72
+ ```
73
+
74
+ Pro plus Claude high assurance:
75
+
76
+ ```text
77
+ orchestrator: chatgpt-web:chatgpt-pro
78
+ implementer: codex-cli:default
79
+ qa: codex-cli:spark
80
+ verifier: claude-code:sonnet
81
+ critic: claude-code:sonnet
82
+ ```
83
+
84
+ ## Runtime Boundaries
85
+
86
+ Provider choice does not grant authority. The runtime still owns approval gates, external transfer manifests, artifact storage, validation evidence, isolated patch integration, protected-path classification, and implementer/verifier separation.
87
+
88
+ Claude, Pro, GPT, Codex, and local agents can suggest, implement, review, or judge according to role. None of them can override runtime enforcement.
@@ -0,0 +1,37 @@
1
+ # Positioning
2
+
3
+ TheHood gives Codex a governed runtime for using the best available model at each layer of an agent loop.
4
+
5
+ The runtime controls execution. Codex is the default workbench. Claude can be brought in as an independent second judge or preferred alternate worker. ChatGPT Pro can be used as visible premium strategic judgment when the work is ambiguous, high-value, reputational, or needs reconciliation.
6
+
7
+ ## Core Claim
8
+
9
+ TheHood is not a hidden bridge that lets Codex silently spend another provider's reasoning. It is a local agent runtime that makes model choice visible, policy-driven, and auditable.
10
+
11
+ ## Product Shape
12
+
13
+ - Runtime conductor: deterministic loop control, role separation, approvals, evidence, artifacts, and verification gates.
14
+ - Codex-first workbench: Codex remains the default control and implementation surface.
15
+ - User-controlled model roles: users can assign GPT, Claude, Codex, API, or local models to orchestrator, planner, implementer, QA, verifier, critic, and reconciliation roles.
16
+ - Claude second judge: Claude is a simple way to challenge Codex or Pro output with an independent model family, or to act as the user's preferred builder/reviewer.
17
+ - Visible Pro escalation: Pro is used deliberately for strategic planning, product judgment, reconciliation, critique, and high-reputation review.
18
+ - Connector fallback: when direct Pro calls are blocked by Codex or tenant host policy, ChatGPT MCP connector mode is the safe handoff path.
19
+ - Trusted MCP host preview: ChatGPT Developer Mode can reach a local TheHood MCP server through Secure MCP Tunnel, while TheHood still owns repo access, approvals, logs, and verification gates.
20
+
21
+ ## Tradeoff
22
+
23
+ The original pitch, "Codex can use ChatGPT Pro," is simple and compelling. It is also too easy to misread as Pro being the hidden conductor and too narrow for users who want Claude, GPT, and Codex in the same workflow.
24
+
25
+ The stronger pitch is more precise:
26
+
27
+ - The runtime executes and enforces.
28
+ - Users choose model owners per role.
29
+ - Claude can challenge, verify, or implement by user preference.
30
+ - Pro advises and judges when premium strategic reasoning is worth it.
31
+ - Users can see which model was used, why it was recommended, and whether it was automatic.
32
+
33
+ This is slightly less viral than the original pitch, but it is more trustworthy and easier to defend for serious software work.
34
+
35
+ ## User-Facing Line
36
+
37
+ Use TheHood when you want Codex to become a governed multi-model workbench: Codex builds, Claude can second-judge or assist, Pro can approve strategy, and the runtime enforces the loop.
@@ -0,0 +1,70 @@
1
+ # Agent Usage Modes
2
+
3
+ Agent usage modes define the product's reasoning posture. They decide when to recommend Codex-only work, Claude second judgment, Pro strategic judgment, or a high-assurance combination. They do not weaken runtime gates.
4
+
5
+ `Balanced` is the default mode.
6
+
7
+ | Mode | Posture | Claude is recommended when | Pro is recommended when | Automatic use can happen when | Approval is needed when |
8
+ | --- | --- | --- | --- | --- | --- |
9
+ | Efficient | Minimize premium and cross-model usage. | User explicitly asks, a loop repeats, or Codex/Spark output needs a quick second judge. | Architecture, product, or planning ambiguity; repeated failure; risky judgment. | Deadlock, repeated failure, or explicit configured escalation. | External transfer is sensitive, budgets would be exceeded, or use is discretionary. |
10
+ | Balanced | Use the model that likely improves the outcome. | Codex implemented a risky change, agents disagree, or the user asks for Claude in Codex. | Planning, reconciliation, agent disagreement, high-risk refactors, release decisions. | Low-risk configured Claude review, ambiguous planning, or reconciliation after disagreement. | Context transfer is large or sensitive, or the action crosses runtime gates. |
11
+ | High Assurance | Optimize for correctness and public trust. | Public docs, release plans, security, privacy, architecture, migration, or high-risk implementation review. | Final strategic plan review, release-risk review, unresolved QA/verifier conflict. | Claude critic/verifier before final strategic approval, and Pro final review when configured. | Any edit, dependency, network, protected-path, or sensitive-transfer gate. |
12
+ | Pro-led | Pro leads strategy while runtime controls mechanics. | Pro plans need red-teaming, Codex implementation needs verification, or the user prefers Claude as an alternate worker. | Most planning, prioritization, critique, reconciliation, and final judgment. | Strategic phases, Claude red-team review, and reconciliation by default. | Any mutation, risky external transfer, provider readiness issue, or policy override. |
13
+
14
+ ## Recommendation Rules
15
+
16
+ The runtime should recommend Claude when one or more of these are true:
17
+
18
+ - The user wants to use both Claude and GPT inside the Codex workflow.
19
+ - Codex or Pro produced the plan and an independent model-family critique would reduce risk.
20
+ - Codex implemented a risky change and the verifier should be different.
21
+ - The user prefers Claude for implementation, writing, cautious reasoning, or review.
22
+ - The task calls for a second judge but not full premium strategic approval.
23
+
24
+ The runtime should recommend Pro when one or more of these are true:
25
+
26
+ - The task is ambiguous and the next step is not obvious.
27
+ - Agents disagree or produce incompatible plans.
28
+ - The decision affects architecture, security, privacy, pricing, public messaging, or product trust.
29
+ - A prior loop failed, stalled, or returned low-confidence output.
30
+ - The result is user-facing or reputationally important.
31
+ - The runtime needs reconciliation across planner, implementer, QA, verifier, and critic output.
32
+
33
+ ## Automatic Use
34
+
35
+ Automatic model calls are allowed only when the selected mode permits that class of escalation. Automatic does not mean hidden.
36
+
37
+ Every automatic model call must record:
38
+
39
+ - mode
40
+ - reason code
41
+ - role
42
+ - provider/model
43
+ - context source refs
44
+ - approval or auto-approval basis
45
+ - resulting artifact refs
46
+
47
+ User-facing copy should be short:
48
+
49
+ > Bringing in Claude because Codex produced the implementation and this review needs an independent model family. Runtime gates still control edits, approvals, evidence, and execution.
50
+
51
+ > Using Pro because this decision is ambiguous and affects product architecture. Runtime gates still control edits, approvals, evidence, and execution.
52
+
53
+ ## Approval Rules
54
+
55
+ Approval is required for risk, not merely because Claude or Pro exists.
56
+
57
+ Require approval when any of these are true:
58
+
59
+ - context is sensitive, unusually large, or external-transfer-bound
60
+ - configured provider budgets, size limits, or frequency limits would be exceeded
61
+ - a provider bridge requires setup, authentication, or session handoff
62
+ - the task crosses edit, dependency, network, protected-path, or integration gates
63
+ - Efficient mode is active and cross-model use is discretionary
64
+
65
+ Do not require a separate approval when all of these are true:
66
+
67
+ - the selected mode permits the provider call
68
+ - the call is planning or review only
69
+ - context stays inside configured low-risk limits
70
+ - the runtime records the provider invocation and artifacts
@@ -0,0 +1,57 @@
1
+ # Product Roadmap
2
+
3
+ This roadmap turns provider-neutral model choice into product behavior without weakening runtime authority.
4
+
5
+ ## Phase 1: Product Decision And Docs
6
+
7
+ - Position TheHood as a governed multi-model runtime for Codex.
8
+ - Document agent usage modes across Codex, Claude, Pro, API, and local providers.
9
+ - Document model selection and passthrough alias policy.
10
+ - Document role policy.
11
+ - Document configurable policy versus runtime invariants.
12
+ - Add concise UX copy for recommended, automatic, approval-required, and blocked provider paths.
13
+
14
+ Done means a user can understand which model is used, why it is recommended, and what it cannot override.
15
+
16
+ ## Phase 2: UX And Audit Surface
17
+
18
+ - Add a mode selector.
19
+ - Add a model picker with `provider:model` assignments, known aliases, and custom passthrough entry.
20
+ - Show a compact "Why this agent?" explanation before or during Claude, Pro, or other provider use.
21
+ - Record provider usage as audit/timeline events.
22
+ - Show approval copy for sensitive or large context transfer.
23
+ - Expose settings for budgets, thresholds, role assignments, and redaction preferences.
24
+
25
+ Done means every model-backed call has a visible reason and a mode-derived policy basis.
26
+
27
+ ## Phase 3: Runtime Policy Integration
28
+
29
+ - Map usage modes to runtime policy.
30
+ - Add escalation reason codes.
31
+ - Gate Claude, Pro, and API calls through provider readiness, host-policy preflight where applicable, and transfer policy.
32
+ - Preserve directive acknowledgement validation.
33
+ - Attach provider invocation and response artifacts for every model-backed call.
34
+ - Surface model policy as listed, discovered, or passthrough in doctor, roster, MCP, and settings views.
35
+
36
+ Done means model calls can be automatic where policy allows, but never hidden and never authoritative over runtime gates.
37
+
38
+ ## Phase 4: Quality Loops
39
+
40
+ - Use Pro for reconciliation after agent disagreement.
41
+ - Use Claude as independent critic, verifier, or alternate implementer when configured.
42
+ - Use Pro critic or final review for strategic/product decisions.
43
+ - Use High Assurance final review for public or reputational work.
44
+ - Escalate to Pro after deadlock or repeated failure where configured.
45
+ - Escalate to Claude second judgment after Codex self-review risk, implementation risk, or user preference triggers.
46
+
47
+ Done means cross-model use reduces bad loops instead of adding another loop.
48
+
49
+ ## Phase 5: Public Messaging
50
+
51
+ - Update README and public docs with the governed-runtime framing.
52
+ - Show examples for Efficient, Balanced, High Assurance, and Pro-led modes.
53
+ - Show examples for Claude second judge, Spark plus Sonnet, Claude builder, and Pro plus Claude high assurance.
54
+ - Explain evidence transfer, connector mode, and privacy boundaries.
55
+ - Keep "Codex can use Pro" and "Claude inside Codex" as capabilities, not authority claims.
56
+
57
+ Done means the short pitch is compelling and the trust model is inspectable.