thehood 0.1.0-preview.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (274) hide show
  1. package/CODE_OF_CONDUCT.md +21 -0
  2. package/CONTRIBUTING.md +58 -0
  3. package/LICENSE +21 -0
  4. package/PRIVACY.md +49 -0
  5. package/README.md +264 -0
  6. package/SECURITY.md +31 -0
  7. package/dist/bridges/chatgptWebBridge.d.ts +2 -0
  8. package/dist/bridges/chatgptWebBridge.js +981 -0
  9. package/dist/bridges/chatgptWebBridge.js.map +1 -0
  10. package/dist/cli/args.d.ts +9 -0
  11. package/dist/cli/args.js +82 -0
  12. package/dist/cli/args.js.map +1 -0
  13. package/dist/cli/format.d.ts +56 -0
  14. package/dist/cli/format.js +752 -0
  15. package/dist/cli/format.js.map +1 -0
  16. package/dist/cli/main.d.ts +2 -0
  17. package/dist/cli/main.js +996 -0
  18. package/dist/cli/main.js.map +1 -0
  19. package/dist/cli/mcpConfig.d.ts +36 -0
  20. package/dist/cli/mcpConfig.js +98 -0
  21. package/dist/cli/mcpConfig.js.map +1 -0
  22. package/dist/index.d.ts +37 -0
  23. package/dist/index.js +38 -0
  24. package/dist/index.js.map +1 -0
  25. package/dist/mcp/protocol.d.ts +44 -0
  26. package/dist/mcp/protocol.js +33 -0
  27. package/dist/mcp/protocol.js.map +1 -0
  28. package/dist/mcp/server.d.ts +1 -0
  29. package/dist/mcp/server.js +106 -0
  30. package/dist/mcp/server.js.map +1 -0
  31. package/dist/mcp/tools.d.ts +10 -0
  32. package/dist/mcp/tools.js +2200 -0
  33. package/dist/mcp/tools.js.map +1 -0
  34. package/dist/mcp/validation.d.ts +8 -0
  35. package/dist/mcp/validation.js +67 -0
  36. package/dist/mcp/validation.js.map +1 -0
  37. package/dist/providers/chatgptWeb.d.ts +2 -0
  38. package/dist/providers/chatgptWeb.js +26 -0
  39. package/dist/providers/chatgptWeb.js.map +1 -0
  40. package/dist/providers/claudeCode.d.ts +4 -0
  41. package/dist/providers/claudeCode.js +32 -0
  42. package/dist/providers/claudeCode.js.map +1 -0
  43. package/dist/providers/codexCli.d.ts +6 -0
  44. package/dist/providers/codexCli.js +25 -0
  45. package/dist/providers/codexCli.js.map +1 -0
  46. package/dist/providers/codexCliModels.d.ts +23 -0
  47. package/dist/providers/codexCliModels.js +147 -0
  48. package/dist/providers/codexCliModels.js.map +1 -0
  49. package/dist/providers/localCommand.d.ts +26 -0
  50. package/dist/providers/localCommand.js +614 -0
  51. package/dist/providers/localCommand.js.map +1 -0
  52. package/dist/providers/markdownPayload.d.ts +7 -0
  53. package/dist/providers/markdownPayload.js +29 -0
  54. package/dist/providers/markdownPayload.js.map +1 -0
  55. package/dist/providers/responseSchema.d.ts +3 -0
  56. package/dist/providers/responseSchema.js +187 -0
  57. package/dist/providers/responseSchema.js.map +1 -0
  58. package/dist/providers/router.d.ts +3 -0
  59. package/dist/providers/router.js +21 -0
  60. package/dist/providers/router.js.map +1 -0
  61. package/dist/providers/stub.d.ts +2 -0
  62. package/dist/providers/stub.js +177 -0
  63. package/dist/providers/stub.js.map +1 -0
  64. package/dist/providers/types.d.ts +37 -0
  65. package/dist/providers/types.js +2 -0
  66. package/dist/providers/types.js.map +1 -0
  67. package/dist/runtime/agentBoard.d.ts +79 -0
  68. package/dist/runtime/agentBoard.js +166 -0
  69. package/dist/runtime/agentBoard.js.map +1 -0
  70. package/dist/runtime/agentBoardArtifact.d.ts +9 -0
  71. package/dist/runtime/agentBoardArtifact.js +171 -0
  72. package/dist/runtime/agentBoardArtifact.js.map +1 -0
  73. package/dist/runtime/agentRunner.d.ts +17 -0
  74. package/dist/runtime/agentRunner.js +92 -0
  75. package/dist/runtime/agentRunner.js.map +1 -0
  76. package/dist/runtime/approvalInbox.d.ts +54 -0
  77. package/dist/runtime/approvalInbox.js +143 -0
  78. package/dist/runtime/approvalInbox.js.map +1 -0
  79. package/dist/runtime/approvalPolicy.d.ts +11 -0
  80. package/dist/runtime/approvalPolicy.js +58 -0
  81. package/dist/runtime/approvalPolicy.js.map +1 -0
  82. package/dist/runtime/artifacts.d.ts +23 -0
  83. package/dist/runtime/artifacts.js +48 -0
  84. package/dist/runtime/artifacts.js.map +1 -0
  85. package/dist/runtime/browserManager.d.ts +37 -0
  86. package/dist/runtime/browserManager.js +356 -0
  87. package/dist/runtime/browserManager.js.map +1 -0
  88. package/dist/runtime/canonicalMemory.d.ts +23 -0
  89. package/dist/runtime/canonicalMemory.js +134 -0
  90. package/dist/runtime/canonicalMemory.js.map +1 -0
  91. package/dist/runtime/chatGptPageReadiness.d.ts +16 -0
  92. package/dist/runtime/chatGptPageReadiness.js +74 -0
  93. package/dist/runtime/chatGptPageReadiness.js.map +1 -0
  94. package/dist/runtime/commandRunner.d.ts +18 -0
  95. package/dist/runtime/commandRunner.js +115 -0
  96. package/dist/runtime/commandRunner.js.map +1 -0
  97. package/dist/runtime/commandSafety.d.ts +7 -0
  98. package/dist/runtime/commandSafety.js +61 -0
  99. package/dist/runtime/commandSafety.js.map +1 -0
  100. package/dist/runtime/config.d.ts +10 -0
  101. package/dist/runtime/config.js +107 -0
  102. package/dist/runtime/config.js.map +1 -0
  103. package/dist/runtime/crewLanes.d.ts +2 -0
  104. package/dist/runtime/crewLanes.js +123 -0
  105. package/dist/runtime/crewLanes.js.map +1 -0
  106. package/dist/runtime/criticPolicy.d.ts +17 -0
  107. package/dist/runtime/criticPolicy.js +50 -0
  108. package/dist/runtime/criticPolicy.js.map +1 -0
  109. package/dist/runtime/defaults.d.ts +5 -0
  110. package/dist/runtime/defaults.js +100 -0
  111. package/dist/runtime/defaults.js.map +1 -0
  112. package/dist/runtime/directives.d.ts +3 -0
  113. package/dist/runtime/directives.js +218 -0
  114. package/dist/runtime/directives.js.map +1 -0
  115. package/dist/runtime/doctor.d.ts +36 -0
  116. package/dist/runtime/doctor.js +185 -0
  117. package/dist/runtime/doctor.js.map +1 -0
  118. package/dist/runtime/errors.d.ts +20 -0
  119. package/dist/runtime/errors.js +41 -0
  120. package/dist/runtime/errors.js.map +1 -0
  121. package/dist/runtime/externalTransfer.d.ts +20 -0
  122. package/dist/runtime/externalTransfer.js +156 -0
  123. package/dist/runtime/externalTransfer.js.map +1 -0
  124. package/dist/runtime/fanout.d.ts +64 -0
  125. package/dist/runtime/fanout.js +263 -0
  126. package/dist/runtime/fanout.js.map +1 -0
  127. package/dist/runtime/gitEvidence.d.ts +10 -0
  128. package/dist/runtime/gitEvidence.js +80 -0
  129. package/dist/runtime/gitEvidence.js.map +1 -0
  130. package/dist/runtime/handoffs.d.ts +32 -0
  131. package/dist/runtime/handoffs.js +100 -0
  132. package/dist/runtime/handoffs.js.map +1 -0
  133. package/dist/runtime/ids.d.ts +2 -0
  134. package/dist/runtime/ids.js +4 -0
  135. package/dist/runtime/ids.js.map +1 -0
  136. package/dist/runtime/localStateIgnore.d.ts +9 -0
  137. package/dist/runtime/localStateIgnore.js +98 -0
  138. package/dist/runtime/localStateIgnore.js.map +1 -0
  139. package/dist/runtime/loop.d.ts +14 -0
  140. package/dist/runtime/loop.js +1863 -0
  141. package/dist/runtime/loop.js.map +1 -0
  142. package/dist/runtime/loopRecommendation.d.ts +109 -0
  143. package/dist/runtime/loopRecommendation.js +566 -0
  144. package/dist/runtime/loopRecommendation.js.map +1 -0
  145. package/dist/runtime/loopResponsibilities.d.ts +2 -0
  146. package/dist/runtime/loopResponsibilities.js +395 -0
  147. package/dist/runtime/loopResponsibilities.js.map +1 -0
  148. package/dist/runtime/loopRunner.d.ts +28 -0
  149. package/dist/runtime/loopRunner.js +81 -0
  150. package/dist/runtime/loopRunner.js.map +1 -0
  151. package/dist/runtime/operatorNextActions.d.ts +2 -0
  152. package/dist/runtime/operatorNextActions.js +344 -0
  153. package/dist/runtime/operatorNextActions.js.map +1 -0
  154. package/dist/runtime/paths.d.ts +9 -0
  155. package/dist/runtime/paths.js +14 -0
  156. package/dist/runtime/paths.js.map +1 -0
  157. package/dist/runtime/permissions.d.ts +9 -0
  158. package/dist/runtime/permissions.js +73 -0
  159. package/dist/runtime/permissions.js.map +1 -0
  160. package/dist/runtime/progressPacket.d.ts +12 -0
  161. package/dist/runtime/progressPacket.js +512 -0
  162. package/dist/runtime/progressPacket.js.map +1 -0
  163. package/dist/runtime/protectedPaths.d.ts +6 -0
  164. package/dist/runtime/protectedPaths.js +48 -0
  165. package/dist/runtime/protectedPaths.js.map +1 -0
  166. package/dist/runtime/providers.d.ts +13 -0
  167. package/dist/runtime/providers.js +60 -0
  168. package/dist/runtime/providers.js.map +1 -0
  169. package/dist/runtime/reconciliation.d.ts +17 -0
  170. package/dist/runtime/reconciliation.js +283 -0
  171. package/dist/runtime/reconciliation.js.map +1 -0
  172. package/dist/runtime/redaction.d.ts +1 -0
  173. package/dist/runtime/redaction.js +5 -0
  174. package/dist/runtime/redaction.js.map +1 -0
  175. package/dist/runtime/remoteRepoContext.d.ts +77 -0
  176. package/dist/runtime/remoteRepoContext.js +316 -0
  177. package/dist/runtime/remoteRepoContext.js.map +1 -0
  178. package/dist/runtime/repoContext.d.ts +50 -0
  179. package/dist/runtime/repoContext.js +399 -0
  180. package/dist/runtime/repoContext.js.map +1 -0
  181. package/dist/runtime/repoGateway.d.ts +64 -0
  182. package/dist/runtime/repoGateway.js +308 -0
  183. package/dist/runtime/repoGateway.js.map +1 -0
  184. package/dist/runtime/responseContracts.d.ts +3 -0
  185. package/dist/runtime/responseContracts.js +86 -0
  186. package/dist/runtime/responseContracts.js.map +1 -0
  187. package/dist/runtime/reviewLanes.d.ts +2 -0
  188. package/dist/runtime/reviewLanes.js +343 -0
  189. package/dist/runtime/reviewLanes.js.map +1 -0
  190. package/dist/runtime/reviewRouting.d.ts +51 -0
  191. package/dist/runtime/reviewRouting.js +152 -0
  192. package/dist/runtime/reviewRouting.js.map +1 -0
  193. package/dist/runtime/revisionPacket.d.ts +38 -0
  194. package/dist/runtime/revisionPacket.js +144 -0
  195. package/dist/runtime/revisionPacket.js.map +1 -0
  196. package/dist/runtime/revisionTrail.d.ts +2 -0
  197. package/dist/runtime/revisionTrail.js +162 -0
  198. package/dist/runtime/revisionTrail.js.map +1 -0
  199. package/dist/runtime/role-assignment.d.ts +4 -0
  200. package/dist/runtime/role-assignment.js +21 -0
  201. package/dist/runtime/role-assignment.js.map +1 -0
  202. package/dist/runtime/roleRoster.d.ts +28 -0
  203. package/dist/runtime/roleRoster.js +96 -0
  204. package/dist/runtime/roleRoster.js.map +1 -0
  205. package/dist/runtime/runInsights.d.ts +121 -0
  206. package/dist/runtime/runInsights.js +305 -0
  207. package/dist/runtime/runInsights.js.map +1 -0
  208. package/dist/runtime/runMonitor.d.ts +33 -0
  209. package/dist/runtime/runMonitor.js +143 -0
  210. package/dist/runtime/runMonitor.js.map +1 -0
  211. package/dist/runtime/runtime.d.ts +15 -0
  212. package/dist/runtime/runtime.js +199 -0
  213. package/dist/runtime/runtime.js.map +1 -0
  214. package/dist/runtime/runtimeInfo.d.ts +9 -0
  215. package/dist/runtime/runtimeInfo.js +76 -0
  216. package/dist/runtime/runtimeInfo.js.map +1 -0
  217. package/dist/runtime/store.d.ts +4 -0
  218. package/dist/runtime/store.js +48 -0
  219. package/dist/runtime/store.js.map +1 -0
  220. package/dist/runtime/summons.d.ts +25 -0
  221. package/dist/runtime/summons.js +403 -0
  222. package/dist/runtime/summons.js.map +1 -0
  223. package/dist/runtime/teamPresets.d.ts +14 -0
  224. package/dist/runtime/teamPresets.js +153 -0
  225. package/dist/runtime/teamPresets.js.map +1 -0
  226. package/dist/runtime/types.d.ts +505 -0
  227. package/dist/runtime/types.js +28 -0
  228. package/dist/runtime/types.js.map +1 -0
  229. package/dist/runtime/validationCommands.d.ts +18 -0
  230. package/dist/runtime/validationCommands.js +106 -0
  231. package/dist/runtime/validationCommands.js.map +1 -0
  232. package/dist/tui/dashboard.d.ts +41 -0
  233. package/dist/tui/dashboard.js +1115 -0
  234. package/dist/tui/dashboard.js.map +1 -0
  235. package/docs/ARCHITECTURE.md +277 -0
  236. package/docs/CLI_SPEC.md +396 -0
  237. package/docs/CODEX_SETUP.md +288 -0
  238. package/docs/COMPLETION_CONTRACT.md +52 -0
  239. package/docs/CONTRIBUTOR_GUIDE.md +70 -0
  240. package/docs/DEMO.md +62 -0
  241. package/docs/GLOSSARY.md +46 -0
  242. package/docs/GOAL_LOOP_SCHEDULE.md +50 -0
  243. package/docs/KNOWN_LIMITATIONS.md +29 -0
  244. package/docs/LICENSING.md +21 -0
  245. package/docs/LOOP_RECIPES.md +290 -0
  246. package/docs/LOOP_SELECTION_UX.md +118 -0
  247. package/docs/MCP_SPEC.md +689 -0
  248. package/docs/MEMORY_AND_RECONCILIATION.md +222 -0
  249. package/docs/NPM_PUBLISHING.md +51 -0
  250. package/docs/OPEN_DECISIONS.md +81 -0
  251. package/docs/PROMPT_SCHEMAS.md +411 -0
  252. package/docs/PROVIDER_ADAPTERS.md +323 -0
  253. package/docs/PROVIDER_MATRIX.md +21 -0
  254. package/docs/PUBLIC_REPO_READINESS.md +49 -0
  255. package/docs/RESEARCH_NOTES.md +92 -0
  256. package/docs/ROADMAP.md +94 -0
  257. package/docs/ROLE_CONTRACTS.md +252 -0
  258. package/docs/RUNTIME_LOOP.md +240 -0
  259. package/docs/SECURITY_AND_PRIVACY.md +161 -0
  260. package/docs/TESTING_AND_VERIFICATION.md +180 -0
  261. package/docs/TRUST_MODEL.md +65 -0
  262. package/docs/decisions/0001-runtime-first-cli-and-mcp.md +23 -0
  263. package/docs/decisions/0002-provider-neutral-role-mapping.md +43 -0
  264. package/docs/decisions/0003-separate-implementation-and-verification.md +27 -0
  265. package/docs/product/README.md +14 -0
  266. package/docs/product/model-selection.md +88 -0
  267. package/docs/product/positioning.md +37 -0
  268. package/docs/product/pro-usage-modes.md +70 -0
  269. package/docs/product/roadmap.md +57 -0
  270. package/docs/product/role-policy.md +89 -0
  271. package/docs/product/runtime-invariants.md +44 -0
  272. package/docs/release/v0.1.0-preview.0.md +48 -0
  273. package/examples/stub-demo/README.md +25 -0
  274. package/package.json +55 -0
@@ -0,0 +1,252 @@
1
+ # Role Contracts
2
+
3
+ Role contracts define what each agent is responsible for, what it may access, and what it must return.
4
+
5
+ The same model provider can fill different roles across different runs, but a single agent instance must not hold conflicting powers inside the same run.
6
+
7
+ ## Role Matrix
8
+
9
+ | Role | Responsibility | Edit Tools | Shell Tools | Acceptance Power |
10
+ | --- | --- | --- | --- | --- |
11
+ | Orchestrator | Plan, delegate, compare evidence, control loop | No by default | Limited; no direct shell commands by default | No final authority without evidence |
12
+ | Planner | Create implementation plan and risks | No | Read-only | No |
13
+ | Researcher | Inspect repo, docs, logs, and external references | No | Read/search only | No |
14
+ | Implementer | Make scoped code changes | Yes, scoped | Yes, scoped | No |
15
+ | QA Tester | Find missed cases and recommend validation from evidence | No | Read-only | No |
16
+ | Verifier | Validate output against acceptance criteria | No | Test/log tools only | Recommends approve/revise/abort/ask_user |
17
+ | Critic | Find risks, missing cases, design flaws | No | Read-only | No |
18
+ | Integrator | Apply approved patches | Yes, deterministic | Limited | No |
19
+ | Citation Agent | Verify evidence and attribution | No | Read/search only | No |
20
+
21
+ ## Hard Invariants
22
+
23
+ - Implementer and verifier cannot be the same agent for the same task.
24
+ - Verifier cannot edit files.
25
+ - QA tester cannot edit files.
26
+ - Critic cannot edit files.
27
+ - Researcher cannot edit files.
28
+ - Integrator applies only approved patches.
29
+ - Runtime command logs beat model summaries.
30
+ - Test changes must be explicit and separately reviewed.
31
+
32
+ ## Same-Run Summons
33
+
34
+ A summon is a runtime-owned read-only call to an existing role on an existing run. It is how the runtime can ask a planner, researcher, QA tester, verifier, or critic to review a slice, perform QA, challenge assumptions, or gather evidence without changing the main loop owner.
35
+
36
+ Summons carry:
37
+
38
+ - role
39
+ - kind such as `review`, `qa`, `critique`, `research`, or `plan`
40
+ - brief and optional persona
41
+ - constraints
42
+ - artifact refs used as evidence
43
+ - optional one-call provider assignment
44
+
45
+ A summon does not grant edit tools, apply patches, accept work, or rewrite the run's role mapping. Model-backed summon providers still require provider-invocation approval unless autopilot policy auto-approves the bounded gate.
46
+
47
+ Summon responses can appear as read-only sidecar evidence on review ownership lanes. They are useful for QA, critique, and second opinions, but they cannot satisfy required verifier ownership, replace runtime-captured validation evidence, or advance the main state machine.
48
+
49
+ A fan-out is a bounded group of summons on the same run. It uses the same read-only role limits, provider approval gates, and sidecar evidence rules as individual summons. An active approval gate stops later fan-out items. Contained advisory provider failures are recorded, but a recorded failure carries no acceptance authority. Fan-out writes a compact group artifact for visibility, but it does not create a new scheduler, grant edit tools, or satisfy required review gates.
50
+
51
+ Crew lane trails are runtime-derived display views over role responsibilities, review ownership, and handoffs. They make the hood readable as planner, builder, QA, verifier, critic, integrator, operator, reconciliation, and completion lanes, but they do not grant authority beyond the role contracts below.
52
+
53
+ ## Review Ownership
54
+
55
+ Review ownership is derived by the runtime from canonical run evidence. A lane records the owner, provider/model assignment when the owner is a role, whether the lane is required, whether its evidence can satisfy required gates, and compact artifact/event refs.
56
+
57
+ - Verifier ownership is satisfied only by a main verifier response under the runtime loop.
58
+ - Runtime QA/validation ownership is satisfied only by runtime-captured validation evidence and command metadata.
59
+ - QA tester ownership is advisory model evidence and cannot satisfy runtime validation.
60
+ - Review routing ownership is deterministic runtime policy evidence. It can require, skip, or sequence subjective lanes, but it cannot accept work by itself.
61
+ - Critic ownership is advisory. The runtime may call the critic from a `critic_trigger` policy decision, but critic output cannot satisfy validation, verifier, or completion gates.
62
+ - Same-run summons are sidecar evidence and remain read-only.
63
+ - Same-run fan-outs are grouped sidecar evidence and remain read-only.
64
+ - Fixable QA, critic, or verifier findings can become a runtime-owned `revision_packet` handoff back to the implementer. The packet and derived revision trail do not grant reviewer edit power and do not satisfy validation or verifier gates.
65
+
66
+ ## Orchestrator
67
+
68
+ The orchestrator owns the strategy, not the filesystem.
69
+
70
+ Inputs:
71
+
72
+ - user goal
73
+ - repo context
74
+ - constraints
75
+ - role mapping
76
+ - current state
77
+ - worker results
78
+ - verifier verdicts
79
+ - critic feedback
80
+ - budgets
81
+
82
+ Allowed tools:
83
+
84
+ - read run state
85
+ - create plan
86
+ - delegate task
87
+ - request approval
88
+ - ask for verification
89
+ - ask for critique
90
+
91
+ Disallowed by default:
92
+
93
+ - direct file edits
94
+ - direct shell commands
95
+ - direct patch application
96
+
97
+ Outputs:
98
+
99
+ - plan
100
+ - task assignments
101
+ - approval request
102
+ - continue/revise/abort decision
103
+ - final synthesis
104
+
105
+ ## Implementer
106
+
107
+ The implementer owns scoped changes.
108
+
109
+ Inputs:
110
+
111
+ - narrow task objective
112
+ - allowed paths
113
+ - relevant files
114
+ - acceptance criteria
115
+ - disallowed changes
116
+ - testing hints
117
+
118
+ Allowed tools:
119
+
120
+ - read files
121
+ - edit allowed files
122
+ - run scoped commands
123
+ - inspect diff
124
+
125
+ Disallowed by default:
126
+
127
+ - changing protected test assets without explicit approval
128
+ - changing unrelated files
129
+ - applying its own output to the final checkout
130
+ - claiming acceptance
131
+
132
+ Outputs:
133
+
134
+ - changed files
135
+ - diff summary
136
+ - commands run
137
+ - self-check notes
138
+ - unresolved risks
139
+
140
+ When a repair pass is delegated, the implementer's inputs also include the latest `revision_packet` ref and a compact repair objective. The implementer must treat the packet as a narrow patch brief, not as permission to broaden scope or accept the work.
141
+
142
+ ## QA Tester
143
+
144
+ The QA tester is a read-only model-assisted tester, usually a cheaper model such as Codex Spark.
145
+
146
+ The runtime calls QA only when review routing requires behavior or regression review. A skipped QA lane must have a recorded routing reason; skipped QA never weakens deterministic validation or verifier requirements.
147
+
148
+ Inputs:
149
+
150
+ - user goal
151
+ - current plan
152
+ - diff summaries
153
+ - runtime command metadata
154
+ - validation artifacts
155
+ - verifier or critic evidence when present
156
+
157
+ Allowed tools:
158
+
159
+ - read files
160
+ - inspect diffs
161
+ - inspect logs
162
+ - recommend deterministic validation commands
163
+
164
+ Disallowed:
165
+
166
+ - editing files
167
+ - changing tests
168
+ - running commands, or claiming command results, unless the runtime captured them
169
+ - satisfying runtime QA/validation gates
170
+ - accepting work
171
+
172
+ Outputs:
173
+
174
+ - verdict: `pass`, `needs_revision`, `needs_more_evidence`, or `blocked`
175
+ - missed cases
176
+ - suggested validation commands
177
+ - product or regression risks
178
+ - summary grounded in runtime evidence
179
+
180
+ ## Verifier
181
+
182
+ The verifier owns independent assessment.
183
+
184
+ The current routing policy keeps verifier review required for implementation runs when a verifier is assigned. If the verifier role is missing, the runtime stops for user review instead of completing the run.
185
+
186
+ Inputs:
187
+
188
+ - user goal
189
+ - acceptance criteria
190
+ - diff
191
+ - changed files
192
+ - raw test logs
193
+ - runtime command metadata
194
+ - implementer notes
195
+
196
+ Allowed tools:
197
+
198
+ - read files
199
+ - inspect diffs
200
+ - inspect logs
201
+ - request deterministic commands through runtime
202
+
203
+ Disallowed:
204
+
205
+ - editing files
206
+ - changing tests
207
+ - applying patches
208
+ - accepting unverifiable claims
209
+
210
+ Outputs:
211
+
212
+ - verdict: `approve`, `revise`, `abort`, or `ask_user`
213
+ - evidence
214
+ - failed criteria
215
+ - recommended next action
216
+
217
+ ## Critic
218
+
219
+ The critic challenges the plan or patch.
220
+
221
+ The runtime may invoke the critic automatically when QA, verifier, or deterministic validation evidence indicates risk. That invocation is recorded as a `critic_trigger` artifact with a reason code and evidence refs.
222
+
223
+ Best used for:
224
+
225
+ - high-risk tasks
226
+ - architectural changes
227
+ - security-sensitive work
228
+ - unclear product behavior
229
+ - provider disagreement
230
+
231
+ Outputs:
232
+
233
+ - risks
234
+ - missing cases
235
+ - alternate designs
236
+ - blocking concerns
237
+ - non-blocking concerns
238
+
239
+ The critic must not edit files, apply patches, mark tests as passed, or approve completion.
240
+
241
+ ## Integrator
242
+
243
+ The integrator applies approved changes.
244
+
245
+ The integrator should be deterministic runtime code when possible, not a general model agent.
246
+
247
+ Responsibilities:
248
+
249
+ - apply approved patch
250
+ - verify target checkout state
251
+ - ensure no unrelated files are included
252
+ - capture final diff
@@ -0,0 +1,240 @@
1
+ # Runtime Loop
2
+
3
+ TheHood runs a bounded agent loop. The loop is stateful, inspectable, and controlled by runtime rules rather than model habit.
4
+
5
+ ## Loop Summary
6
+
7
+ ```text
8
+ 1. Receive user goal
9
+ 2. Inspect repo and constraints
10
+ 3. Ask orchestrator for plan
11
+ 4. Request user approval when needed
12
+ 5. Delegate scoped task to implementer
13
+ 6. Capture diff and implementation notes
14
+ 7. Run deterministic validation commands
15
+ 8. Write a runtime-owned review-routing artifact from changed paths, protected-path matches, validation evidence, and role state
16
+ 9. Ask the mapped read-only QA tester when routing requires behavior or regression review
17
+ 10. Ask verifier for verdict using raw evidence
18
+ 11. Ask critic when risk or ambiguity warrants it
19
+ 12. Write a revision packet and delegate repair when findings are fixable
20
+ 13. Ask user, abort, or integrate when runtime policy requires it
21
+ 14. Produce final report with evidence
22
+ 15. Reconcile planner state against implementation evidence when needed
23
+ ```
24
+
25
+ Each provider call is preceded by a runtime-built directive artifact containing role instructions, prompt variables, tool permissions, and the expected output contract. The provider response must satisfy that contract before the runtime advances to the next state.
26
+
27
+ Local command providers such as `codex-cli` and `claude-code` also write redacted stdout/stderr `log` artifacts plus a compact `provider_invocation` artifact after the command exits. The invocation artifact records role, provider/model, command args, workspace mode, sandbox or permission mode when available, exit code, timeout state, output lengths and refs, parse status, and any isolated patch ref. It proves the runtime actually spawned the local agent adapter, but it does not replace the provider response, validation logs, or verifier verdict.
28
+
29
+ Provider directives include a `directiveAck` marker. Browser-backed adapters must require the current marker in the provider response before accepting schema-valid JSON, which prevents stale ChatGPT Web project or conversation context from being mistaken for the current run.
30
+
31
+ The `AgentResponse` JSON envelope is deliberately mechanical. It carries status, a short summary, required role-control fields such as `action`, `status`, or `verdict`, refs, and the directive acknowledgement. Human-facing plans, reports, reviews, critique, rationale, acceptance criteria, and long next-step writeups should live in `data.<required_data_key>.markdown` as GitHub-flavored Markdown. Status surfaces expose only a bounded markdown preview plus the response artifact ref; the full response artifact remains the source of truth.
32
+
33
+ The runtime also records typed `handoffs` on the run record whenever work crosses a meaningful boundary between roles, an approval gate mediates the next transition, or a run completes. Display labels such as `Agent 1 / Orchestrator`, `Agent 2 / Implementer`, and `Agent 3 / Verifier` are derived from runtime roles and assignments. They are inspectable lane labels, not new permissions or authority.
34
+
35
+ Same-run summons are read-only sidecar calls attached to an existing run. A summon carries an explicit brief, kind such as `qa` or `critique`, optional one-call provider assignment, constraints, and artifact refs. The runtime records the handoff and provider artifacts, but the summoned agent does not advance the main state machine or gain edit authority. Model-backed summon providers still pass through provider-invocation approval; autopilot can auto-approve that bounded gate when policy allows it.
36
+
37
+ Same-run fan-out is a bounded group of summons attached to one run. The current runtime executes fan-out items sequentially so provider approval gates stay explicit and auditable, then writes a compact `fanout` artifact with item statuses, artifact refs, and bounds. Repo config can lower the fan-out item cap through `defaults.fanoutMaxItems`, while the runtime hard cap remains 8. An active approval gate stops later fan-out items until the gate is handled, but a contained advisory provider failure is recorded and later read-only items can still run. Fan-out is an evidence fan-in surface, not a scheduler: grouped responses remain sidecar evidence and cannot satisfy required verifier or runtime QA/validation lanes.
38
+
39
+ Review lanes are runtime-derived gate metadata, not separate schedulers. The runtime derives verifier, runtime QA/validation, QA tester, and critic lanes from existing canonical evidence such as verifier responses, validation command artifacts, tool events, QA tester responses, critic responses, and read-only summon responses. Each lane carries bounded ownership metadata: owner label, role or runtime owner, provider/model assignment when known, required or optional status, current state, compact summary, and artifact/event refs. Final reports and progress packets expose those lanes so CLI, MCP, TUI, and future app surfaces can display reviewer/tester/QA/critic state without owning orchestration logic. A summoned agent can add read-only sidecar evidence, but a summon does not satisfy or replace a required verifier or runtime QA/validation lane.
40
+
41
+ Critic triggers are runtime decisions, not model decisions. When QA, verifier, or deterministic validation evidence indicates risk, the runtime can invoke the configured read-only critic and write a `critic_trigger` artifact with the reason code, source roles, evidence refs, and critic response ref. The critic can recommend revision or missing evidence, but it cannot edit, satisfy validation, approve completion, or replace verifier ownership.
42
+
43
+ Review routing is a runtime decision, not a model decision. After deterministic validation evidence is captured, the runtime writes a `review_routing` artifact with a conservative risk tier, required lanes, skipped-role reasons, and compact signals. The first policy slice keeps verifier review required for implementation runs, gates model-assisted QA by risk, and keeps critic escalation controlled by critic policy. Low risk is intentionally narrow, such as docs/copy-only changes with passing deterministic validation; runtime, provider, approval, artifact, CLI/TUI behavior, MCP, protected-path, dependency, network, schema, or validation changes route to stronger review.
44
+
45
+ Revision packets are runtime repair handoffs, not reviewer authority. When QA returns `needs_revision`, verifier returns `revise`, or critic returns `needs_revision`, the runtime writes a compact `revision_packet` artifact and moves the run back to `implementing` with that packet in the implementer directive context. The next QA/verifier pass must use fresh post-repair runtime evidence. Verifier `ask_user` or `abort`, protected test gates, unsafe critic feedback, max-iteration failures, and other hard policy gates still stop instead of silently revising.
46
+
47
+ Revision trails are runtime-derived visibility over that repair path. A trail item links the revision packet, delegation event, implementer repair response, post-repair validation artifacts, review responses, handoff refs, and terminal completion event when present. It lets CLI, MCP, TUI, final reports, and progress packets show whether a repair is still active or already reviewed. It does not make stale pre-repair QA, validation, or verifier evidence valid for the post-repair pass.
48
+
49
+ Loop responsibility schedules are runtime-derived visibility snapshots over the same canonical evidence. A schedule names the current planner/orchestrator, implementer, verifier, runtime QA/validation, model-assisted QA tester, critic, reconciliation, integration, operator approval, and completion responsibilities with compact owner, status, gate, artifact, event, and handoff refs. The schedule does not add permissions, call providers, satisfy gates, or replace the state machine; it lets CLI, MCP, TUI, and future app surfaces show who owns the next responsibility without duplicating orchestration logic.
50
+
51
+ Crew lane trails are the product-facing version of those same evidence snapshots. The runtime derives `crewLanes` from loop responsibilities and review lanes so operators can see the hood as planner, builder, runtime QA, model QA tester, verifier, critic, integrator, approval, reconciliation, and completion lanes. Each lane includes authority (`edit`, `read_only`, `runtime`, or `operator`), required/advisory state, compact refs, and whether the lane can satisfy a gate. The Codex-facing `agentBoard` merges those dynamic lane snapshots with the configured role roster and provider health so app surfaces can render visible agent cards without owning orchestration logic. A derived dashboard artifact can expose the same board as bounded Codex-renderable cards and tables. Crew lanes, agent board cards, and dashboard artifacts are display and handoff evidence only; they do not schedule agents, grant tools, or make advisory sidecars authoritative.
52
+
53
+ The headless loop runner is a repeat driver over the same runtime state machine. It repeatedly advances a run until terminal state, required manual approval, no progress, or a caller-supplied cycle cap. It does not approve manual gates, add roles, schedule sidecar evidence, or replace verifier/runtime QA ownership.
54
+
55
+ ## State Machine
56
+
57
+ ```text
58
+ created
59
+ -> planning
60
+ -> awaiting_approval
61
+ -> delegating
62
+ -> implementing
63
+ -> verifying
64
+ -> critiquing
65
+ -> integrating
66
+ -> completed
67
+
68
+ Any state
69
+ -> failed
70
+ -> aborted
71
+ ```
72
+
73
+ ## Iteration Inputs
74
+
75
+ Every iteration should include:
76
+
77
+ - current run state
78
+ - original user goal
79
+ - current plan
80
+ - previous findings
81
+ - current diff
82
+ - raw command logs
83
+ - verifier verdict
84
+ - critic verdict when present
85
+ - open questions
86
+ - remaining budget
87
+ - stop conditions
88
+
89
+ ## Stop Conditions
90
+
91
+ The loop must stop when any of these are true:
92
+
93
+ - success criteria are satisfied
94
+ - max iterations reached
95
+ - token budget reached
96
+ - time budget reached
97
+ - command failure requires user action
98
+ - protected file change requires explicit approval
99
+ - model output fails schema validation repeatedly
100
+ - provider response status is `blocked` or `failed`
101
+ - user aborts the run
102
+
103
+ ## Approval Gates
104
+
105
+ User approval is required before:
106
+
107
+ - editing files in a target repo unless the run mode already authorizes it
108
+ - modifying tests, fixtures, snapshots, or evaluation files
109
+ - installing dependencies
110
+ - running commands with external side effects
111
+ - using network access when the policy requires approval
112
+ - invoking model-backed providers such as `chatgpt-web`, `claude-code`, or `codex-cli` for read-only repo work
113
+ - sending runtime-captured repo context to browser or API model providers such as `chatgpt-web`, unless the user's external transfer policy auto-approves the manifest
114
+ - sending runtime-captured progress or memory packets to browser or API model providers, unless the user's external transfer policy auto-approves the manifest
115
+ - applying a worker patch to the main checkout
116
+ - continuing to verification after an applied worker patch changes protected test, fixture, snapshot, or eval paths
117
+ - switching orchestrator or verifier mid-run for an active task
118
+
119
+ When `approvalPolicy.mode` is `autopilot`, the user has pre-authorized the runtime to approve bounded low-risk gates without another prompt. Autopilot may approve provider invocation, implementation start, external transfers that pass transfer-manifest policy, isolated patch application, and runtime-owned revision packet repair while the provider-call budget allows. It must still stop for secret-risk transfers, protected test/fixture/snapshot/eval changes, destructive or dependency/network commands that require explicit command approval, dirty-checkout integration blockers, max-iteration failures, verifier `ask_user` or `abort`, and unsafe critic outcomes.
120
+
121
+ Codex or tenant host-policy gates happen before TheHood can invoke an external model-backed provider. The runtime cannot auto-approve those gates, but MCP clients should call `thehood_model_access` first so the user sees a local-only packet with destinations, data boundary, approval copy, and fallback paths instead of a repeated long disclosure prompt.
122
+
123
+ When an approval reason specifies an exact phrase, such as `Approval message must mention "apply isolated patch"`, the runtime requires the approval message to contain that phrase before recording an approving transition.
124
+
125
+ Before sending repo context or a progress packet to a browser or API provider, the runtime writes a `transfer_manifest` artifact. The approval gate points at that manifest so CLI, MCP, TUI, and future app surfaces can show the destination provider, purpose, source artifacts, byte counts, hashes, risk class, exact approval phrase, and bounded preview before anything leaves the machine. If the user configures `approvalPolicy.mode: auto_low_risk`, `approvalPolicy.mode: autopilot`, or `externalTransfers: auto_low_risk`, bounded transfers that do not have `secret_risk` can be auto-approved; the runtime still records the manifest, approval event, and `approval_auto_approved` event.
126
+
127
+ `maxIterations` is enforced from persisted provider responses. If the next transition would call another provider after the run has already recorded `maxIterations` agent responses, the runtime fails closed with reason `max_iterations`.
128
+
129
+ ## Runtime-Owned Evidence
130
+
131
+ The runtime captures evidence directly:
132
+
133
+ - git status before and after
134
+ - diff before and after each worker
135
+ - command and args
136
+ - cwd
137
+ - exit code
138
+ - stdout
139
+ - stderr
140
+ - duration
141
+ - tool permission decision
142
+ - protected path classification
143
+ - final report artifacts for completed runs
144
+ - progress packet artifacts for later planner reconciliation
145
+ - provider invocation artifacts for local Codex CLI and Claude Code adapter executions
146
+ - derived review ownership metadata for verifier, runtime QA/validation, model-assisted QA tester, critic, and read-only summon evidence
147
+ - review routing artifacts explaining risk tier, required lanes, skipped roles, and routing reasons
148
+ - critic trigger artifacts explaining why an advisory critic was called
149
+ - revision packet artifacts explaining why repair was delegated back to the implementer
150
+ - revision trail items linking repair delegation to post-repair validation and review
151
+ - external transfer manifest artifacts before approved provider transfers
152
+ - typed handoff records for role delegation, approval mediation, and completion
153
+
154
+ Models may summarize evidence, but summaries are not authoritative.
155
+
156
+ ## Memory
157
+
158
+ TheHood's memory is canonical runtime state, not provider session context. Exact artifacts, run records, command logs, diffs, approvals, verifier verdicts, and final reports are authoritative. Summaries, vector memories, graph memories, and model reflections are derived navigation layers.
159
+
160
+ Provider directives should assume that browser and API conversation context may be stale or empty. The runtime rehydrates providers from bounded packets that point back to exact artifacts.
161
+
162
+ Each provider directive includes a bounded `canonicalMemory` object. It is a refs-only project memory index containing the current run snapshot, recent run summaries, and latest progress packet, reconciliation, repo context, provider execution, final report, review routing, and transfer manifest refs when available. It does not include large artifact bodies. Providers must treat this runtime-owned memory as authoritative and ignore stale provider session context unless that context is repeated in the directive.
163
+
164
+ ## Final Reports
165
+
166
+ Completed read-only and verified implementation runs attach a `report` artifact with `kind: "final_report"`. The report includes the run goal, final state, stop reason, completing role, artifact refs, command metadata, approval events, bounded review ownership lanes, bounded crew lanes, and bounded revision trail items. The runtime also stores a bounded progress packet artifact after completion so a later planner reconciliation step can ask for external-transfer approval using an exact artifact ref.
167
+
168
+ Run status insights expose the latest progress packet, reconciliation, repo context, provider execution, final report, and transfer manifest refs. They also expose bounded revision trails, crew lane trails, loop responsibility schedules, and operator next actions derived by the runtime from run state, approvals, provider waits, terminal state, revision handoffs, and review ownership lanes. Revision trails, crew lanes, loop schedules, and operator next actions are navigation aids over canonical artifacts; they do not replace artifact reads when a reviewer needs the full evidence and they do not weaken approval policy.
169
+
170
+ MCP host responses are compact by default so long TheHood sessions do not exhaust the Codex conversation context with repeated status dumps. Default MCP outputs include run state, counts, latest artifact refs, recent events, compact insights, and bounded agent-board cards. Full run evidence remains in `.thehood` artifacts and run records; MCP callers can request `detail: "full"` on runtime tools or read exact artifact refs through `thehood_read_artifact` when they intentionally need the verbose payload.
171
+
172
+ Provider status is also authoritative. A worker response with `blocked` pauses at an approval gate. A worker response with `failed` fails the run. The runtime must not advance blocked or failed implementation into verification.
173
+
174
+ ## Planner Reconciliation
175
+
176
+ Planner reconciliation closes the loop after a plan has been implemented and verified. The runtime builds a progress packet from canonical artifacts, applies the user's external transfer approval policy, sends the packet to the selected planner or orchestrator after approval, validates the response, and stores the result as a reconciliation artifact.
177
+
178
+ Reconciliation should answer which plan items are complete, which criteria remain open, whether the implementation deviated from the plan, and what the next slice should be. It is advisory; runtime evidence and approval gates remain authoritative.
179
+
180
+ ## Failure Classes
181
+
182
+ Verifier and runtime failures should be classified into stable categories:
183
+
184
+ - `test_failure`
185
+ - `lint_failure`
186
+ - `typecheck_failure`
187
+ - `build_failure`
188
+ - `schema_failure`
189
+ - `permission_denied`
190
+ - `approval_required`
191
+ - `provider_error`
192
+ - `ambiguous_goal`
193
+ - `unsafe_action`
194
+ - `budget_exhausted`
195
+ - `unknown_failure`
196
+
197
+ ## Integration Rule
198
+
199
+ Only the runtime applies approved changes to the target checkout.
200
+
201
+ Implementers can produce patches. Local CLI implementers run in isolated git worktrees by default; TheHood captures their diff as a run artifact and stops before applying it. After explicit approval, deterministic runtime code applies the patch to the target checkout, captures a runtime-owned integration report artifact, and only then proceeds toward verification. If the integrated patch changes protected test, fixture, snapshot, or eval paths, the runtime stops for a separate approval before verification. Implementers do not get to self-merge.
202
+
203
+ ## Current Loop
204
+
205
+ The current implementation can advance approved runs using the deterministic `stub` provider:
206
+
207
+ ```text
208
+ delegating
209
+ -> orchestrator response
210
+ -> orchestrator response schema validation
211
+ -> implementing
212
+ -> implementer response
213
+ -> implementer response schema validation
214
+ -> awaiting_approval when an isolated patch artifact must be applied
215
+ -> integrating
216
+ -> awaiting_approval when integrated protected paths need separate approval
217
+ -> verifying
218
+ -> git evidence capture
219
+ -> package validation command capture
220
+ -> review routing artifact capture
221
+ -> mapped QA tester response when required by routing
222
+ -> critic response when runtime trigger policy detects QA, verifier, or validation risk
223
+ -> revision packet and implementer repair pass when QA, critic, or verifier returns a fixable revision finding
224
+ -> verifier response
225
+ -> verifier response schema validation
226
+ -> final report and progress packet artifacts
227
+ -> completed or awaiting_approval
228
+ ```
229
+
230
+ ChatGPT Web is wired through a user-configured bridge command. API model providers are not wired yet. Local Codex CLI and Claude Code adapters are available through the same directive and response-validation contract.
231
+
232
+ Read-only runs can also execute a mapped guest role directly:
233
+
234
+ - `plan` uses `planner` when assigned, otherwise `orchestrator`
235
+ - `research` uses `researcher` when assigned, otherwise `orchestrator`
236
+ - `review` uses `critic` when assigned, otherwise `orchestrator`
237
+
238
+ For read-only runs, model-backed providers such as `chatgpt-web`, `claude-code`, and `codex-cli` require provider-invocation approval before the first provider call unless autopilot policy auto-approves that bounded gate. When a direct read-only role or same-run summon uses `chatgpt-web`, the runtime attaches a refs-only `remote_context` artifact before the provider call, provided that local git reports a GitHub remote, a clean checkout, and `HEAD` matching the tracked upstream ref, and that the active ChatGPT Web bridge GitHub connector surface is confirmed. When a read-only orchestrator or planner returns `action: "delegate"` to a repo reader or inspector, the runtime treats that as an evidence request and applies that same confirmed GitHub connector route before falling back to local context capture. When the orchestrator or planner instead returns a ready handoff to another configured read-only role such as `planner`, `researcher`, `qa`, `verifier`, or `critic` with `requiresMoreEvidence` not set to true, the runtime records a role handoff and invokes that role instead of recapturing repo context. If the target read-only role is not assigned, the runtime stops at an explicit missing-role approval gate. A `remote_context` artifact instructs ChatGPT Web to use its GitHub connector at the exact commit, instead of the runtime sending local file excerpts. Local runtime artifacts and checkout state remain authoritative, and the runtime falls back to deterministic local capture for local-only, dirty, unpushed, non-ChatGPT, or unconfirmed ChatGPT Web GitHub connector routes. Local capture writes a bounded `repo_context` artifact using deterministic filesystem reads. For browser or API model providers such as `chatgpt-web`, `openai-api`, and `anthropic-api`, the runtime then writes a `transfer_manifest` artifact before sending local repo context back to the provider. 
Manual policy stops at a second approval gate; auto-low-risk transfer policy and autopilot can auto-approve bounded non-secret manifests while still recording the manifest, approval event, and `approval_auto_approved` event. If the role requests another delegation after a local context pack exists, the runtime captures a follow-up context only when the structured decision names concrete repo paths that were not already captured or were previously captured only as truncated excerpts. If the role requests another delegation after a remote connector context already exists, the runtime stops at an approval gate rather than looping. Provider directives rehydrate local repo context from all captured context artifacts, assembling continuation chunks into one ordered excerpt per path, and include any refs-only remote connector context separately. Broad or fully duplicate repeated evidence delegations still stop at an approval gate instead of looping indefinitely.
239
+
240
+ When a read-only orchestrator or planner returns `action: "delegate"` with `delegateTo: "implementer"` or `nextRole: "implementer"` and does not set `requiresMoreEvidence: true`, the runtime treats the response as a completed implementation handoff plan. A `plan` run then writes the usual final report and progress packet instead of recapturing repo context.
@@ -0,0 +1,161 @@
1
+ # Security And Privacy
2
+
3
+ TheHood coordinates powerful local actions. Security and privacy must be designed into the runtime from the start.
4
+
5
+ ## Principles
6
+
7
+ - User credentials stay local.
8
+ - Provider sessions are never logged.
9
+ - Runtime permissions are explicit.
10
+ - Models do not receive secrets by default.
11
+ - Dangerous actions require approval.
12
+ - Logs are useful but redacted.
13
+ - The system fails closed when unsure.
14
+
15
+ ## Secrets
16
+
17
+ Secrets include:
18
+
19
+ - API keys
20
+ - browser cookies
21
+ - session tokens
22
+ - OAuth credentials
23
+ - SSH keys
24
+ - private repo URLs when sensitive
25
+ - payment or customer data
26
+ - private prompts containing confidential data
27
+
28
+ Rules:
29
+
30
+ - Do not print secrets to logs.
31
+ - Do not include secrets in model context.
32
+ - Redact environment variables by default.
33
+ - Store provider credentials using OS keychain or provider CLI config when possible.
34
+ - Treat custom provider:model aliases as user preference only; they do not bypass provider access controls, command readiness, approval gates, or external transfer review.
35
+ - Require explicit user approval before invoking model-backed providers for read-only repo work.
36
+ - Require explicit user approval before sending runtime-captured repo context to browser or API model providers unless the user-configured external transfer policy auto-approves a bounded non-secret-risk manifest.
37
+ - Require explicit user approval before sending runtime-captured progress, memory, or reconciliation packets to browser or API model providers unless the user-configured external transfer policy auto-approves a bounded non-secret-risk manifest.
38
+ - Treat Codex or tenant external-disclosure gates as outside runtime control. TheHood may provide a local-only model-access preflight packet and compact approval copy, but it must not bypass host policy or repeatedly ask the user to approve the same blocked disclosure in new wording.
39
+
40
+ ## Browser-Based Providers
41
+
42
+ The ChatGPT Web adapter is sensitive because it depends on a user-authenticated browser session.
43
+
44
+ Rules:
45
+
46
+ - Use an isolated browser profile when possible.
47
+ - Prefer the persistent Chrome profile managed by TheHood over the user's main Chrome profile.
48
+ - Do not export cookies.
49
+ - Do not write session tokens to disk.
50
+ - Do not bypass provider restrictions.
51
+ - Detect model availability visibly and fail if uncertain.
52
+ - Capture only visible model outputs needed for the run.
53
+ - Do not invoke ChatGPT Web for repo work until the user explicitly approves that provider invocation.
54
+ - Do not send bounded repo context back to ChatGPT Web until the user explicitly approves that external context transfer or has configured low-risk external transfer auto-approval.
55
+ - For clean pushed GitHub repos, ChatGPT Web may receive a refs-only `remote_context` artifact instead of local file excerpts only after the active bridge GitHub connector surface is confirmed. The directive tells ChatGPT to use its GitHub connector at the exact commit; it does not grant TheHood new GitHub access, attach ChatGPT apps, or bypass the user's ChatGPT connector permissions.
56
+ - In autopilot mode, treat the user-configured approval policy as the approval source and still fail closed on secret-risk transfers, protected test changes, destructive commands, dependency installs, and dirty-checkout integration blockers.
57
+ - Runtime-owned revision packets may route fixable reviewer findings back to the implementer, but unsafe critic feedback, verifier `ask_user` or `abort`, and protected-path gates must still stop for review.
58
+
59
+ ## Filesystem Safety
60
+
61
+ The runtime must:
62
+
63
+ - inspect dirty state before edits
64
+ - avoid reverting unrelated user changes
65
+ - isolate worker changes in worktrees when possible
66
+ - enforce allowed and disallowed paths
67
+ - block protected test changes unless approved
68
+ - run edit-capable local agents in isolated git worktrees by default and capture patch artifacts for review
69
+ - require explicit approval before applying isolated worker patches to the target checkout
70
+ - require separate approval before verifier review when an applied worker patch changes protected test, fixture, snapshot, or eval paths
71
+ - require `THEHOOD_ALLOW_DIRECT_EDIT=1` before a local agent can edit the target checkout directly
72
+ - keep repo-local runtime state in `.thehood/`, automatically add `.thehood/` and `.thehood-browser.json` to `.git/info/exclude` for git checkouts, and treat that state as private local evidence rather than source code
73
+
74
+ ## MCP Repo Gateway
75
+
76
+ MCP repo gateway tools are read-only and bounded, and they skip `.git`, `.thehood`, dependency/build output, and secret-looking paths.
77
+
78
+ Rules:
79
+
80
+ - Treat every repo gateway tool result as data disclosed to the connected MCP host.
81
+ - Use trusted MCP hosts only, especially when connecting ChatGPT Developer Mode to private local repos.
82
+ - Prefer Secure MCP Tunnel over public tunnels for private repos.
83
+ - Keep write tools separate from read tools and gated by runtime approval.
84
+ - Do not expose broad filesystem access; every path must stay relative to the configured repo root.
85
+
86
+ ## Command Safety
87
+
88
+ Commands should be classified before execution:
89
+
90
+ - read-only
91
+ - local write
92
+ - network
93
+ - dependency install
94
+ - destructive
95
+ - credential-sensitive
96
+
97
+ Approval is required for commands with risky side effects.
98
+
99
+ ## Logging
100
+
101
+ Logs should include:
102
+
103
+ - command
104
+ - cwd
105
+ - duration
106
+ - exit code
107
+ - redacted stdout
108
+ - redacted stderr
109
+ - permission decision
110
+
111
+ Logs should not include:
112
+
113
+ - raw cookies
114
+ - API keys
115
+ - hidden browser state
116
+ - full secret-bearing environment
117
+
118
+ ## Memory Safety
119
+
120
+ Memory is a control channel. Retrieved memories, reflections, and summaries can influence model decisions, so they must be treated as derived data rather than authority.
121
+
122
+ Rules:
123
+
124
+ - Preserve exact source artifacts before creating derived memories.
125
+ - Keep provenance for every derived memory: source refs, run id, created timestamp, commit or repo state when applicable, and derivation method.
126
+ - Prefer exact excerpts and artifact refs over summary-only memory packets.
127
+ - Mark superseded or invalidated plan state instead of silently overwriting it.
128
+ - Tell browser-backed providers to ignore stale provider session context and use only TheHood-supplied state.
129
+ - Require browser-backed provider responses to acknowledge the current directive before accepting schema-valid output.
130
+ - Keep directive-level `canonicalMemory` bounded and refs-only. It may name the latest progress packet, reconciliation, repo context, provider execution, final report, review routing, and transfer manifest artifacts, but it must not inline large artifact bodies.
131
+ - GitHub connector repo context is also refs-only and requires a confirmed provider connector route before selection. It may name owner, repo, branch, upstream, and commit, but it must not inline local file bodies.
132
+ - If future work sends larger memory bodies to browser or API providers, treat that as a `memory_packet` external transfer and require a transfer manifest before the data leaves the machine.
133
+ - Keep advanced memory engines pluggable and rebuildable from canonical artifacts.
134
+
135
+ ## External Transfer Review
136
+
137
+ Before local repo context bodies or runtime memory cross a browser or API provider boundary, TheHood should create a transfer manifest and point the approval gate at it. Confirmed refs-only GitHub connector context names remote coordinates instead of local file bodies; it is recorded as a `remote_context` artifact and does not replace transfer manifests for local context packs.
138
+
139
+ The manifest records:
140
+
141
+ - destination provider and role
142
+ - transfer purpose
143
+ - source artifact refs
144
+ - byte counts and hashes
145
+ - risk class
146
+ - exact approval phrase
147
+ - bounded preview of the artifact content
148
+
149
+ Approval should be based on that manifest. Manual approval is the default. When `approvalPolicy.mode` is `auto_low_risk` or `autopilot`, or `externalTransfers` is set to `auto_low_risk`, the runtime can auto-approve bounded transfers whose risk class is not `secret_risk`; it still records the manifest, the approval event, and an `approval_auto_approved` event before sending. The provider response, transfer manifest, and source artifacts remain separate so a later reviewer can distinguish what was sent from what the provider concluded.
150
+
151
+ ## Public Repo Boundary
152
+
153
+ The public repo should not contain:
154
+
155
+ - real provider credentials
156
+ - personal browser profiles
157
+ - private prompts
158
+ - run logs from private repos
159
+ - hardcoded local paths as defaults
160
+
161
+ Fixtures should be synthetic.