adam-cli 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of adam-cli might be problematic. Click here for more details.

Files changed (132) hide show
  1. adam_cli-0.1.0/.claude/settings.local.json +74 -0
  2. adam_cli-0.1.0/.gitignore +12 -0
  3. adam_cli-0.1.0/CLAUDE.md +463 -0
  4. adam_cli-0.1.0/PKG-INFO +286 -0
  5. adam_cli-0.1.0/README.md +258 -0
  6. adam_cli-0.1.0/alembic.ini +45 -0
  7. adam_cli-0.1.0/docker-compose.yml +32 -0
  8. adam_cli-0.1.0/examples/balagan-tower-defense/README.md +60 -0
  9. adam_cli-0.1.0/examples/balagan-tower-defense/spec.md +248 -0
  10. adam_cli-0.1.0/examples/balagan-tower-defense/tech-stack.md +50 -0
  11. adam_cli-0.1.0/pyproject.toml +67 -0
  12. adam_cli-0.1.0/src/adam/__init__.py +3 -0
  13. adam_cli-0.1.0/src/adam/__main__.py +5 -0
  14. adam_cli-0.1.0/src/adam/agents/__init__.py +20 -0
  15. adam_cli-0.1.0/src/adam/agents/architect.py +50 -0
  16. adam_cli-0.1.0/src/adam/agents/base.py +326 -0
  17. adam_cli-0.1.0/src/adam/agents/diagnostician.py +48 -0
  18. adam_cli-0.1.0/src/adam/agents/file_implementer.py +62 -0
  19. adam_cli-0.1.0/src/adam/agents/integration_auditor.py +61 -0
  20. adam_cli-0.1.0/src/adam/agents/module_planner.py +62 -0
  21. adam_cli-0.1.0/src/adam/agents/repair_agent.py +62 -0
  22. adam_cli-0.1.0/src/adam/agents/route_discoverer.py +165 -0
  23. adam_cli-0.1.0/src/adam/agents/scaffolder.py +104 -0
  24. adam_cli-0.1.0/src/adam/agents/test_writer.py +63 -0
  25. adam_cli-0.1.0/src/adam/cli/__init__.py +1 -0
  26. adam_cli-0.1.0/src/adam/cli/app.py +391 -0
  27. adam_cli-0.1.0/src/adam/cli/bootstrap.py +216 -0
  28. adam_cli-0.1.0/src/adam/cli/checkpoints.py +110 -0
  29. adam_cli-0.1.0/src/adam/cli/display.py +306 -0
  30. adam_cli-0.1.0/src/adam/config.py +69 -0
  31. adam_cli-0.1.0/src/adam/context/__init__.py +19 -0
  32. adam_cli-0.1.0/src/adam/context/condenser.py +97 -0
  33. adam_cli-0.1.0/src/adam/context/loader.py +340 -0
  34. adam_cli-0.1.0/src/adam/db/__init__.py +0 -0
  35. adam_cli-0.1.0/src/adam/db/migrations/__init__.py +0 -0
  36. adam_cli-0.1.0/src/adam/db/migrations/env.py +57 -0
  37. adam_cli-0.1.0/src/adam/db/migrations/script.py.mako +26 -0
  38. adam_cli-0.1.0/src/adam/db/migrations/versions/001_initial_schema.py +341 -0
  39. adam_cli-0.1.0/src/adam/db/migrations/versions/__init__.py +0 -0
  40. adam_cli-0.1.0/src/adam/db/session.py +73 -0
  41. adam_cli-0.1.0/src/adam/errors.py +31 -0
  42. adam_cli-0.1.0/src/adam/execution/__init__.py +13 -0
  43. adam_cli-0.1.0/src/adam/execution/dependencies.py +202 -0
  44. adam_cli-0.1.0/src/adam/execution/dev_server.py +333 -0
  45. adam_cli-0.1.0/src/adam/execution/runner.py +166 -0
  46. adam_cli-0.1.0/src/adam/git/__init__.py +5 -0
  47. adam_cli-0.1.0/src/adam/git/manager.py +183 -0
  48. adam_cli-0.1.0/src/adam/inspection/__init__.py +20 -0
  49. adam_cli-0.1.0/src/adam/inspection/api_smoke.py +256 -0
  50. adam_cli-0.1.0/src/adam/inspection/cli_verify.py +306 -0
  51. adam_cli-0.1.0/src/adam/inspection/evaluator.py +192 -0
  52. adam_cli-0.1.0/src/adam/inspection/screenshotter.py +167 -0
  53. adam_cli-0.1.0/src/adam/llm/__init__.py +5 -0
  54. adam_cli-0.1.0/src/adam/llm/client.py +255 -0
  55. adam_cli-0.1.0/src/adam/llm/json_extract.py +129 -0
  56. adam_cli-0.1.0/src/adam/logging_config.py +69 -0
  57. adam_cli-0.1.0/src/adam/models/__init__.py +26 -0
  58. adam_cli-0.1.0/src/adam/models/analytics.py +86 -0
  59. adam_cli-0.1.0/src/adam/models/base.py +33 -0
  60. adam_cli-0.1.0/src/adam/models/core.py +93 -0
  61. adam_cli-0.1.0/src/adam/models/events.py +31 -0
  62. adam_cli-0.1.0/src/adam/models/obligations.py +29 -0
  63. adam_cli-0.1.0/src/adam/models/testing.py +32 -0
  64. adam_cli-0.1.0/src/adam/orchestrator/__init__.py +20 -0
  65. adam_cli-0.1.0/src/adam/orchestrator/checkpoint.py +103 -0
  66. adam_cli-0.1.0/src/adam/orchestrator/engine.py +1169 -0
  67. adam_cli-0.1.0/src/adam/orchestrator/file_loop.py +415 -0
  68. adam_cli-0.1.0/src/adam/orchestrator/obligations.py +185 -0
  69. adam_cli-0.1.0/src/adam/orchestrator/planner.py +419 -0
  70. adam_cli-0.1.0/src/adam/orchestrator/policies.py +20 -0
  71. adam_cli-0.1.0/src/adam/orchestrator/stop_conditions.py +133 -0
  72. adam_cli-0.1.0/src/adam/profiles.py +100 -0
  73. adam_cli-0.1.0/src/adam/project.py +67 -0
  74. adam_cli-0.1.0/src/adam/prompts/__init__.py +5 -0
  75. adam_cli-0.1.0/src/adam/prompts/loader.py +41 -0
  76. adam_cli-0.1.0/src/adam/prompts/templates/architect.j2 +55 -0
  77. adam_cli-0.1.0/src/adam/prompts/templates/code_quality.j2 +46 -0
  78. adam_cli-0.1.0/src/adam/prompts/templates/diagnostician.j2 +42 -0
  79. adam_cli-0.1.0/src/adam/prompts/templates/file_implementer.j2 +87 -0
  80. adam_cli-0.1.0/src/adam/prompts/templates/integration_auditor.j2 +51 -0
  81. adam_cli-0.1.0/src/adam/prompts/templates/module_planner.j2 +63 -0
  82. adam_cli-0.1.0/src/adam/prompts/templates/performance.j2 +49 -0
  83. adam_cli-0.1.0/src/adam/prompts/templates/repair_agent.j2 +56 -0
  84. adam_cli-0.1.0/src/adam/prompts/templates/route_discoverer.j2 +37 -0
  85. adam_cli-0.1.0/src/adam/prompts/templates/security.j2 +50 -0
  86. adam_cli-0.1.0/src/adam/prompts/templates/test_writer.j2 +45 -0
  87. adam_cli-0.1.0/src/adam/repair/__init__.py +5 -0
  88. adam_cli-0.1.0/src/adam/repair/planner.py +105 -0
  89. adam_cli-0.1.0/src/adam/store/__init__.py +7 -0
  90. adam_cli-0.1.0/src/adam/store/events.py +76 -0
  91. adam_cli-0.1.0/src/adam/store/slicer.py +193 -0
  92. adam_cli-0.1.0/src/adam/store/store.py +297 -0
  93. adam_cli-0.1.0/src/adam/types.py +253 -0
  94. adam_cli-0.1.0/src/adam/validation/__init__.py +13 -0
  95. adam_cli-0.1.0/src/adam/validation/base.py +139 -0
  96. adam_cli-0.1.0/src/adam/validation/file_classifier.py +120 -0
  97. adam_cli-0.1.0/src/adam/validation/hard/__init__.py +1 -0
  98. adam_cli-0.1.0/src/adam/validation/hard/build_checker.py +41 -0
  99. adam_cli-0.1.0/src/adam/validation/hard/lint_runner.py +40 -0
  100. adam_cli-0.1.0/src/adam/validation/hard/test_runner.py +41 -0
  101. adam_cli-0.1.0/src/adam/validation/hard/type_checker.py +40 -0
  102. adam_cli-0.1.0/src/adam/validation/soft/__init__.py +1 -0
  103. adam_cli-0.1.0/src/adam/validation/soft/code_quality.py +71 -0
  104. adam_cli-0.1.0/src/adam/validation/soft/performance.py +69 -0
  105. adam_cli-0.1.0/src/adam/validation/soft/security.py +69 -0
  106. adam_cli-0.1.0/tests/__init__.py +0 -0
  107. adam_cli-0.1.0/tests/test_bootstrap.py +127 -0
  108. adam_cli-0.1.0/tests/test_checkpoints.py +24 -0
  109. adam_cli-0.1.0/tests/test_context_loader.py +111 -0
  110. adam_cli-0.1.0/tests/test_dependencies.py +93 -0
  111. adam_cli-0.1.0/tests/test_dev_server.py +53 -0
  112. adam_cli-0.1.0/tests/test_display.py +40 -0
  113. adam_cli-0.1.0/tests/test_e2e.py +485 -0
  114. adam_cli-0.1.0/tests/test_execution.py +79 -0
  115. adam_cli-0.1.0/tests/test_file_classifier.py +95 -0
  116. adam_cli-0.1.0/tests/test_file_loop_extras.py +51 -0
  117. adam_cli-0.1.0/tests/test_git_manager.py +113 -0
  118. adam_cli-0.1.0/tests/test_inspection.py +85 -0
  119. adam_cli-0.1.0/tests/test_integration.py +351 -0
  120. adam_cli-0.1.0/tests/test_json_extract.py +74 -0
  121. adam_cli-0.1.0/tests/test_llm_client.py +71 -0
  122. adam_cli-0.1.0/tests/test_obligations.py +41 -0
  123. adam_cli-0.1.0/tests/test_observation.py +265 -0
  124. adam_cli-0.1.0/tests/test_profiles.py +46 -0
  125. adam_cli-0.1.0/tests/test_project.py +38 -0
  126. adam_cli-0.1.0/tests/test_prompts.py +67 -0
  127. adam_cli-0.1.0/tests/test_repair_planner.py +91 -0
  128. adam_cli-0.1.0/tests/test_stop_conditions.py +146 -0
  129. adam_cli-0.1.0/tests/test_types.py +91 -0
  130. adam_cli-0.1.0/tests/test_validation.py +92 -0
  131. adam_cli-0.1.0/tests/test_visual_pipeline.py +178 -0
  132. adam_cli-0.1.0/uv.lock +1100 -0
@@ -0,0 +1,74 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(for dir:*)",
5
+ "Bash(do touch:*)",
6
+ "Bash(done)",
7
+ "Bash(uv sync:*)",
8
+ "Bash(uv run:*)",
9
+ "Bash(ALEMBIC_CONFIG=alembic.ini uv run alembic revision --autogenerate -m \"initial_schema\" --sql)",
10
+ "Bash(find /Users/avramscore/Projects/adam/src/adam -name \"*.py\" -not -path \"*__pycache__*\" -exec cat {} +)",
11
+ "Bash(find /Users/avramscore/Projects/adam/tests -name \"*.py\" -not -path \"*__pycache__*\" -exec cat {} +)",
12
+ "Bash(find /Users/avramscore/Projects/adam/src/adam -name \"*.j2\" -exec cat {} +)",
13
+ "Bash(grep:*)",
14
+ "Bash(python3:*)",
15
+ "WebSearch",
16
+ "WebFetch(domain:kenney.nl)",
17
+ "WebFetch(domain:opengameart.org)",
18
+ "Bash(mkdir -p /tmp/kenney-td)",
19
+ "Bash(curl -L -o /tmp/kenney-td/pack.zip \"https://kenney.nl/media/pages/assets/tower-defense-top-down/07c78e15e4-1716898025/kenney_tower-defense-top-down.zip\")",
20
+ "Read(//private/tmp/kenney-td/**)",
21
+ "Bash(mkdir -p /Users/avramscore/Projects/balagan/context/assets)",
22
+ "Bash(cp /tmp/balagan-spec/spec.md /Users/avramscore/Projects/balagan/context/spec.md)",
23
+ "Bash(cp /tmp/balagan-spec/tech-stack.md /Users/avramscore/Projects/balagan/context/tech-stack.md)",
24
+ "Bash(cp \"/tmp/kenney-td/PNG/Default size/\"*.png /Users/avramscore/Projects/balagan/context/assets/)",
25
+ "Read(//Users/avramscore/Projects/balagan/context/assets/**)",
26
+ "Bash(find \"/tmp/kenney-td/PNG/Default size\" -name \"*.png\" -exec cp {} /Users/avramscore/Projects/balagan/context/assets/)",
27
+ "Bash(for f:*)",
28
+ "Bash(do sed:*)",
29
+ "WebFetch(domain:docs.anthropic.com)",
30
+ "Bash(cd:*)",
31
+ "Bash(do)",
32
+ "Bash(sed -i '' 's/notes: str = \"\"\"\"/notes: Any = \"\"\"\"/' \"$f\")",
33
+ "Bash(cp /Users/avramscore/Projects/postwriter/site/package.json /Users/avramscore/Projects/adam-site/)",
34
+ "Bash(cp /Users/avramscore/Projects/postwriter/site/tsconfig.json /Users/avramscore/Projects/adam-site/)",
35
+ "Bash(cp /Users/avramscore/Projects/postwriter/site/tsconfig.app.json /Users/avramscore/Projects/adam-site/)",
36
+ "Bash(cp /Users/avramscore/Projects/postwriter/site/tsconfig.node.json /Users/avramscore/Projects/adam-site/)",
37
+ "Bash(cp /Users/avramscore/Projects/postwriter/site/vite.config.ts /Users/avramscore/Projects/adam-site/)",
38
+ "Bash(npm install:*)",
39
+ "Bash(npm run:*)",
40
+ "Bash(source ~/.nvm/nvm.sh)",
41
+ "Bash(nvm use:*)",
42
+ "Bash(fnm use:*)",
43
+ "Bash(open:*)",
44
+ "Bash(lsof -ti:4173,4174,5174,5175)",
45
+ "Bash(xargs kill:*)",
46
+ "Bash(git add:*)",
47
+ "Bash(git commit:*)",
48
+ "Bash(git push:*)",
49
+ "Bash(ls:*)",
50
+ "Read(//tmp/**)",
51
+ "Read(//Users/avramscore/.ssh/**)",
52
+ "Bash(ssh:*)",
53
+ "Bash(chmod:*)",
54
+ "Bash(rsync -avz --delete dist/ root@204.168.219.211:/var/www/meetadam/)",
55
+ "Bash(dig +short meetadam.app A)",
56
+ "Bash(nslookup meetadam.app)",
57
+ "Bash(curl -sI https://meetadam.app)",
58
+ "Bash(curl -svI https://meetadam.app)",
59
+ "Bash(curl -sI http://meetadam.app)",
60
+ "Bash(find /Users/avramscore/Projects/balagan/tests -name *.ts)",
61
+ "Bash(xargs cat:*)",
62
+ "Bash(uv pip:*)",
63
+ "WebFetch(domain:mysecond.app)",
64
+ "Bash(npx tsc:*)",
65
+ "Bash(git reset:*)",
66
+ "Bash(git rm:*)",
67
+ "Bash(git remote:*)",
68
+ "Bash(rsync -avz --delete dist/ root@204.168.219.211:/var/www/meetadam/play/)",
69
+ "Bash(curl -sI https://meetadam.app/play)",
70
+ "Bash(pip index:*)",
71
+ "Bash(uv build:*)"
72
+ ]
73
+ }
74
+ }
@@ -0,0 +1,12 @@
1
+ __pycache__/
2
+ *.pyc
3
+ .venv/
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .adam/
8
+ .adam-screenshots/
9
+ *.db
10
+ .pytest_cache/
11
+ .ruff_cache/
12
+ .mypy_cache/
@@ -0,0 +1,463 @@
1
+ # CLAUDE.md
2
+
3
+ ## Project: Adam — Orchestrated Long-Form Software Engineering
4
+
5
+ This document specifies a system for generating, testing, inspecting, and iterating complete software projects using a hierarchy of specialised agents, explicit project state, multi-pass critique, and repair loops. The architecture is derived from the Postwriter novel generation system, adapted for code. The core insight is the same: one-pass generation is inadequate for serious engineering, and the solution is an orchestrated pipeline of narrow agents operating in act→observe→verify→repair cycles.
6
+
7
+ The target is not code completion or snippet generation. The target is a system that autonomously builds entire software projects — from architecture through implementation through testing through visual inspection — and does not stop until critic agents have high confidence the project meets its specification.
8
+
9
+ ---
10
+
11
+ ## 1. Cognitive Model
12
+
13
+ The system models software engineering as a layered cognitive process, analogous to how the human brain uses different subsystems for different tasks:
14
+
15
+ ### Layer 1: Pattern Recognition (Haiku tier)
16
+ Fast, cheap, mechanical checks. Syntax validation, linting, type checking, import resolution, file existence verification. These are the "looks like rain" judgements — no deep reasoning required.
17
+
18
+ ### Layer 2: Trained Intuition (Sonnet tier)
19
+ Implementation decisions that an experienced engineer makes without deep deliberation. Choosing data structures, naming conventions, file organisation, API design patterns, error handling strategies. The workhorse layer — most code is written here.
20
+
21
+ ### Layer 3: Architectural Reasoning (Opus tier)
22
+ High-level design decisions requiring broad context and deep reasoning. System architecture, technology selection, interface design, dependency management, performance strategy, security model. Used sparingly but critically — at project inception and at major structural decision points.
23
+
24
+ ### Layer 4: Verification and Critique (mixed tiers)
25
+ Observation of the system's own output. Running tests, reading error messages, viewing rendered UI, checking accessibility, measuring performance, evaluating code quality. This is the feedback loop that makes the system self-correcting.
26
+
27
+ ---
28
+
29
+ ## 2. Objective
30
+
31
+ Build an orchestration framework for software engineering that:
32
+
33
+ - plans at multiple scales (architecture → modules → files → functions)
34
+ - implements at the file level with awareness of the full project
35
+ - maintains explicit canonical project state outside the code
36
+ - tests continuously and automatically
37
+ - visually inspects rendered output using vision-capable models
38
+ - audits code quality, security, accessibility, and performance
39
+ - performs constrained repair loops when tests fail or critics flag issues
40
+ - supports iterative refinement based on human feedback
41
+ - knows when to stop — when critic confidence is high and tests pass
42
+
43
+ The system should optimise for:
44
+
45
+ - correctness (tests pass, no runtime errors)
46
+ - completeness (all specified features implemented)
47
+ - code quality (readable, maintainable, idiomatic)
48
+ - visual fidelity (UI matches specification or reasonable defaults)
49
+ - security (no obvious vulnerabilities)
50
+ - performance (no egregious inefficiencies)
51
+
52
+ ---
53
+
54
+ ## 3. Non-goals
55
+
56
+ This system is not a code autocompleter.
57
+ This system is not a chatbot that answers programming questions.
58
+ This system is not a linter or static analysis tool.
59
+ This system is not limited to a single language or framework.
60
+
61
+ ---
62
+
63
+ ## 4. Core Design Principle
64
+
65
+ Treat the project as four linked representations:
66
+
67
+ 1. **Code layer** — the source files themselves
68
+ 2. **Project-state layer** — what has been implemented, what remains, what depends on what, what has been tested, what has been visually verified
69
+ 3. **Quality layer** — linting results, type checking results, test results, critic scores, security audit results
70
+ 4. **Specification layer** — what was asked for, acceptance criteria, constraints, user preferences
71
+
72
+ No important reasoning should depend on code alone if it can instead depend on structured state.
73
+
74
+ ---
75
+
76
+ ## 5. Operating Model
77
+
78
+ The system works hierarchically:
79
+
80
+ - project specification
81
+ - architecture and technology decisions
82
+ - module decomposition
83
+ - file-level implementation
84
+ - function-level implementation
85
+ - test writing and execution
86
+ - visual inspection (for UI projects)
87
+ - quality audit
88
+ - repair of failures
89
+
90
+ Each level has:
91
+ - its own representation
92
+ - its own goals
93
+ - its own validator types
94
+ - its own repair loop
95
+ - explicit dependency links upward and downward
96
+
97
+ A file-level implementer should not be forced to infer the entire architecture.
98
+ A quality auditor should not flatten local implementation decisions.
99
+
100
+ ---
101
+
102
+ ## 6. Architecture Overview
103
+
104
+ ### 6.1 Primary Subsystems
105
+
106
+ - **Orchestrator** — task decomposition, ordering, state coordination, stop conditions
107
+ - **Project Store** — canonical state: what exists, what works, what's pending
108
+ - **Planning Layer** — architect, module planner, file planner, dependency resolver
109
+ - **Implementation Layer** — file writer, function writer, test writer
110
+ - **Execution Layer** — test runner, build runner, dev server launcher
111
+ - **Observation Layer** — output reader, screenshot taker, vision analyser
112
+ - **Validation Layer** — hard validators (tests pass, types check, lints clean) and soft critics (code quality, security, performance, accessibility, visual fidelity)
113
+ - **Repair Layer** — error diagnosis, targeted fix generation, regression prevention
114
+ - **Context Loader** — reads spec files from the project directory
115
+
116
+ ### 6.2 Execution Phases
117
+
118
+ 1. Project bootstrap (read specs, ask user questions)
119
+ 2. Architecture design (Opus)
120
+ 3. Module and file planning (Sonnet)
121
+ 4. Iterative implementation loop:
122
+ a. Implement next file/module
123
+ b. Run tests
124
+ c. If tests fail → diagnose → repair → re-test (up to N rounds)
125
+ d. Run soft critics
126
+ e. If critics flag issues → repair → re-validate
127
+ f. Visual inspection (if UI project)
128
+ g. Mark module complete
129
+ 5. Integration testing
130
+ 6. Full quality audit
131
+ 7. Visual audit (screenshot every page/state, evaluate with vision)
132
+ 8. Final repair pass
133
+ 9. Declare done — only when all tests pass AND all critics score above threshold
134
+
135
+ ---
136
+
137
+ ## 7. Canonical Data Model
138
+
139
+ ### 7.1 Project Model
140
+
141
+ - title
142
+ - description
143
+ - specification (from context files + user answers)
144
+ - technology stack
145
+ - architecture decisions
146
+ - status (planning, implementing, testing, auditing, complete)
147
+
148
+ ### 7.2 Module Model
149
+
150
+ - name
151
+ - purpose
152
+ - dependencies (other modules)
153
+ - files
154
+ - status (pending, implementing, tested, complete)
155
+ - test coverage
156
+
157
+ ### 7.3 File Model
158
+
159
+ - path
160
+ - purpose
161
+ - language
162
+ - dependencies (other files, external packages)
163
+ - status (pending, written, tested, reviewed)
164
+ - quality scores
165
+
166
+ ### 7.4 Test Model
167
+
168
+ - path
169
+ - type (unit, integration, e2e, visual)
170
+ - target files/modules
171
+ - status (pending, passing, failing)
172
+ - last run output
173
+ - failure diagnosis
174
+
175
+ ### 7.5 Task Model (Obligation Ledger)
176
+
177
+ Similar to Postwriter's promise model. Tracks:
178
+ - what was specified
179
+ - what has been implemented
180
+ - what has been tested
181
+ - what remains
182
+ - what is blocked and by what
183
+
184
+ ---
185
+
186
+ ## 8. Agent Roles
187
+
188
+ ### 8.1 Architect (Opus)
189
+ - Reads specification
190
+ - Chooses technology stack
191
+ - Designs module structure
192
+ - Defines interfaces between modules
193
+ - Identifies critical path
194
+ - Makes build/deploy decisions
195
+
196
+ ### 8.2 Module Planner (Sonnet)
197
+ - Breaks modules into files
198
+ - Defines file purposes and interfaces
199
+ - Orders implementation by dependency
200
+ - Identifies what needs tests
201
+
202
+ ### 8.3 File Implementer (Sonnet)
203
+ - Writes a single file from its specification + context
204
+ - Has access to: file spec, module spec, interfaces of dependencies, project conventions
205
+ - Does NOT see the entire codebase — only what's relevant (context slicing)
206
+
207
+ ### 8.4 Test Writer (Sonnet)
208
+ - Writes tests for implemented files
209
+ - Has access to: the implementation, the spec, the module interfaces
210
+ - Writes unit tests, integration tests, and (for UI) visual test specifications
211
+
212
+ ### 8.5 Test Runner (Haiku + shell execution)
213
+ - Executes test suites
214
+ - Parses output
215
+ - Classifies failures (syntax error, logic error, missing dependency, flaky test)
216
+ - Reports structured results
217
+
218
+ ### 8.6 Visual Inspector (Opus with vision)
219
+ - Takes screenshots of rendered UI (via headless browser)
220
+ - Evaluates visual output against specification
221
+ - Flags: layout issues, missing elements, broken styling, accessibility problems
222
+ - Provides structured feedback for repair
223
+
224
+ ### 8.7 Code Quality Critic (Sonnet)
225
+ - Reviews code for: readability, maintainability, idiomaticity
226
+ - Checks for: dead code, unnecessary complexity, poor naming, missing error handling
227
+ - Scores and provides specific repair suggestions
228
+
229
+ ### 8.8 Security Critic (Sonnet)
230
+ - Reviews for: injection vulnerabilities, exposed secrets, insecure defaults, missing input validation
231
+ - Checks dependencies for known vulnerabilities
232
+
233
+ ### 8.9 Performance Critic (Sonnet)
234
+ - Reviews for: N+1 queries, unnecessary re-renders, missing indexes, unbounded loops, memory leaks
235
+ - Provides specific suggestions
236
+
237
+ ### 8.10 Error Diagnostician (Sonnet)
238
+ - Receives: test failure output, stack trace, relevant source code
239
+ - Produces: diagnosis (root cause), proposed fix (specific code change), confidence level
240
+
241
+ ### 8.11 Repair Agent (Sonnet)
242
+ - Receives: diagnosis + proposed fix + preserve constraints
243
+ - Applies the minimum change needed
244
+ - Does not refactor beyond the fix
245
+ - Does not introduce new features
246
+
247
+ ### 8.12 Integration Auditor (Opus)
248
+ - Runs after all modules are individually complete
249
+ - Tests cross-module interactions
250
+ - Identifies integration issues
251
+ - Proposes structural fixes if needed
252
+
253
+ ---
254
+
255
+ ## 9. The Implementation Loop
256
+
257
+ This is the core cycle. For each file or module:
258
+
259
+ ```
260
+ implement(file_spec, context):
261
+ code = file_implementer.write(file_spec, context)
262
+ save(code)
263
+
264
+ for round in range(max_rounds):
265
+ test_results = test_runner.run(relevant_tests)
266
+
267
+ if test_results.all_pass:
268
+ critics = run_critics(code)
269
+ if critics.all_above_threshold:
270
+ return ACCEPT
271
+ else:
272
+ diagnosis = identify_weakest_dimension(critics)
273
+ code = repair_agent.fix(code, diagnosis)
274
+ save(code)
275
+ continue
276
+
277
+ diagnosis = error_diagnostician.diagnose(test_results)
278
+ code = repair_agent.fix(code, diagnosis)
279
+ save(code)
280
+
281
+ return ACCEPT_WITH_WARNINGS # best effort after max rounds
282
+ ```
283
+
284
+ For UI projects, add after critic pass:
285
+ ```
286
+ screenshot = take_screenshot(relevant_pages)
287
+ visual_eval = visual_inspector.evaluate(screenshot, spec)
288
+ if visual_eval.issues:
289
+ code = repair_agent.fix(code, visual_eval)
290
+ save(code)
291
+ ```
292
+
293
+ ---
294
+
295
+ ## 10. Context Slicing
296
+
297
+ Each agent receives only what it needs:
298
+
299
+ - **Architect**: full spec, technology constraints, user preferences
300
+ - **File implementer**: file spec, module interface, dependency interfaces, project conventions, 2-3 related files for style reference
301
+ - **Test writer**: the implementation, the spec, the module interface
302
+ - **Repair agent**: the failing code, the error, the diagnosis, preserve constraints
303
+ - **Visual inspector**: the screenshot, the spec for that page/component
304
+
305
+ Token budget awareness: trim oldest/least-relevant context when approaching limits.
306
+
307
+ ---
308
+
309
+ ## 11. Context Files
310
+
311
+ Users can place files in a `context/` directory:
312
+
313
+ - `spec.md` — project specification
314
+ - `architecture.md` — architectural preferences or constraints
315
+ - `style.md` — coding style preferences
316
+ - `tech-stack.md` — technology requirements
317
+ - `reference/` — example code, API documentation, design mockups
318
+ - `*.png`, `*.jpg` — UI mockups (processed by vision)
319
+
320
+ All optional. If present, they inform the planning agents and reduce the number of bootstrap questions.
321
+
322
+ ---
323
+
324
+ ## 12. Stop Conditions
325
+
326
+ The system declares the project complete when ALL of:
327
+
328
+ 1. All specified features have corresponding implementations
329
+ 2. All tests pass
330
+ 3. No hard validator failures (lint, types, build)
331
+ 4. All soft critic scores above threshold
332
+ 5. Visual inspection passes (if UI project)
333
+ 6. The obligation ledger has no unresolved items
334
+
335
+ If any condition cannot be met after max repair rounds, the system reports what remains unresolved and asks for human guidance.
336
+
337
+ ---
338
+
339
+ ## 13. Validation Model
340
+
341
+ ### 13.1 Hard Validators (pass/fail, block acceptance)
342
+
343
+ - Tests pass
344
+ - TypeScript/mypy/equivalent type check passes
345
+ - Linter passes (or only warnings, no errors)
346
+ - Build succeeds
347
+ - No import errors
348
+ - No runtime crashes on startup
349
+
350
+ ### 13.2 Soft Critics (scored, influence repair priority)
351
+
352
+ - Code readability (0-1)
353
+ - Maintainability (0-1)
354
+ - Idiomaticity (0-1)
355
+ - Security (0-1)
356
+ - Performance (0-1)
357
+ - Accessibility (0-1, for UI projects)
358
+ - Visual fidelity (0-1, for UI projects)
359
+ - Test coverage adequacy (0-1)
360
+ - Error handling completeness (0-1)
361
+
362
+ ---
363
+
364
+ ## 14. Visual Inspection
365
+
366
+ For projects with UI:
367
+
368
+ 1. Launch a headless browser (Playwright)
369
+ 2. Navigate to each page/state defined in the spec
370
+ 3. Take screenshots
371
+ 4. Send screenshots to Opus with vision
372
+ 5. Opus evaluates: layout correctness, visual completeness, responsiveness, obvious bugs
373
+ 6. Structured feedback fed back to repair agent
374
+
375
+ This is the "viewing the result" that distinguishes Adam from blind code generation.
376
+
377
+ ---
378
+
379
+ ## 15. Model Tiering
380
+
381
+ | Tier | Role | Used for |
382
+ |------|------|----------|
383
+ | Opus | Architectural reasoning, visual inspection | Project design, major decision points, screenshot evaluation |
384
+ | Sonnet | Implementation, critique, repair | File writing, test writing, all critics, error diagnosis, fixes |
385
+ | Haiku | Mechanical validation | Parsing test output, linting, type checking, file existence, dependency resolution |
386
+
387
+ ---
388
+
389
+ ## 16. Repair Philosophy
390
+
391
+ Identical to Postwriter:
392
+
393
+ - Narrow: fix only what's broken
394
+ - Ordered: highest-priority issues first
395
+ - Traceable: every fix linked to a diagnosis
396
+ - Reversible: git commits after each accepted change
397
+ - Minimally destructive: don't refactor what works
398
+
399
+ ---
400
+
401
+ ## 17. Failure Modes to Guard Against
402
+
403
+ - **Over-engineering**: building abstractions before they're needed
404
+ - **Repair flattening**: too many fix rounds making code worse
405
+ - **Test gaming**: writing code to pass tests rather than to meet the spec
406
+ - **Critic monoculture**: all critics converging on the same bland style
407
+ - **Context drift**: losing track of the project's purpose in the details
408
+ - **Dependency hell**: pulling in packages to solve problems that don't exist
409
+
410
+ ---
411
+
412
+ ## 18. Key Differences from Postwriter
413
+
414
+ | Aspect | Postwriter | Adam |
415
+ |--------|-----------|------|
416
+ | Output | Prose | Code |
417
+ | Verification | Critics score text | Tests pass or fail |
418
+ | Observation | Soft quality metrics | Hard test results + visual inspection |
419
+ | Branching | Multiple rhetorical strategies | Possibly multiple implementation strategies for critical components |
420
+ | Canon | Character states, promises | Module states, obligation ledger |
421
+ | Repair trigger | Low scores, hard validation failure | Test failure, critic flags, visual bugs |
422
+ | Stop condition | Composite score threshold | All tests pass + all critics satisfied |
423
+
424
+ ---
425
+
426
+ ## 19. Implementation Principles
427
+
428
+ - Tests are first-class citizens, not afterthoughts
429
+ - Every file gets committed to git after acceptance
430
+ - Visual inspection is not optional for UI projects
431
+ - The system should be honest about what it cannot verify
432
+ - Human review is available at any point but should rarely be needed
433
+ - The obligation ledger is the source of truth for completeness
434
+ - Context slicing is critical — agents must not see the whole project when they don't need to
435
+ - Repair rounds have hard limits to prevent infinite loops
436
+
437
+ ---
438
+
439
+ ## 20. Operating Assumptions
440
+
441
+ - One-pass code generation is unreliable for projects beyond a few files
442
+ - Tests catch bugs that critics miss; critics catch quality issues that tests miss
443
+ - Visual inspection catches UI bugs that no amount of code review will find
444
+ - The repair loop converges for most issues within 3-5 rounds
445
+ - Explicit project state prevents the drift that kills long-horizon generation
446
+ - Human engineering judgement remains necessary for taste, priority, and ambiguous requirements
447
+
448
+ ---
449
+
450
+ ## 21. CLI Behaviour
451
+
452
+ Running `adam` in any directory should:
453
+
454
+ 1. Check for a `.adam` project state file
455
+ 2. If none exists: start a new project
456
+ - Load context files from `context/` if present
457
+ - Ask the user questions about the project (skipping what context files answer)
458
+ - Design architecture
459
+ - Implement iteratively
460
+ 3. If a project exists and is in progress: offer to resume
461
+ 4. If a project exists and is complete: offer to revise, extend, or start new
462
+
463
+ The system should produce clear, real-time progress output showing what it's doing, what's passing, what's failing, and how far along it is.