adam-cli 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of adam-cli might be problematic. Click here for more details.
- adam_cli-0.1.0/.claude/settings.local.json +74 -0
- adam_cli-0.1.0/.gitignore +12 -0
- adam_cli-0.1.0/CLAUDE.md +463 -0
- adam_cli-0.1.0/PKG-INFO +286 -0
- adam_cli-0.1.0/README.md +258 -0
- adam_cli-0.1.0/alembic.ini +45 -0
- adam_cli-0.1.0/docker-compose.yml +32 -0
- adam_cli-0.1.0/examples/balagan-tower-defense/README.md +60 -0
- adam_cli-0.1.0/examples/balagan-tower-defense/spec.md +248 -0
- adam_cli-0.1.0/examples/balagan-tower-defense/tech-stack.md +50 -0
- adam_cli-0.1.0/pyproject.toml +67 -0
- adam_cli-0.1.0/src/adam/__init__.py +3 -0
- adam_cli-0.1.0/src/adam/__main__.py +5 -0
- adam_cli-0.1.0/src/adam/agents/__init__.py +20 -0
- adam_cli-0.1.0/src/adam/agents/architect.py +50 -0
- adam_cli-0.1.0/src/adam/agents/base.py +326 -0
- adam_cli-0.1.0/src/adam/agents/diagnostician.py +48 -0
- adam_cli-0.1.0/src/adam/agents/file_implementer.py +62 -0
- adam_cli-0.1.0/src/adam/agents/integration_auditor.py +61 -0
- adam_cli-0.1.0/src/adam/agents/module_planner.py +62 -0
- adam_cli-0.1.0/src/adam/agents/repair_agent.py +62 -0
- adam_cli-0.1.0/src/adam/agents/route_discoverer.py +165 -0
- adam_cli-0.1.0/src/adam/agents/scaffolder.py +104 -0
- adam_cli-0.1.0/src/adam/agents/test_writer.py +63 -0
- adam_cli-0.1.0/src/adam/cli/__init__.py +1 -0
- adam_cli-0.1.0/src/adam/cli/app.py +391 -0
- adam_cli-0.1.0/src/adam/cli/bootstrap.py +216 -0
- adam_cli-0.1.0/src/adam/cli/checkpoints.py +110 -0
- adam_cli-0.1.0/src/adam/cli/display.py +306 -0
- adam_cli-0.1.0/src/adam/config.py +69 -0
- adam_cli-0.1.0/src/adam/context/__init__.py +19 -0
- adam_cli-0.1.0/src/adam/context/condenser.py +97 -0
- adam_cli-0.1.0/src/adam/context/loader.py +340 -0
- adam_cli-0.1.0/src/adam/db/__init__.py +0 -0
- adam_cli-0.1.0/src/adam/db/migrations/__init__.py +0 -0
- adam_cli-0.1.0/src/adam/db/migrations/env.py +57 -0
- adam_cli-0.1.0/src/adam/db/migrations/script.py.mako +26 -0
- adam_cli-0.1.0/src/adam/db/migrations/versions/001_initial_schema.py +341 -0
- adam_cli-0.1.0/src/adam/db/migrations/versions/__init__.py +0 -0
- adam_cli-0.1.0/src/adam/db/session.py +73 -0
- adam_cli-0.1.0/src/adam/errors.py +31 -0
- adam_cli-0.1.0/src/adam/execution/__init__.py +13 -0
- adam_cli-0.1.0/src/adam/execution/dependencies.py +202 -0
- adam_cli-0.1.0/src/adam/execution/dev_server.py +333 -0
- adam_cli-0.1.0/src/adam/execution/runner.py +166 -0
- adam_cli-0.1.0/src/adam/git/__init__.py +5 -0
- adam_cli-0.1.0/src/adam/git/manager.py +183 -0
- adam_cli-0.1.0/src/adam/inspection/__init__.py +20 -0
- adam_cli-0.1.0/src/adam/inspection/api_smoke.py +256 -0
- adam_cli-0.1.0/src/adam/inspection/cli_verify.py +306 -0
- adam_cli-0.1.0/src/adam/inspection/evaluator.py +192 -0
- adam_cli-0.1.0/src/adam/inspection/screenshotter.py +167 -0
- adam_cli-0.1.0/src/adam/llm/__init__.py +5 -0
- adam_cli-0.1.0/src/adam/llm/client.py +255 -0
- adam_cli-0.1.0/src/adam/llm/json_extract.py +129 -0
- adam_cli-0.1.0/src/adam/logging_config.py +69 -0
- adam_cli-0.1.0/src/adam/models/__init__.py +26 -0
- adam_cli-0.1.0/src/adam/models/analytics.py +86 -0
- adam_cli-0.1.0/src/adam/models/base.py +33 -0
- adam_cli-0.1.0/src/adam/models/core.py +93 -0
- adam_cli-0.1.0/src/adam/models/events.py +31 -0
- adam_cli-0.1.0/src/adam/models/obligations.py +29 -0
- adam_cli-0.1.0/src/adam/models/testing.py +32 -0
- adam_cli-0.1.0/src/adam/orchestrator/__init__.py +20 -0
- adam_cli-0.1.0/src/adam/orchestrator/checkpoint.py +103 -0
- adam_cli-0.1.0/src/adam/orchestrator/engine.py +1169 -0
- adam_cli-0.1.0/src/adam/orchestrator/file_loop.py +415 -0
- adam_cli-0.1.0/src/adam/orchestrator/obligations.py +185 -0
- adam_cli-0.1.0/src/adam/orchestrator/planner.py +419 -0
- adam_cli-0.1.0/src/adam/orchestrator/policies.py +20 -0
- adam_cli-0.1.0/src/adam/orchestrator/stop_conditions.py +133 -0
- adam_cli-0.1.0/src/adam/profiles.py +100 -0
- adam_cli-0.1.0/src/adam/project.py +67 -0
- adam_cli-0.1.0/src/adam/prompts/__init__.py +5 -0
- adam_cli-0.1.0/src/adam/prompts/loader.py +41 -0
- adam_cli-0.1.0/src/adam/prompts/templates/architect.j2 +55 -0
- adam_cli-0.1.0/src/adam/prompts/templates/code_quality.j2 +46 -0
- adam_cli-0.1.0/src/adam/prompts/templates/diagnostician.j2 +42 -0
- adam_cli-0.1.0/src/adam/prompts/templates/file_implementer.j2 +87 -0
- adam_cli-0.1.0/src/adam/prompts/templates/integration_auditor.j2 +51 -0
- adam_cli-0.1.0/src/adam/prompts/templates/module_planner.j2 +63 -0
- adam_cli-0.1.0/src/adam/prompts/templates/performance.j2 +49 -0
- adam_cli-0.1.0/src/adam/prompts/templates/repair_agent.j2 +56 -0
- adam_cli-0.1.0/src/adam/prompts/templates/route_discoverer.j2 +37 -0
- adam_cli-0.1.0/src/adam/prompts/templates/security.j2 +50 -0
- adam_cli-0.1.0/src/adam/prompts/templates/test_writer.j2 +45 -0
- adam_cli-0.1.0/src/adam/repair/__init__.py +5 -0
- adam_cli-0.1.0/src/adam/repair/planner.py +105 -0
- adam_cli-0.1.0/src/adam/store/__init__.py +7 -0
- adam_cli-0.1.0/src/adam/store/events.py +76 -0
- adam_cli-0.1.0/src/adam/store/slicer.py +193 -0
- adam_cli-0.1.0/src/adam/store/store.py +297 -0
- adam_cli-0.1.0/src/adam/types.py +253 -0
- adam_cli-0.1.0/src/adam/validation/__init__.py +13 -0
- adam_cli-0.1.0/src/adam/validation/base.py +139 -0
- adam_cli-0.1.0/src/adam/validation/file_classifier.py +120 -0
- adam_cli-0.1.0/src/adam/validation/hard/__init__.py +1 -0
- adam_cli-0.1.0/src/adam/validation/hard/build_checker.py +41 -0
- adam_cli-0.1.0/src/adam/validation/hard/lint_runner.py +40 -0
- adam_cli-0.1.0/src/adam/validation/hard/test_runner.py +41 -0
- adam_cli-0.1.0/src/adam/validation/hard/type_checker.py +40 -0
- adam_cli-0.1.0/src/adam/validation/soft/__init__.py +1 -0
- adam_cli-0.1.0/src/adam/validation/soft/code_quality.py +71 -0
- adam_cli-0.1.0/src/adam/validation/soft/performance.py +69 -0
- adam_cli-0.1.0/src/adam/validation/soft/security.py +69 -0
- adam_cli-0.1.0/tests/__init__.py +0 -0
- adam_cli-0.1.0/tests/test_bootstrap.py +127 -0
- adam_cli-0.1.0/tests/test_checkpoints.py +24 -0
- adam_cli-0.1.0/tests/test_context_loader.py +111 -0
- adam_cli-0.1.0/tests/test_dependencies.py +93 -0
- adam_cli-0.1.0/tests/test_dev_server.py +53 -0
- adam_cli-0.1.0/tests/test_display.py +40 -0
- adam_cli-0.1.0/tests/test_e2e.py +485 -0
- adam_cli-0.1.0/tests/test_execution.py +79 -0
- adam_cli-0.1.0/tests/test_file_classifier.py +95 -0
- adam_cli-0.1.0/tests/test_file_loop_extras.py +51 -0
- adam_cli-0.1.0/tests/test_git_manager.py +113 -0
- adam_cli-0.1.0/tests/test_inspection.py +85 -0
- adam_cli-0.1.0/tests/test_integration.py +351 -0
- adam_cli-0.1.0/tests/test_json_extract.py +74 -0
- adam_cli-0.1.0/tests/test_llm_client.py +71 -0
- adam_cli-0.1.0/tests/test_obligations.py +41 -0
- adam_cli-0.1.0/tests/test_observation.py +265 -0
- adam_cli-0.1.0/tests/test_profiles.py +46 -0
- adam_cli-0.1.0/tests/test_project.py +38 -0
- adam_cli-0.1.0/tests/test_prompts.py +67 -0
- adam_cli-0.1.0/tests/test_repair_planner.py +91 -0
- adam_cli-0.1.0/tests/test_stop_conditions.py +146 -0
- adam_cli-0.1.0/tests/test_types.py +91 -0
- adam_cli-0.1.0/tests/test_validation.py +92 -0
- adam_cli-0.1.0/tests/test_visual_pipeline.py +178 -0
- adam_cli-0.1.0/uv.lock +1100 -0
|
@@ -0,0 +1,74 @@
{
  "permissions": {
    "allow": [
      "Bash(for dir:*)",
      "Bash(do touch:*)",
      "Bash(done)",
      "Bash(uv sync:*)",
      "Bash(uv run:*)",
      "Bash(ALEMBIC_CONFIG=alembic.ini uv run alembic revision --autogenerate -m \"initial_schema\" --sql)",
      "Bash(find /Users/avramscore/Projects/adam/src/adam -name \"*.py\" -not -path \"*__pycache__*\" -exec cat {} +)",
      "Bash(find /Users/avramscore/Projects/adam/tests -name \"*.py\" -not -path \"*__pycache__*\" -exec cat {} +)",
      "Bash(find /Users/avramscore/Projects/adam/src/adam -name \"*.j2\" -exec cat {} +)",
      "Bash(grep:*)",
      "Bash(python3:*)",
      "WebSearch",
      "WebFetch(domain:kenney.nl)",
      "WebFetch(domain:opengameart.org)",
      "Bash(mkdir -p /tmp/kenney-td)",
      "Bash(curl -L -o /tmp/kenney-td/pack.zip \"https://kenney.nl/media/pages/assets/tower-defense-top-down/07c78e15e4-1716898025/kenney_tower-defense-top-down.zip\")",
      "Read(//private/tmp/kenney-td/**)",
      "Bash(mkdir -p /Users/avramscore/Projects/balagan/context/assets)",
      "Bash(cp /tmp/balagan-spec/spec.md /Users/avramscore/Projects/balagan/context/spec.md)",
      "Bash(cp /tmp/balagan-spec/tech-stack.md /Users/avramscore/Projects/balagan/context/tech-stack.md)",
      "Bash(cp \"/tmp/kenney-td/PNG/Default size/\"*.png /Users/avramscore/Projects/balagan/context/assets/)",
      "Read(//Users/avramscore/Projects/balagan/context/assets/**)",
      "Bash(find \"/tmp/kenney-td/PNG/Default size\" -name \"*.png\" -exec cp {} /Users/avramscore/Projects/balagan/context/assets/)",
      "Bash(for f:*)",
      "Bash(do sed:*)",
      "WebFetch(domain:docs.anthropic.com)",
      "Bash(cd:*)",
      "Bash(do)",
      "Bash(sed -i '' 's/notes: str = \"\"\"\"/notes: Any = \"\"\"\"/' \"$f\")",
      "Bash(cp /Users/avramscore/Projects/postwriter/site/package.json /Users/avramscore/Projects/adam-site/)",
      "Bash(cp /Users/avramscore/Projects/postwriter/site/tsconfig.json /Users/avramscore/Projects/adam-site/)",
      "Bash(cp /Users/avramscore/Projects/postwriter/site/tsconfig.app.json /Users/avramscore/Projects/adam-site/)",
      "Bash(cp /Users/avramscore/Projects/postwriter/site/tsconfig.node.json /Users/avramscore/Projects/adam-site/)",
      "Bash(cp /Users/avramscore/Projects/postwriter/site/vite.config.ts /Users/avramscore/Projects/adam-site/)",
      "Bash(npm install:*)",
      "Bash(npm run:*)",
      "Bash(source ~/.nvm/nvm.sh)",
      "Bash(nvm use:*)",
      "Bash(fnm use:*)",
      "Bash(open:*)",
      "Bash(lsof -ti:4173,4174,5174,5175)",
      "Bash(xargs kill:*)",
      "Bash(git add:*)",
      "Bash(git commit:*)",
      "Bash(git push:*)",
      "Bash(ls:*)",
      "Read(//tmp/**)",
      "Read(//Users/avramscore/.ssh/**)",
      "Bash(ssh:*)",
      "Bash(chmod:*)",
      "Bash(rsync -avz --delete dist/ root@204.168.219.211:/var/www/meetadam/)",
      "Bash(dig +short meetadam.app A)",
      "Bash(nslookup meetadam.app)",
      "Bash(curl -sI https://meetadam.app)",
      "Bash(curl -svI https://meetadam.app)",
      "Bash(curl -sI http://meetadam.app)",
      "Bash(find /Users/avramscore/Projects/balagan/tests -name *.ts)",
      "Bash(xargs cat:*)",
      "Bash(uv pip:*)",
      "WebFetch(domain:mysecond.app)",
      "Bash(npx tsc:*)",
      "Bash(git reset:*)",
      "Bash(git rm:*)",
      "Bash(git remote:*)",
      "Bash(rsync -avz --delete dist/ root@204.168.219.211:/var/www/meetadam/play/)",
      "Bash(curl -sI https://meetadam.app/play)",
      "Bash(pip index:*)",
      "Bash(uv build:*)"
    ]
  }
}
|
adam_cli-0.1.0/CLAUDE.md
ADDED
|
@@ -0,0 +1,463 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
## Project: Adam — Orchestrated Long-Form Software Engineering
|
|
4
|
+
|
|
5
|
+
This document specifies a system for generating, testing, inspecting, and iterating complete software projects using a hierarchy of specialised agents, explicit project state, multi-pass critique, and repair loops. The architecture is derived from the Postwriter novel generation system, adapted for code. The core insight is the same: one-pass generation is inadequate for serious engineering, and the solution is an orchestrated pipeline of narrow agents operating in act→observe→verify→repair cycles.
|
|
6
|
+
|
|
7
|
+
The target is not code completion or snippet generation. The target is a system that autonomously builds entire software projects — from architecture through implementation through testing through visual inspection — and does not stop until critic agents have high confidence the project meets its specification.
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## 1. Cognitive Model
|
|
12
|
+
|
|
13
|
+
The system models software engineering as a layered cognitive process, analogous to how the human brain uses different subsystems for different tasks:
|
|
14
|
+
|
|
15
|
+
### Layer 1: Pattern Recognition (Haiku tier)
|
|
16
|
+
Fast, cheap, mechanical checks. Syntax validation, linting, type checking, import resolution, file existence verification. These are the "looks like rain" judgements — no deep reasoning required.
|
|
17
|
+
|
|
18
|
+
### Layer 2: Trained Intuition (Sonnet tier)
|
|
19
|
+
Implementation decisions that an experienced engineer makes without deep deliberation. Choosing data structures, naming conventions, file organisation, API design patterns, error handling strategies. The workhorse layer — most code is written here.
|
|
20
|
+
|
|
21
|
+
### Layer 3: Architectural Reasoning (Opus tier)
|
|
22
|
+
High-level design decisions requiring broad context and deep reasoning. System architecture, technology selection, interface design, dependency management, performance strategy, security model. Used sparingly but critically — at project inception and at major structural decision points.
|
|
23
|
+
|
|
24
|
+
### Layer 4: Verification and Critique (mixed tiers)
|
|
25
|
+
Observation of the system's own output. Running tests, reading error messages, viewing rendered UI, checking accessibility, measuring performance, evaluating code quality. This is the feedback loop that makes the system self-correcting.
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## 2. Objective
|
|
30
|
+
|
|
31
|
+
Build an orchestration framework for software engineering that:
|
|
32
|
+
|
|
33
|
+
- plans at multiple scales (architecture → modules → files → functions)
|
|
34
|
+
- implements at the file level with awareness of the full project
|
|
35
|
+
- maintains explicit canonical project state outside the code
|
|
36
|
+
- tests continuously and automatically
|
|
37
|
+
- visually inspects rendered output using vision-capable models
|
|
38
|
+
- audits code quality, security, accessibility, and performance
|
|
39
|
+
- performs constrained repair loops when tests fail or critics flag issues
|
|
40
|
+
- supports iterative refinement based on human feedback
|
|
41
|
+
- knows when to stop — when critic confidence is high and tests pass
|
|
42
|
+
|
|
43
|
+
The system should optimise for:
|
|
44
|
+
|
|
45
|
+
- correctness (tests pass, no runtime errors)
|
|
46
|
+
- completeness (all specified features implemented)
|
|
47
|
+
- code quality (readable, maintainable, idiomatic)
|
|
48
|
+
- visual fidelity (UI matches specification or reasonable defaults)
|
|
49
|
+
- security (no obvious vulnerabilities)
|
|
50
|
+
- performance (no egregious inefficiencies)
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## 3. Non-goals
|
|
55
|
+
|
|
56
|
+
This system is not a code autocompleter.
|
|
57
|
+
This system is not a chatbot that answers programming questions.
|
|
58
|
+
This system is not a linter or static analysis tool.
|
|
59
|
+
This system is not limited to a single language or framework.
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
## 4. Core Design Principle
|
|
64
|
+
|
|
65
|
+
Treat the project as four linked representations:
|
|
66
|
+
|
|
67
|
+
1. **Code layer** — the source files themselves
|
|
68
|
+
2. **Project-state layer** — what has been implemented, what remains, what depends on what, what has been tested, what has been visually verified
|
|
69
|
+
3. **Quality layer** — linting results, type checking results, test results, critic scores, security audit results
|
|
70
|
+
4. **Specification layer** — what was asked for, acceptance criteria, constraints, user preferences
|
|
71
|
+
|
|
72
|
+
No important reasoning should depend on code alone if it can instead depend on structured state.
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
## 5. Operating Model
|
|
77
|
+
|
|
78
|
+
The system works hierarchically:
|
|
79
|
+
|
|
80
|
+
- project specification
|
|
81
|
+
- architecture and technology decisions
|
|
82
|
+
- module decomposition
|
|
83
|
+
- file-level implementation
|
|
84
|
+
- function-level implementation
|
|
85
|
+
- test writing and execution
|
|
86
|
+
- visual inspection (for UI projects)
|
|
87
|
+
- quality audit
|
|
88
|
+
- repair of failures
|
|
89
|
+
|
|
90
|
+
Each level has:
|
|
91
|
+
- its own representation
|
|
92
|
+
- its own goals
|
|
93
|
+
- its own validator types
|
|
94
|
+
- its own repair loop
|
|
95
|
+
- explicit dependency links upward and downward
|
|
96
|
+
|
|
97
|
+
A file-level implementer should not be forced to infer the entire architecture.
|
|
98
|
+
A quality auditor should not flatten local implementation decisions.
|
|
99
|
+
|
|
100
|
+
---
|
|
101
|
+
|
|
102
|
+
## 6. Architecture Overview
|
|
103
|
+
|
|
104
|
+
### 6.1 Primary Subsystems
|
|
105
|
+
|
|
106
|
+
- **Orchestrator** — task decomposition, ordering, state coordination, stop conditions
|
|
107
|
+
- **Project Store** — canonical state: what exists, what works, what's pending
|
|
108
|
+
- **Planning Layer** — architect, module planner, file planner, dependency resolver
|
|
109
|
+
- **Implementation Layer** — file writer, function writer, test writer
|
|
110
|
+
- **Execution Layer** — test runner, build runner, dev server launcher
|
|
111
|
+
- **Observation Layer** — output reader, screenshot taker, vision analyser
|
|
112
|
+
- **Validation Layer** — hard validators (tests pass, types check, lints clean) and soft critics (code quality, security, performance, accessibility, visual fidelity)
|
|
113
|
+
- **Repair Layer** — error diagnosis, targeted fix generation, regression prevention
|
|
114
|
+
- **Context Loader** — reads spec files from the project directory
|
|
115
|
+
|
|
116
|
+
### 6.2 Execution Phases
|
|
117
|
+
|
|
118
|
+
1. Project bootstrap (read specs, ask user questions)
|
|
119
|
+
2. Architecture design (Opus)
|
|
120
|
+
3. Module and file planning (Sonnet)
|
|
121
|
+
4. Iterative implementation loop:
|
|
122
|
+
a. Implement next file/module
|
|
123
|
+
b. Run tests
|
|
124
|
+
c. If tests fail → diagnose → repair → re-test (up to N rounds)
|
|
125
|
+
d. Run soft critics
|
|
126
|
+
e. If critics flag issues → repair → re-validate
|
|
127
|
+
f. Visual inspection (if UI project)
|
|
128
|
+
g. Mark module complete
|
|
129
|
+
5. Integration testing
|
|
130
|
+
6. Full quality audit
|
|
131
|
+
7. Visual audit (screenshot every page/state, evaluate with vision)
|
|
132
|
+
8. Final repair pass
|
|
133
|
+
9. Declare done — only when all tests pass AND all critics score above threshold
|
|
134
|
+
|
|
135
|
+
---
|
|
136
|
+
|
|
137
|
+
## 7. Canonical Data Model
|
|
138
|
+
|
|
139
|
+
### 7.1 Project Model
|
|
140
|
+
|
|
141
|
+
- title
|
|
142
|
+
- description
|
|
143
|
+
- specification (from context files + user answers)
|
|
144
|
+
- technology stack
|
|
145
|
+
- architecture decisions
|
|
146
|
+
- status (planning, implementing, testing, auditing, complete)
|
|
147
|
+
|
|
148
|
+
### 7.2 Module Model
|
|
149
|
+
|
|
150
|
+
- name
|
|
151
|
+
- purpose
|
|
152
|
+
- dependencies (other modules)
|
|
153
|
+
- files
|
|
154
|
+
- status (pending, implementing, tested, complete)
|
|
155
|
+
- test coverage
|
|
156
|
+
|
|
157
|
+
### 7.3 File Model
|
|
158
|
+
|
|
159
|
+
- path
|
|
160
|
+
- purpose
|
|
161
|
+
- language
|
|
162
|
+
- dependencies (other files, external packages)
|
|
163
|
+
- status (pending, written, tested, reviewed)
|
|
164
|
+
- quality scores
|
|
165
|
+
|
|
166
|
+
### 7.4 Test Model
|
|
167
|
+
|
|
168
|
+
- path
|
|
169
|
+
- type (unit, integration, e2e, visual)
|
|
170
|
+
- target files/modules
|
|
171
|
+
- status (pending, passing, failing)
|
|
172
|
+
- last run output
|
|
173
|
+
- failure diagnosis
|
|
174
|
+
|
|
175
|
+
### 7.5 Task Model (Obligation Ledger)
|
|
176
|
+
|
|
177
|
+
Similar to Postwriter's promise model. Tracks:
|
|
178
|
+
- what was specified
|
|
179
|
+
- what has been implemented
|
|
180
|
+
- what has been tested
|
|
181
|
+
- what remains
|
|
182
|
+
- what is blocked and by what
|
|
183
|
+
|
|
184
|
+
---
|
|
185
|
+
|
|
186
|
+
## 8. Agent Roles
|
|
187
|
+
|
|
188
|
+
### 8.1 Architect (Opus)
|
|
189
|
+
- Reads specification
|
|
190
|
+
- Chooses technology stack
|
|
191
|
+
- Designs module structure
|
|
192
|
+
- Defines interfaces between modules
|
|
193
|
+
- Identifies critical path
|
|
194
|
+
- Makes build/deploy decisions
|
|
195
|
+
|
|
196
|
+
### 8.2 Module Planner (Sonnet)
|
|
197
|
+
- Breaks modules into files
|
|
198
|
+
- Defines file purposes and interfaces
|
|
199
|
+
- Orders implementation by dependency
|
|
200
|
+
- Identifies what needs tests
|
|
201
|
+
|
|
202
|
+
### 8.3 File Implementer (Sonnet)
|
|
203
|
+
- Writes a single file from its specification + context
|
|
204
|
+
- Has access to: file spec, module spec, interfaces of dependencies, project conventions
|
|
205
|
+
- Does NOT see the entire codebase — only what's relevant (context slicing)
|
|
206
|
+
|
|
207
|
+
### 8.4 Test Writer (Sonnet)
|
|
208
|
+
- Writes tests for implemented files
|
|
209
|
+
- Has access to: the implementation, the spec, the module interfaces
|
|
210
|
+
- Writes unit tests, integration tests, and (for UI) visual test specifications
|
|
211
|
+
|
|
212
|
+
### 8.5 Test Runner (Haiku + shell execution)
|
|
213
|
+
- Executes test suites
|
|
214
|
+
- Parses output
|
|
215
|
+
- Classifies failures (syntax error, logic error, missing dependency, flaky test)
|
|
216
|
+
- Reports structured results
|
|
217
|
+
|
|
218
|
+
### 8.6 Visual Inspector (Opus with vision)
|
|
219
|
+
- Takes screenshots of rendered UI (via headless browser)
|
|
220
|
+
- Evaluates visual output against specification
|
|
221
|
+
- Flags: layout issues, missing elements, broken styling, accessibility problems
|
|
222
|
+
- Provides structured feedback for repair
|
|
223
|
+
|
|
224
|
+
### 8.7 Code Quality Critic (Sonnet)
|
|
225
|
+
- Reviews code for: readability, maintainability, idiomaticity
|
|
226
|
+
- Checks for: dead code, unnecessary complexity, poor naming, missing error handling
|
|
227
|
+
- Scores and provides specific repair suggestions
|
|
228
|
+
|
|
229
|
+
### 8.8 Security Critic (Sonnet)
|
|
230
|
+
- Reviews for: injection vulnerabilities, exposed secrets, insecure defaults, missing input validation
|
|
231
|
+
- Checks dependencies for known vulnerabilities
|
|
232
|
+
|
|
233
|
+
### 8.9 Performance Critic (Sonnet)
|
|
234
|
+
- Reviews for: N+1 queries, unnecessary re-renders, missing indexes, unbounded loops, memory leaks
|
|
235
|
+
- Provides specific suggestions
|
|
236
|
+
|
|
237
|
+
### 8.10 Error Diagnostician (Sonnet)
|
|
238
|
+
- Receives: test failure output, stack trace, relevant source code
|
|
239
|
+
- Produces: diagnosis (root cause), proposed fix (specific code change), confidence level
|
|
240
|
+
|
|
241
|
+
### 8.11 Repair Agent (Sonnet)
|
|
242
|
+
- Receives: diagnosis + proposed fix + preserve constraints
|
|
243
|
+
- Applies the minimum change needed
|
|
244
|
+
- Does not refactor beyond the fix
|
|
245
|
+
- Does not introduce new features
|
|
246
|
+
|
|
247
|
+
### 8.12 Integration Auditor (Opus)
|
|
248
|
+
- Runs after all modules are individually complete
|
|
249
|
+
- Tests cross-module interactions
|
|
250
|
+
- Identifies integration issues
|
|
251
|
+
- Proposes structural fixes if needed
|
|
252
|
+
|
|
253
|
+
---
|
|
254
|
+
|
|
255
|
+
## 9. The Implementation Loop
|
|
256
|
+
|
|
257
|
+
This is the core cycle. For each file or module:
|
|
258
|
+
|
|
259
|
+
```
implement(file_spec, context):
    code = file_implementer.write(file_spec, context)
    save(code)

    for round in range(max_rounds):
        test_results = test_runner.run(relevant_tests)

        if test_results.all_pass:
            critics = run_critics(code)
            if critics.all_above_threshold:
                return ACCEPT
            else:
                diagnosis = identify_weakest_dimension(critics)
                code = repair_agent.fix(code, diagnosis)
                save(code)
                continue

        diagnosis = error_diagnostician.diagnose(test_results)
        code = repair_agent.fix(code, diagnosis)
        save(code)

    return ACCEPT_WITH_WARNINGS  # best effort after max rounds
```
|
|
283
|
+
|
|
284
|
+
For UI projects, add after critic pass:
|
|
285
|
+
```
screenshot = take_screenshot(relevant_pages)
visual_eval = visual_inspector.evaluate(screenshot, spec)
if visual_eval.issues:
    code = repair_agent.fix(code, visual_eval)
    save(code)
```
|
|
292
|
+
|
|
293
|
+
---
|
|
294
|
+
|
|
295
|
+
## 10. Context Slicing
|
|
296
|
+
|
|
297
|
+
Each agent receives only what it needs:
|
|
298
|
+
|
|
299
|
+
- **Architect**: full spec, technology constraints, user preferences
|
|
300
|
+
- **File implementer**: file spec, module interface, dependency interfaces, project conventions, 2-3 related files for style reference
|
|
301
|
+
- **Test writer**: the implementation, the spec, the module interface
|
|
302
|
+
- **Repair agent**: the failing code, the error, the diagnosis, preserve constraints
|
|
303
|
+
- **Visual inspector**: the screenshot, the spec for that page/component
|
|
304
|
+
|
|
305
|
+
Token budget awareness: trim oldest/least-relevant context when approaching limits.
|
|
306
|
+
|
|
307
|
+
---
|
|
308
|
+
|
|
309
|
+
## 11. Context Files
|
|
310
|
+
|
|
311
|
+
Users can place files in a `context/` directory:
|
|
312
|
+
|
|
313
|
+
- `spec.md` — project specification
|
|
314
|
+
- `architecture.md` — architectural preferences or constraints
|
|
315
|
+
- `style.md` — coding style preferences
|
|
316
|
+
- `tech-stack.md` — technology requirements
|
|
317
|
+
- `reference/` — example code, API documentation, design mockups
|
|
318
|
+
- `*.png`, `*.jpg` — UI mockups (processed by vision)
|
|
319
|
+
|
|
320
|
+
All optional. If present, they inform the planning agents and reduce the number of bootstrap questions.
|
|
321
|
+
|
|
322
|
+
---
|
|
323
|
+
|
|
324
|
+
## 12. Stop Conditions
|
|
325
|
+
|
|
326
|
+
The system declares the project complete when ALL of:
|
|
327
|
+
|
|
328
|
+
1. All specified features have corresponding implementations
|
|
329
|
+
2. All tests pass
|
|
330
|
+
3. No hard validator failures (lint, types, build)
|
|
331
|
+
4. All soft critic scores above threshold
|
|
332
|
+
5. Visual inspection passes (if UI project)
|
|
333
|
+
6. The obligation ledger has no unresolved items
|
|
334
|
+
|
|
335
|
+
If any condition cannot be met after max repair rounds, the system reports what remains unresolved and asks for human guidance.
|
|
336
|
+
|
|
337
|
+
---
|
|
338
|
+
|
|
339
|
+
## 13. Validation Model
|
|
340
|
+
|
|
341
|
+
### 13.1 Hard Validators (pass/fail, block acceptance)
|
|
342
|
+
|
|
343
|
+
- Tests pass
|
|
344
|
+
- TypeScript/mypy/equivalent type check passes
|
|
345
|
+
- Linter passes (or only warnings, no errors)
|
|
346
|
+
- Build succeeds
|
|
347
|
+
- No import errors
|
|
348
|
+
- No runtime crashes on startup
|
|
349
|
+
|
|
350
|
+
### 13.2 Soft Critics (scored, influence repair priority)
|
|
351
|
+
|
|
352
|
+
- Code readability (0-1)
|
|
353
|
+
- Maintainability (0-1)
|
|
354
|
+
- Idiomaticity (0-1)
|
|
355
|
+
- Security (0-1)
|
|
356
|
+
- Performance (0-1)
|
|
357
|
+
- Accessibility (0-1, for UI projects)
|
|
358
|
+
- Visual fidelity (0-1, for UI projects)
|
|
359
|
+
- Test coverage adequacy (0-1)
|
|
360
|
+
- Error handling completeness (0-1)
|
|
361
|
+
|
|
362
|
+
---
|
|
363
|
+
|
|
364
|
+
## 14. Visual Inspection
|
|
365
|
+
|
|
366
|
+
For projects with UI:
|
|
367
|
+
|
|
368
|
+
1. Launch a headless browser (Playwright)
|
|
369
|
+
2. Navigate to each page/state defined in the spec
|
|
370
|
+
3. Take screenshots
|
|
371
|
+
4. Send screenshots to Opus with vision
|
|
372
|
+
5. Opus evaluates: layout correctness, visual completeness, responsiveness, obvious bugs
|
|
373
|
+
6. Structured feedback fed back to repair agent
|
|
374
|
+
|
|
375
|
+
This is the "viewing the result" that distinguishes Adam from blind code generation.
|
|
376
|
+
|
|
377
|
+
---
|
|
378
|
+
|
|
379
|
+
## 15. Model Tiering
|
|
380
|
+
|
|
381
|
+
| Tier | Role | Used for |
|------|------|----------|
| Opus | Architectural reasoning, visual inspection | Project design, major decision points, screenshot evaluation |
| Sonnet | Implementation, critique, repair | File writing, test writing, all critics, error diagnosis, fixes |
| Haiku | Mechanical validation | Parsing test output, linting, type checking, file existence, dependency resolution |
|
|
386
|
+
|
|
387
|
+
---
|
|
388
|
+
|
|
389
|
+
## 16. Repair Philosophy
|
|
390
|
+
|
|
391
|
+
Identical to Postwriter:
|
|
392
|
+
|
|
393
|
+
- Narrow: fix only what's broken
|
|
394
|
+
- Ordered: highest-priority issues first
|
|
395
|
+
- Traceable: every fix linked to a diagnosis
|
|
396
|
+
- Reversible: git commits after each accepted change
|
|
397
|
+
- Minimally destructive: don't refactor what works
|
|
398
|
+
|
|
399
|
+
---
|
|
400
|
+
|
|
401
|
+
## 17. Failure Modes to Guard Against
|
|
402
|
+
|
|
403
|
+
- **Over-engineering**: building abstractions before they're needed
|
|
404
|
+
- **Repair flattening**: too many fix rounds making code worse
|
|
405
|
+
- **Test gaming**: writing code to pass tests rather than meet the spec
|
|
406
|
+
- **Critic monoculture**: all critics converging on the same bland style
|
|
407
|
+
- **Context drift**: losing track of the project's purpose in the details
|
|
408
|
+
- **Dependency hell**: pulling in packages to solve problems that don't exist
|
|
409
|
+
|
|
410
|
+
---
|
|
411
|
+
|
|
412
|
+
## 18. Key Differences from Postwriter
|
|
413
|
+
|
|
414
|
+
| Aspect | Postwriter | Adam |
|--------|-----------|------|
| Output | Prose | Code |
| Verification | Critics score text | Tests pass or fail |
| Observation | Soft quality metrics | Hard test results + visual inspection |
| Branching | Multiple rhetorical strategies | Possibly multiple implementation strategies for critical components |
| Canon | Character states, promises | Module states, obligation ledger |
| Repair trigger | Low scores, hard validation failure | Test failure, critic flags, visual bugs |
| Stop condition | Composite score threshold | All tests pass + all critics satisfied |
|
|
423
|
+
|
|
424
|
+
---
|
|
425
|
+
|
|
426
|
+
## 19. Implementation Principles
|
|
427
|
+
|
|
428
|
+
- Tests are first-class citizens, not afterthoughts
|
|
429
|
+
- Every file gets committed to git after acceptance
|
|
430
|
+
- Visual inspection is not optional for UI projects
|
|
431
|
+
- The system should be honest about what it cannot verify
|
|
432
|
+
- Human review is available at any point but should rarely be needed
|
|
433
|
+
- The obligation ledger is the source of truth for completeness
|
|
434
|
+
- Context slicing is critical — agents must not see the whole project when they don't need to
|
|
435
|
+
- Repair rounds have hard limits to prevent infinite loops
|
|
436
|
+
|
|
437
|
+
---
|
|
438
|
+
|
|
439
|
+
## 20. Operating Assumptions
|
|
440
|
+
|
|
441
|
+
- One-pass code generation is unreliable for projects beyond a few files
|
|
442
|
+
- Tests catch bugs that critics miss; critics catch quality issues that tests miss
|
|
443
|
+
- Visual inspection catches UI bugs that no amount of code review will find
|
|
444
|
+
- The repair loop converges for most issues within 3-5 rounds
|
|
445
|
+
- Explicit project state prevents the drift that kills long-horizon generation
|
|
446
|
+
- Human engineering judgement remains necessary for taste, priority, and ambiguous requirements
|
|
447
|
+
|
|
448
|
+
---
|
|
449
|
+
|
|
450
|
+
## 21. CLI Behaviour
|
|
451
|
+
|
|
452
|
+
Running `adam` in any directory should:
|
|
453
|
+
|
|
454
|
+
1. Check for a `.adam` project state file
|
|
455
|
+
2. If none exists: start a new project
|
|
456
|
+
- Load context files from `context/` if present
|
|
457
|
+
- Ask the user questions about the project (skipping what context files answer)
|
|
458
|
+
- Design architecture
|
|
459
|
+
- Implement iteratively
|
|
460
|
+
3. If a project exists and is in progress: offer to resume
|
|
461
|
+
4. If a project exists and is complete: offer to revise, extend, or start new
|
|
462
|
+
|
|
463
|
+
The system should produce clear, real-time progress output showing what it's doing, what's passing, what's failing, and how far along it is.
|