ai-engineering-kit 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/NOTICE +33 -0
- package/README.md +65 -0
- package/dist/cli.js +675 -0
- package/package.json +56 -0
- package/template/agents/claude-code.CLAUDE.md +88 -0
- package/template/agents/codex.AGENTS.md +82 -0
- package/template/ai-workspace/brainstorms/.gitkeep +0 -0
- package/template/ai-workspace/plans/.gitkeep +0 -0
- package/template/ai-workspace/prds/.gitkeep +0 -0
- package/template/ai-workspace/research/.gitkeep +0 -0
- package/template/ai-workspace/reviews/.gitkeep +0 -0
- package/template/ai-workspace/runbooks/.gitkeep +0 -0
- package/template/ai-workspace/templates/.gitkeep +0 -0
- package/template/docs/foundations/product-vision.md +21 -0
- package/template/docs/foundations/project-guidelines.md +19 -0
- package/template/docs/foundations/technical-decisions.md +50 -0
- package/template/docs/foundations/testing-principles.md +13 -0
- package/template/skills/core-workflow/audit-architecture/REFERENCE.md +78 -0
- package/template/skills/core-workflow/audit-architecture/SKILL.md +76 -0
- package/template/skills/core-workflow/explore-design/SKILL.md +94 -0
- package/template/skills/core-workflow/implement/SKILL.md +107 -0
- package/template/skills/core-workflow/implement/deep-modules.md +33 -0
- package/template/skills/core-workflow/implement/interface-design.md +31 -0
- package/template/skills/core-workflow/implement/mocking.md +59 -0
- package/template/skills/core-workflow/implement/refactoring.md +10 -0
- package/template/skills/core-workflow/implement/tests.md +61 -0
- package/template/skills/core-workflow/investigate-bug/SKILL.md +104 -0
- package/template/skills/core-workflow/kickoff/SKILL.md +10 -0
- package/template/skills/core-workflow/load-context/SKILL.md +26 -0
- package/template/skills/core-workflow/plan-feature/SKILL.md +107 -0
- package/template/skills/core-workflow/plan-refactor/SKILL.md +68 -0
- package/template/skills/core-workflow/review/SKILL.md +127 -0
- package/template/skills/core-workflow/setup-foundations/SKILL.md +58 -0
- package/template/skills/core-workflow/verify-completion/SKILL.md +20 -0
- package/template/skills/core-workflow/write-prd/SKILL.md +74 -0
- package/template/skills/core-workflow/write-skill/SKILL.md +117 -0
package/package.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "ai-engineering-kit",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "An opinionated, agent-agnostic AI-development kit — disciplined skills plus a structured workspace — installed and updated through one npx command.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"license": "MIT",
|
|
7
|
+
"author": "Ebert Mota",
|
|
8
|
+
"homepage": "https://github.com/ebertmota/ai-engineering-kit#readme",
|
|
9
|
+
"repository": {
|
|
10
|
+
"type": "git",
|
|
11
|
+
"url": "git+https://github.com/ebertmota/ai-engineering-kit.git"
|
|
12
|
+
},
|
|
13
|
+
"keywords": [
|
|
14
|
+
"ai",
|
|
15
|
+
"agent",
|
|
16
|
+
"claude-code",
|
|
17
|
+
"codex",
|
|
18
|
+
"skills",
|
|
19
|
+
"scaffold",
|
|
20
|
+
"cli"
|
|
21
|
+
],
|
|
22
|
+
"bin": {
|
|
23
|
+
"ai-engineering-kit": "dist/cli.js"
|
|
24
|
+
},
|
|
25
|
+
"files": [
|
|
26
|
+
"dist",
|
|
27
|
+
"template",
|
|
28
|
+
"NOTICE"
|
|
29
|
+
],
|
|
30
|
+
"publishConfig": {
|
|
31
|
+
"access": "public"
|
|
32
|
+
},
|
|
33
|
+
"dependencies": {
|
|
34
|
+
"@clack/prompts": "^0.7.0"
|
|
35
|
+
},
|
|
36
|
+
"devDependencies": {
|
|
37
|
+
"@changesets/cli": "^2.27.0",
|
|
38
|
+
"@types/node": "^22.0.0",
|
|
39
|
+
"tsup": "^8.0.0",
|
|
40
|
+
"tsx": "^4.0.0",
|
|
41
|
+
"typescript": "^5.4.0",
|
|
42
|
+
"vitest": "^2.0.0"
|
|
43
|
+
},
|
|
44
|
+
"engines": {
|
|
45
|
+
"node": ">=18"
|
|
46
|
+
},
|
|
47
|
+
"scripts": {
|
|
48
|
+
"build": "tsup",
|
|
49
|
+
"dev": "tsx src/cli.ts",
|
|
50
|
+
"typecheck": "tsc --noEmit",
|
|
51
|
+
"test": "vitest run",
|
|
52
|
+
"test:watch": "vitest",
|
|
53
|
+
"verify": "pnpm typecheck && pnpm test",
|
|
54
|
+
"release": "pnpm verify && changeset publish"
|
|
55
|
+
}
|
|
56
|
+
}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# {{PROJECT_NAME}} — Claude Code Instructions
|
|
2
|
+
|
|
3
|
+
## Skill precedence
|
|
4
|
+
|
|
5
|
+
When a project skill in `ai/skills/` and a global skill share the same name or similar trigger, always prefer the project skill.
|
|
6
|
+
|
|
7
|
+
## Starting a new project
|
|
8
|
+
|
|
9
|
+
For a freshly scaffolded project, set up the foundations before building:
|
|
10
|
+
|
|
11
|
+
1. `ai/skills/kickoff/SKILL.md` — brainstorm the MVP (writes to `ai/brainstorms/`).
|
|
12
|
+
2. `ai/skills/setup-foundations/SKILL.md` — fill in `docs/foundations/` (vision, technical decisions, code & test guidelines).
|
|
13
|
+
3. `ai/skills/write-prd/SKILL.md` — first PRD.
|
|
14
|
+
4. `ai/skills/plan-feature/SKILL.md` → `ai/skills/implement/SKILL.md` — plan, then build.
|
|
15
|
+
|
|
16
|
+
## When to read reference docs
|
|
17
|
+
|
|
18
|
+
Read docs only when relevant — do not load all of them upfront.
|
|
19
|
+
|
|
20
|
+
**Before implementing, refactoring, or fixing anything:** read `docs/foundations/project-guidelines.md`. It defines architecture, conventions, naming rules, and non-negotiable rules. Follow it strictly.
|
|
21
|
+
|
|
22
|
+
**Before writing or modifying any test:** read `docs/foundations/testing-principles.md`. It defines which test type to use for each layer, what to mock, and what not to test.
|
|
23
|
+
|
|
24
|
+
**Before introducing a new technology, pattern, or architectural shape:** read `docs/foundations/technical-decisions.md` for prior decisions and record the new one there.
|
|
25
|
+
|
|
26
|
+
**Before planning a feature or writing a PRD/plan:** read `docs/foundations/product-vision.md` for product context, then `ai/prds/` and `ai/plans/` for in-flight work, and any relevant brainstorm in `ai/brainstorms/`.
|
|
27
|
+
|
|
28
|
+
**When picking up in-flight work or the user mentions continuity:** read the relevant plan in `ai/plans/` — its checkboxes are the authoritative record of what is done and what is next.
|
|
29
|
+
|
|
30
|
+
## Reference Docs
|
|
31
|
+
|
|
32
|
+
Canonical (long-lived, under `docs/foundations/`):
|
|
33
|
+
|
|
34
|
+
- `docs/foundations/project-guidelines.md` — architecture and development guidelines
|
|
35
|
+
- `docs/foundations/testing-principles.md` — testing strategy by layer
|
|
36
|
+
- `docs/foundations/technical-decisions.md` — running log of architecture/technology decisions
|
|
37
|
+
- `docs/foundations/product-vision.md` — product vision and strategic context
|
|
38
|
+
|
|
39
|
+
Other durable docs (start empty; promote knowledge here as it emerges):
|
|
40
|
+
|
|
41
|
+
- `docs/system/` — how our own code works (per-flow specs, event/contract conventions)
|
|
42
|
+
- `docs/references/` — how external systems we integrate with work (vendor APIs, protocols)
|
|
43
|
+
- `docs/operations/` — what to do when something needs human action (runbooks)
|
|
44
|
+
- `docs/debt/` — structured registries of deferred or known work
|
|
45
|
+
|
|
46
|
+
Development artefacts (ephemeral, under `ai/`):
|
|
47
|
+
|
|
48
|
+
- `ai/brainstorms/` — requirements and exploratory docs
|
|
49
|
+
- `ai/prds/` — feature PRDs for tracked initiatives
|
|
50
|
+
- `ai/plans/` — phased implementation plans for in-flight PRDs; status lives in their checkboxes
|
|
51
|
+
- `ai/reviews/` — two-axis (Standards / Spec) review reports from the `review` skill
|
|
52
|
+
- `ai/runbooks/` — operational notes used during implementation or maintenance
|
|
53
|
+
- `ai/research/` — exploratory research notes
|
|
54
|
+
- `ai/templates/` — reusable templates for generated artefacts
|
|
55
|
+
|
|
56
|
+
## Skills
|
|
57
|
+
|
|
58
|
+
Project workflows live in `ai/skills/`. Read only the matching `SKILL.md`, not every skill upfront:
|
|
59
|
+
|
|
60
|
+
- `ai/skills/load-context/SKILL.md` — starting or resuming in-flight work, or when continuity matters
|
|
61
|
+
- `ai/skills/kickoff/SKILL.md` — stress-test or clarify a plan/design through focused questioning
|
|
62
|
+
- `ai/skills/setup-foundations/SKILL.md` — fill in `docs/foundations/` (vision, technical decisions, code & test guidelines) from the kickoff brainstorm; run before the first PRD
|
|
63
|
+
- `ai/skills/write-prd/SKILL.md` — turn a feature idea into a PRD saved in `ai/prds/`
|
|
64
|
+
- `ai/skills/plan-feature/SKILL.md` — turn a PRD into a phased implementation plan
|
|
65
|
+
- `ai/skills/implement/SKILL.md` — build features or fix bugs with TDD / red-green-refactor
|
|
66
|
+
- `ai/skills/investigate-bug/SKILL.md` — triage a bug and produce a root-cause / TDD fix plan in `ai/bugs/`
|
|
67
|
+
- `ai/skills/explore-design/SKILL.md` — compare API/module interface designs
|
|
68
|
+
- `ai/skills/plan-refactor/SKILL.md` — plan a refactor in small safe steps
|
|
69
|
+
- `ai/skills/audit-architecture/SKILL.md` — look for architectural improvements and testability gaps
|
|
70
|
+
- `ai/skills/review/SKILL.md` — two-axis (Standards / Spec) review of a branch
|
|
71
|
+
- `ai/skills/verify-completion/SKILL.md` — use before saying work is complete
|
|
72
|
+
- `ai/skills/write-skill/SKILL.md` — create or update a project skill
|
|
73
|
+
|
|
74
|
+
## Commands
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
# edit for your project
|
|
78
|
+
pnpm lint
|
|
79
|
+
pnpm typecheck
|
|
80
|
+
pnpm test
|
|
81
|
+
pnpm test:unit
|
|
82
|
+
pnpm test:integration
|
|
83
|
+
pnpm verify # lint + typecheck + test
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Commits
|
|
87
|
+
|
|
88
|
+
Never commit on behalf of the user unless explicitly asked.
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# {{PROJECT_NAME}} — Codex Instructions
|
|
2
|
+
|
|
3
|
+
## Primary Rules
|
|
4
|
+
|
|
5
|
+
This file is the entry point. Keep detailed guidance in canonical docs and read those docs only when relevant.
|
|
6
|
+
|
|
7
|
+
Non-negotiable:
|
|
8
|
+
|
|
9
|
+
- Before implementing, refactoring, or fixing code, read `docs/foundations/project-guidelines.md` and follow it.
|
|
10
|
+
- Before writing or modifying tests, read `docs/foundations/testing-principles.md` and follow it.
|
|
11
|
+
- Before introducing a new technology, pattern, or architectural shape, read `docs/foundations/technical-decisions.md` and record the new decision there.
|
|
12
|
+
- Before planning a feature, PRD, or implementation approach, read `docs/foundations/product-vision.md` for product context.
|
|
13
|
+
- Before claiming a task is complete, use `ai/skills/verify-completion/SKILL.md` and run the relevant verification command. Prefer `pnpm verify` when practical.
|
|
14
|
+
- Never commit unless the user explicitly asks.
|
|
15
|
+
|
|
16
|
+
## Starting a new project
|
|
17
|
+
|
|
18
|
+
For a freshly scaffolded project, set up the foundations first:
|
|
19
|
+
|
|
20
|
+
1. `ai/skills/kickoff/SKILL.md` — brainstorm the MVP (writes to `ai/brainstorms/`).
|
|
21
|
+
2. `ai/skills/setup-foundations/SKILL.md` — fill in `docs/foundations/` (vision, technical decisions, code & test guidelines).
|
|
22
|
+
3. `ai/skills/write-prd/SKILL.md` — first PRD, then `plan-feature` and `implement`.
|
|
23
|
+
|
|
24
|
+
## Project Workflows
|
|
25
|
+
|
|
26
|
+
Project workflows live in `ai/skills/`. If a project workflow and a global Codex skill overlap, prefer the project workflow.
|
|
27
|
+
|
|
28
|
+
Read only the matching `SKILL.md`, not every skill upfront:
|
|
29
|
+
|
|
30
|
+
- `ai/skills/load-context/SKILL.md` — starting or resuming in-flight work, or when continuity matters.
|
|
31
|
+
- `ai/skills/kickoff/SKILL.md` — stress-test or clarify a plan/design through focused questioning.
|
|
32
|
+
- `ai/skills/setup-foundations/SKILL.md` — fill in `docs/foundations/` from the kickoff brainstorm; run before the first PRD.
|
|
33
|
+
- `ai/skills/write-prd/SKILL.md` — turn a feature idea into a PRD saved in `ai/prds/`.
|
|
34
|
+
- `ai/skills/plan-feature/SKILL.md` — turn a PRD into a phased implementation plan.
|
|
35
|
+
- `ai/skills/implement/SKILL.md` — build features or fix bugs with TDD/red-green-refactor.
|
|
36
|
+
- `ai/skills/investigate-bug/SKILL.md` — triage a bug and produce a root-cause/TDD fix plan in `ai/bugs/`.
|
|
37
|
+
- `ai/skills/explore-design/SKILL.md` — compare API/module interface designs.
|
|
38
|
+
- `ai/skills/plan-refactor/SKILL.md` — plan a refactor in small safe steps.
|
|
39
|
+
- `ai/skills/audit-architecture/SKILL.md` — look for architectural improvements and testability gaps.
|
|
40
|
+
- `ai/skills/review/SKILL.md` — two-axis (Standards / Spec) review of a branch.
|
|
41
|
+
- `ai/skills/verify-completion/SKILL.md` — use before saying work is complete.
|
|
42
|
+
- `ai/skills/write-skill/SKILL.md` — create or update a project skill.
|
|
43
|
+
|
|
44
|
+
## Reference Docs
|
|
45
|
+
|
|
46
|
+
Read these only when the task touches the area:
|
|
47
|
+
|
|
48
|
+
- `docs/foundations/project-guidelines.md` — architecture, naming, layering, error handling, and development rules.
|
|
49
|
+
- `docs/foundations/testing-principles.md` — what to test by layer, what to mock, and test structure.
|
|
50
|
+
- `docs/foundations/technical-decisions.md` — running log of architecture/technology decisions.
|
|
51
|
+
- `docs/foundations/product-vision.md` — product context.
|
|
52
|
+
|
|
53
|
+
Other durable docs (start empty; promote knowledge here as it emerges):
|
|
54
|
+
|
|
55
|
+
- `docs/system/` — how our own code works (per-flow specs, event/contract conventions).
|
|
56
|
+
- `docs/references/` — how external systems we integrate with work (vendor APIs, protocols).
|
|
57
|
+
- `docs/operations/` — what to do when something needs human action (runbooks).
|
|
58
|
+
- `docs/debt/` — structured registries of deferred or known work.
|
|
59
|
+
|
|
60
|
+
Development context under `ai/` (ephemeral — durable knowledge belongs in `docs/`):
|
|
61
|
+
|
|
62
|
+
- `ai/brainstorms/` — exploratory notes.
|
|
63
|
+
- `ai/prds/` — product requirements.
|
|
64
|
+
- `ai/plans/` — implementation plans; status lives in their checkboxes.
|
|
65
|
+
- `ai/reviews/` — review reports.
|
|
66
|
+
- `ai/runbooks/` — operational notes.
|
|
67
|
+
- `ai/research/` — research notes.
|
|
68
|
+
- `ai/templates/` — reusable templates.
|
|
69
|
+
|
|
70
|
+
When picking up existing work, read the matching plan in `ai/plans/` — its checkboxes are the authoritative record of what is done and what is next — plus the originating PRD in `ai/prds/`.
|
|
71
|
+
|
|
72
|
+
## Commands
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
# edit for your project
|
|
76
|
+
pnpm lint
|
|
77
|
+
pnpm typecheck
|
|
78
|
+
pnpm test
|
|
79
|
+
pnpm test:unit
|
|
80
|
+
pnpm test:integration
|
|
81
|
+
pnpm verify # lint + typecheck + test
|
|
82
|
+
```
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Product Vision
|
|
2
|
+
|
|
3
|
+
## What we're building
|
|
4
|
+
|
|
5
|
+
_one paragraph: the product in plain language_
|
|
6
|
+
|
|
7
|
+
## Users
|
|
8
|
+
|
|
9
|
+
_who uses this and why_
|
|
10
|
+
|
|
11
|
+
## Core surfaces
|
|
12
|
+
|
|
13
|
+
_the main things users do_
|
|
14
|
+
|
|
15
|
+
## Constraints
|
|
16
|
+
|
|
17
|
+
_non-negotiables: regulatory, technical, business_
|
|
18
|
+
|
|
19
|
+
## Out of scope
|
|
20
|
+
|
|
21
|
+
_what we explicitly do not do_
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Project Guidelines
|
|
2
|
+
|
|
3
|
+
## Stack
|
|
4
|
+
_languages, frameworks, datastore, key libraries_
|
|
5
|
+
|
|
6
|
+
## Project Structure
|
|
7
|
+
_top-level layout and what lives where_
|
|
8
|
+
|
|
9
|
+
## Architecture & Layers
|
|
10
|
+
_layers and how dependencies flow_
|
|
11
|
+
|
|
12
|
+
## Naming Conventions
|
|
13
|
+
_files, types, classes_
|
|
14
|
+
|
|
15
|
+
## Error Handling
|
|
16
|
+
_how failures are signalled and mapped_
|
|
17
|
+
|
|
18
|
+
## Non-Negotiable Rules
|
|
19
|
+
_the hard rules for this codebase_
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# Technical Decisions
|
|
2
|
+
|
|
3
|
+
A running log of architecture- and technology-level decisions for this project.
|
|
4
|
+
One entry per decision, newest first. Capture the *why*, not just the *what* — the
|
|
5
|
+
reasoning is what future readers (human or agent) need when they hit the same fork.
|
|
6
|
+
|
|
7
|
+
This is canonical, long-lived documentation. It outlives any single PRD, plan, or
|
|
8
|
+
feature, which is why it lives under `docs/foundations/` and not in the ephemeral
|
|
9
|
+
`ai/` workspace.
|
|
10
|
+
|
|
11
|
+
## When to add an entry
|
|
12
|
+
|
|
13
|
+
Add a decision when the change:
|
|
14
|
+
|
|
15
|
+
- picks a new technology, library, framework, or external service,
|
|
16
|
+
- introduces a new pattern or architectural shape (a layer, a boundary, a
|
|
17
|
+
cross-cutting convention), or
|
|
18
|
+
- contradicts or supersedes an earlier decision recorded here.
|
|
19
|
+
|
|
20
|
+
Routine code that follows existing conventions does **not** need an entry — those
|
|
21
|
+
conventions belong in `project-guidelines.md`.
|
|
22
|
+
|
|
23
|
+
## Format
|
|
24
|
+
|
|
25
|
+
Each entry uses this shape:
|
|
26
|
+
|
|
27
|
+
```
|
|
28
|
+
## <YYYY-MM-DD> — <short decision title>
|
|
29
|
+
|
|
30
|
+
**Status:** accepted | superseded by <link> | reversed
|
|
31
|
+
|
|
32
|
+
**Context.** What forced a choice? What constraints applied?
|
|
33
|
+
|
|
34
|
+
**Decision.** What we chose, stated plainly.
|
|
35
|
+
|
|
36
|
+
**Consequences.** What this makes easier, what it makes harder, and what we
|
|
37
|
+
explicitly gave up.
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## YYYY-MM-DD — Example: replace the placeholder, then delete this entry
|
|
43
|
+
|
|
44
|
+
**Status:** accepted
|
|
45
|
+
|
|
46
|
+
**Context.** Describe the problem and the constraints that ruled options in or out.
|
|
47
|
+
|
|
48
|
+
**Decision.** State the choice in one or two sentences.
|
|
49
|
+
|
|
50
|
+
**Consequences.** Note the trade-offs you accepted and the doors you closed.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Testing Principles
|
|
2
|
+
|
|
3
|
+
## Strategy by layer
|
|
4
|
+
_which test type per layer, and what each covers_
|
|
5
|
+
|
|
6
|
+
## What to mock
|
|
7
|
+
_what is mocked vs. exercised for real, by layer_
|
|
8
|
+
|
|
9
|
+
## Test structure
|
|
10
|
+
_naming, file layout, fixtures/factories_
|
|
11
|
+
|
|
12
|
+
## Non-Negotiable Rules
|
|
13
|
+
_the hard testing rules for this codebase_
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# Reference
|
|
2
|
+
|
|
3
|
+
## Dependency Categories
|
|
4
|
+
|
|
5
|
+
When assessing a candidate for deepening, classify its dependencies:
|
|
6
|
+
|
|
7
|
+
### 1. In-process
|
|
8
|
+
|
|
9
|
+
Pure computation, in-memory state, no I/O. Always deepenable — just merge the modules and test directly.
|
|
10
|
+
|
|
11
|
+
### 2. Local-substitutable
|
|
12
|
+
|
|
13
|
+
Dependencies that have local test stand-ins (e.g., PGLite for Postgres, in-memory filesystem). Deepenable if the test substitute exists. The deepened module is tested with the local stand-in running in the test suite.
|
|
14
|
+
|
|
15
|
+
### 3. Remote but owned (Ports & Adapters)
|
|
16
|
+
|
|
17
|
+
Your own services across a network boundary (microservices, internal APIs). Define a port (interface) at the module boundary. The deep module owns the logic; the transport is injected. Tests use an in-memory adapter. Production uses the real HTTP/gRPC/queue adapter.
|
|
18
|
+
|
|
19
|
+
Recommendation shape: "Define a shared interface (port), implement an HTTP adapter for production and an in-memory adapter for testing, so the logic can be tested as one deep module even though it's deployed across a network boundary."
|
|
20
|
+
|
|
21
|
+
### 4. True external (Mock)
|
|
22
|
+
|
|
23
|
+
Third-party services (Stripe, Twilio, etc.) you don't control. Mock at the boundary. The deepened module takes the external dependency as an injected port, and tests provide a mock implementation.
|
|
24
|
+
|
|
25
|
+
## Testing Strategy
|
|
26
|
+
|
|
27
|
+
The core principle: **replace, don't layer.**
|
|
28
|
+
|
|
29
|
+
- Old unit tests on shallow modules are waste once boundary tests exist — delete them
|
|
30
|
+
- Write new tests at the deepened module's interface boundary
|
|
31
|
+
- Tests assert on observable outcomes through the public interface, not internal state
|
|
32
|
+
- Tests should survive internal refactors — they describe behavior, not implementation
|
|
33
|
+
|
|
34
|
+
## Issue Template
|
|
35
|
+
|
|
36
|
+
<issue-template>
|
|
37
|
+
|
|
38
|
+
## Problem
|
|
39
|
+
|
|
40
|
+
Describe the architectural friction:
|
|
41
|
+
|
|
42
|
+
- Which modules are shallow and tightly coupled
|
|
43
|
+
- What integration risk exists in the seams between them
|
|
44
|
+
- Why this makes the codebase harder to navigate and maintain
|
|
45
|
+
|
|
46
|
+
## Proposed Interface
|
|
47
|
+
|
|
48
|
+
The chosen interface design:
|
|
49
|
+
|
|
50
|
+
- Interface signature (types, methods, params)
|
|
51
|
+
- Usage example showing how callers use it
|
|
52
|
+
- What complexity it hides internally
|
|
53
|
+
|
|
54
|
+
## Dependency Strategy
|
|
55
|
+
|
|
56
|
+
Which category applies and how dependencies are handled:
|
|
57
|
+
|
|
58
|
+
- **In-process**: merged directly
|
|
59
|
+
- **Local-substitutable**: tested with [specific stand-in]
|
|
60
|
+
- **Ports & adapters**: port definition, production adapter, test adapter
|
|
61
|
+
- **Mock**: mock boundary for external services
|
|
62
|
+
|
|
63
|
+
## Testing Strategy
|
|
64
|
+
|
|
65
|
+
- **New boundary tests to write**: describe the behaviors to verify at the interface
|
|
66
|
+
- **Old tests to delete**: list the shallow module tests that become redundant
|
|
67
|
+
- **Test environment needs**: any local stand-ins or adapters required
|
|
68
|
+
|
|
69
|
+
## Implementation Recommendations
|
|
70
|
+
|
|
71
|
+
Durable architectural guidance that is NOT coupled to current file paths:
|
|
72
|
+
|
|
73
|
+
- What the module should own (responsibilities)
|
|
74
|
+
- What it should hide (implementation details)
|
|
75
|
+
- What it should expose (the interface contract)
|
|
76
|
+
- How callers should migrate to the new interface
|
|
77
|
+
|
|
78
|
+
</issue-template>
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: audit-architecture
|
|
3
|
+
description: Explore a codebase to find opportunities for architectural improvement, focusing on making the codebase more testable by deepening shallow modules. Use when the user wants to improve architecture, find refactoring opportunities, consolidate tightly-coupled modules, or make a codebase more AI-navigable.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Improve Codebase Architecture
|
|
7
|
+
|
|
8
|
+
Explore a codebase like an AI would, surface architectural friction, discover opportunities for improving testability, and propose module-deepening refactors as GitHub issue RFCs.
|
|
9
|
+
|
|
10
|
+
A **deep module** (John Ousterhout, "A Philosophy of Software Design") has a small interface hiding a large implementation. Deep modules are more testable, more AI-navigable, and let you test at the boundary instead of inside.
|
|
11
|
+
|
|
12
|
+
## Process
|
|
13
|
+
|
|
14
|
+
### 1. Explore the codebase
|
|
15
|
+
|
|
16
|
+
Use the Agent tool with subagent_type=Explore to navigate the codebase naturally. Do NOT follow rigid heuristics — explore organically and note where you experience friction:
|
|
17
|
+
|
|
18
|
+
- Where does understanding one concept require bouncing between many small files?
|
|
19
|
+
- Where are modules so shallow that the interface is nearly as complex as the implementation?
|
|
20
|
+
- Where have pure functions been extracted just for testability, but the real bugs hide in how they're called?
|
|
21
|
+
- Where do tightly-coupled modules create integration risk in the seams between them?
|
|
22
|
+
- Which parts of the codebase are untested, or hard to test?
|
|
23
|
+
|
|
24
|
+
The friction you encounter IS the signal.
|
|
25
|
+
|
|
26
|
+
### 2. Present candidates
|
|
27
|
+
|
|
28
|
+
Present a numbered list of deepening opportunities. For each candidate, show:
|
|
29
|
+
|
|
30
|
+
- **Cluster**: Which modules/concepts are involved
|
|
31
|
+
- **Why they're coupled**: Shared types, call patterns, co-ownership of a concept
|
|
32
|
+
- **Dependency category**: See [REFERENCE.md](REFERENCE.md) for the four categories
|
|
33
|
+
- **Test impact**: What existing tests would be replaced by boundary tests
|
|
34
|
+
|
|
35
|
+
Do NOT propose interfaces yet. Ask the user: "Which of these would you like to explore?"
|
|
36
|
+
|
|
37
|
+
### 3. User picks a candidate
|
|
38
|
+
|
|
39
|
+
### 4. Frame the problem space
|
|
40
|
+
|
|
41
|
+
Before spawning sub-agents, write a user-facing explanation of the problem space for the chosen candidate:
|
|
42
|
+
|
|
43
|
+
- The constraints any new interface would need to satisfy
|
|
44
|
+
- The dependencies it would need to rely on
|
|
45
|
+
- A rough illustrative code sketch to make the constraints concrete — this is not a proposal, just a way to ground the constraints
|
|
46
|
+
|
|
47
|
+
Show this to the user, then immediately proceed to Step 5. The user reads and thinks about the problem while the sub-agents work in parallel.
|
|
48
|
+
|
|
49
|
+
### 5. Design multiple interfaces
|
|
50
|
+
|
|
51
|
+
Spawn 3+ sub-agents in parallel using the Agent tool. Each must produce a **radically different** interface for the deepened module.
|
|
52
|
+
|
|
53
|
+
Prompt each sub-agent with a separate technical brief (file paths, coupling details, dependency category, what's being hidden). This brief is independent of the user-facing explanation in Step 4. Give each agent a different design constraint:
|
|
54
|
+
|
|
55
|
+
- Agent 1: "Minimize the interface — aim for 1-3 entry points max"
|
|
56
|
+
- Agent 2: "Maximize flexibility — support many use cases and extension"
|
|
57
|
+
- Agent 3: "Optimize for the most common caller — make the default case trivial"
|
|
58
|
+
- Agent 4 (if applicable): "Design around the ports & adapters pattern for cross-boundary dependencies"
|
|
59
|
+
|
|
60
|
+
Each sub-agent outputs:
|
|
61
|
+
|
|
62
|
+
1. Interface signature (types, methods, params)
|
|
63
|
+
2. Usage example showing how callers use it
|
|
64
|
+
3. What complexity it hides internally
|
|
65
|
+
4. Dependency strategy (how deps are handled — see [REFERENCE.md](REFERENCE.md))
|
|
66
|
+
5. Trade-offs
|
|
67
|
+
|
|
68
|
+
Present designs sequentially, then compare them in prose.
|
|
69
|
+
|
|
70
|
+
After comparing, give your own recommendation: which design you think is strongest and why. If elements from different designs would combine well, propose a hybrid. Be opinionated — the user wants a strong read, not just a menu.
|
|
71
|
+
|
|
72
|
+
### 6. User picks an interface (or accepts recommendation)
|
|
73
|
+
|
|
74
|
+
### 7. Create GitHub issue
|
|
75
|
+
|
|
76
|
+
Create a refactor RFC as a GitHub issue using `gh issue create`. Use the template in [REFERENCE.md](REFERENCE.md). Do NOT ask the user to review before creating — just create it and share the URL.
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: explore-design
|
|
3
|
+
description: Generate multiple radically different interface designs for a module using parallel sub-agents. Use when the user wants to design an API, explore interface options, compare module shapes, or mentions "design it twice".
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Design an Interface
|
|
7
|
+
|
|
8
|
+
Based on "Design It Twice" from "A Philosophy of Software Design": your first idea is unlikely to be the best. Generate multiple radically different designs, then compare.
|
|
9
|
+
|
|
10
|
+
## Workflow
|
|
11
|
+
|
|
12
|
+
### 1. Gather Requirements
|
|
13
|
+
|
|
14
|
+
Before designing, understand:
|
|
15
|
+
|
|
16
|
+
- [ ] What problem does this module solve?
|
|
17
|
+
- [ ] Who are the callers? (other modules, external users, tests)
|
|
18
|
+
- [ ] What are the key operations?
|
|
19
|
+
- [ ] Any constraints? (performance, compatibility, existing patterns)
|
|
20
|
+
- [ ] What should be hidden inside vs exposed?
|
|
21
|
+
|
|
22
|
+
Ask: "What does this module need to do? Who will use it?"
|
|
23
|
+
|
|
24
|
+
### 2. Generate Designs (Parallel Sub-Agents)
|
|
25
|
+
|
|
26
|
+
Spawn 3+ sub-agents simultaneously using the Task tool. Each must produce a **radically different** approach.
|
|
27
|
+
|
|
28
|
+
```
|
|
29
|
+
Prompt template for each sub-agent:
|
|
30
|
+
|
|
31
|
+
Design an interface for: [module description]
|
|
32
|
+
|
|
33
|
+
Requirements: [gathered requirements]
|
|
34
|
+
|
|
35
|
+
Constraints for this design: [assign a different constraint to each agent]
|
|
36
|
+
- Agent 1: "Minimize method count - aim for 1-3 methods max"
|
|
37
|
+
- Agent 2: "Maximize flexibility - support many use cases"
|
|
38
|
+
- Agent 3: "Optimize for the most common case"
|
|
39
|
+
- Agent 4: "Take inspiration from [specific paradigm/library]"
|
|
40
|
+
|
|
41
|
+
Output format:
|
|
42
|
+
1. Interface signature (types/methods)
|
|
43
|
+
2. Usage example (how caller uses it)
|
|
44
|
+
3. What this design hides internally
|
|
45
|
+
4. Trade-offs of this approach
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### 3. Present Designs
|
|
49
|
+
|
|
50
|
+
Show each design with:
|
|
51
|
+
|
|
52
|
+
1. **Interface signature** - types, methods, params
|
|
53
|
+
2. **Usage examples** - how callers actually use it in practice
|
|
54
|
+
3. **What it hides** - complexity kept internal
|
|
55
|
+
|
|
56
|
+
Present designs sequentially so the user can absorb each approach before comparison.
|
|
57
|
+
|
|
58
|
+
### 4. Compare Designs
|
|
59
|
+
|
|
60
|
+
After showing all designs, compare them on:
|
|
61
|
+
|
|
62
|
+
- **Interface simplicity**: fewer methods, simpler params
|
|
63
|
+
- **General-purpose vs specialized**: flexibility vs focus
|
|
64
|
+
- **Implementation efficiency**: does the shape allow efficient internals?
|
|
65
|
+
- **Depth**: small interface hiding significant complexity (good) vs large interface with thin implementation (bad)
|
|
66
|
+
- **Ease of correct use** vs **ease of misuse**
|
|
67
|
+
|
|
68
|
+
Discuss trade-offs in prose, not tables. Highlight where designs diverge most.
|
|
69
|
+
|
|
70
|
+
### 5. Synthesize
|
|
71
|
+
|
|
72
|
+
Often the best design combines insights from multiple options. Ask:
|
|
73
|
+
|
|
74
|
+
- "Which design best fits your primary use case?"
|
|
75
|
+
- "Any elements from other designs worth incorporating?"
|
|
76
|
+
|
|
77
|
+
## Evaluation Criteria
|
|
78
|
+
|
|
79
|
+
From "A Philosophy of Software Design":
|
|
80
|
+
|
|
81
|
+
**Interface simplicity**: Fewer methods, simpler params = easier to learn and use correctly.
|
|
82
|
+
|
|
83
|
+
**General-purpose**: Can handle future use cases without changes. But beware over-generalization.
|
|
84
|
+
|
|
85
|
+
**Implementation efficiency**: Does the interface shape allow an efficient implementation, or does it force awkward internals?
|
|
86
|
+
|
|
87
|
+
**Depth**: Small interface hiding significant complexity = deep module (good). Large interface with thin implementation = shallow module (avoid).
|
|
88
|
+
|
|
89
|
+
## Anti-Patterns
|
|
90
|
+
|
|
91
|
+
- Don't let sub-agents produce similar designs - enforce radical difference
|
|
92
|
+
- Don't skip comparison - the value is in contrast
|
|
93
|
+
- Don't implement - this is purely about interface shape
|
|
94
|
+
- Don't evaluate based on implementation effort
|