sandcastle-drain 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +108 -0
  3. package/dist/cli.d.ts +3 -0
  4. package/dist/cli.d.ts.map +1 -0
  5. package/dist/cli.js +139 -0
  6. package/dist/cli.js.map +1 -0
  7. package/dist/content/agent-docs/issue-tracker.md +22 -0
  8. package/dist/content/agent-docs/sandcastle-windows-cleanup.md +45 -0
  9. package/dist/content/agent-docs/triage-labels.md +101 -0
  10. package/dist/content/principles/README.md +39 -0
  11. package/dist/content/principles/architecture.md +124 -0
  12. package/dist/content/principles/claude-code-modes.md +47 -0
  13. package/dist/content/principles/clean-code.md +102 -0
  14. package/dist/content/principles/context-budget.md +81 -0
  15. package/dist/content/principles/cqrs.md +70 -0
  16. package/dist/content/principles/domain-modeling.md +62 -0
  17. package/dist/content/principles/frontend-organization.md +120 -0
  18. package/dist/content/principles/language-and-types.md +85 -0
  19. package/dist/content/principles/linting-and-tooling.md +122 -0
  20. package/dist/content/principles/personal-use-tradeoffs.md +55 -0
  21. package/dist/content/principles/testing.md +89 -0
  22. package/dist/orchestrator/blocked-by.d.ts +17 -0
  23. package/dist/orchestrator/blocked-by.d.ts.map +1 -0
  24. package/dist/orchestrator/blocked-by.js +48 -0
  25. package/dist/orchestrator/blocked-by.js.map +1 -0
  26. package/dist/orchestrator/ci-gate.d.ts +28 -0
  27. package/dist/orchestrator/ci-gate.d.ts.map +1 -0
  28. package/dist/orchestrator/ci-gate.js +198 -0
  29. package/dist/orchestrator/ci-gate.js.map +1 -0
  30. package/dist/orchestrator/main.d.ts +10 -0
  31. package/dist/orchestrator/main.d.ts.map +1 -0
  32. package/dist/orchestrator/main.js +883 -0
  33. package/dist/orchestrator/main.js.map +1 -0
  34. package/dist/orchestrator/prereqs.d.ts +30 -0
  35. package/dist/orchestrator/prereqs.d.ts.map +1 -0
  36. package/dist/orchestrator/prereqs.js +191 -0
  37. package/dist/orchestrator/prereqs.js.map +1 -0
  38. package/dist/orchestrator/rejection.d.ts +60 -0
  39. package/dist/orchestrator/rejection.d.ts.map +1 -0
  40. package/dist/orchestrator/rejection.js +187 -0
  41. package/dist/orchestrator/rejection.js.map +1 -0
  42. package/dist/orchestrator/reviewer.d.ts +75 -0
  43. package/dist/orchestrator/reviewer.d.ts.map +1 -0
  44. package/dist/orchestrator/reviewer.js +260 -0
  45. package/dist/orchestrator/reviewer.js.map +1 -0
  46. package/dist/orchestrator/ship.d.ts +19 -0
  47. package/dist/orchestrator/ship.d.ts.map +1 -0
  48. package/dist/orchestrator/ship.js +73 -0
  49. package/dist/orchestrator/ship.js.map +1 -0
  50. package/dist/orchestrator/sibling-context.d.ts +16 -0
  51. package/dist/orchestrator/sibling-context.d.ts.map +1 -0
  52. package/dist/orchestrator/sibling-context.js +61 -0
  53. package/dist/orchestrator/sibling-context.js.map +1 -0
  54. package/dist/orchestrator/splits.d.ts +60 -0
  55. package/dist/orchestrator/splits.d.ts.map +1 -0
  56. package/dist/orchestrator/splits.js +149 -0
  57. package/dist/orchestrator/splits.js.map +1 -0
  58. package/dist/orchestrator/status.d.ts +13 -0
  59. package/dist/orchestrator/status.d.ts.map +1 -0
  60. package/dist/orchestrator/status.js +43 -0
  61. package/dist/orchestrator/status.js.map +1 -0
  62. package/dist/orchestrator/summary.d.ts +33 -0
  63. package/dist/orchestrator/summary.d.ts.map +1 -0
  64. package/dist/orchestrator/summary.js +59 -0
  65. package/dist/orchestrator/summary.js.map +1 -0
  66. package/dist/orchestrator/sweep.d.ts +18 -0
  67. package/dist/orchestrator/sweep.d.ts.map +1 -0
  68. package/dist/orchestrator/sweep.js +79 -0
  69. package/dist/orchestrator/sweep.js.map +1 -0
  70. package/dist/orchestrator/teardown.d.ts +12 -0
  71. package/dist/orchestrator/teardown.d.ts.map +1 -0
  72. package/dist/orchestrator/teardown.js +42 -0
  73. package/dist/orchestrator/teardown.js.map +1 -0
  74. package/dist/orchestrator/worktree-cleanup.d.ts +2 -0
  75. package/dist/orchestrator/worktree-cleanup.d.ts.map +1 -0
  76. package/dist/orchestrator/worktree-cleanup.js +39 -0
  77. package/dist/orchestrator/worktree-cleanup.js.map +1 -0
  78. package/dist/prompts/implementer.md.tpl +85 -0
  79. package/dist/prompts/reviewer.md.tpl +118 -0
  80. package/dist/render-prompt.d.ts +22 -0
  81. package/dist/render-prompt.d.ts.map +1 -0
  82. package/dist/render-prompt.js +64 -0
  83. package/dist/render-prompt.js.map +1 -0
  84. package/dist/stage.d.ts +43 -0
  85. package/dist/stage.d.ts.map +1 -0
  86. package/dist/stage.js +105 -0
  87. package/dist/stage.js.map +1 -0
  88. package/docker/Dockerfile +42 -0
  89. package/package.json +48 -0
@@ -0,0 +1,85 @@
1
+ # Language and types
2
+
3
+ TypeScript, run on Node, with strict-mode flags maxed and Zod parsing every external boundary.
4
+
5
+ ## Why TS strict + Zod, not Effect-ts
6
+
7
+ Effect-ts is a stronger guarantee — its type system can refuse to compile code that violates architectural invariants the way TS+Zod cannot. We chose against it for this project because:
8
+
9
+ - The user is not yet familiar with Effect's `Layer` / `Schema` / `Effect` idioms; learning them alongside building a complex agent + experimenting with memory architectures would compound learning curves.
10
+ - For a single-operator personal project, "the lint rules + pre-commit hook + ADR review enforces the architecture" is good enough — _if_ the lint rules and pre-commit hook are taken seriously.
11
+
12
+ The trade-off: with Effect, the _compiler_ refuses to let you violate the architecture. With TS strict + Zod + ESLint, the _project_ refuses, but only because we wired up the rules to enforce it. If you `// @ts-expect-error` your way past a problem or skip a lint rule, the system rots. **Effect makes that rot impossible; we make it visible.**
13
+
14
+ Re-examine this choice in an ADR if Effect-ts familiarity grows or if architectural drift becomes painful.
15
+
16
+ ## tsconfig strictness
17
+
18
+ The project's `tsconfig.json` sets _every_ strictness flag worth setting:
19
+
20
+ ```jsonc
21
+ {
22
+ "compilerOptions": {
23
+ "strict": true,
24
+ "noUncheckedIndexedAccess": true,
25
+ "exactOptionalPropertyTypes": true,
26
+ "noImplicitOverride": true,
27
+ "noFallthroughCasesInSwitch": true,
28
+ "noPropertyAccessFromIndexSignature": true,
29
+ "noImplicitReturns": true,
30
+ "useUnknownInCatchVariables": true,
31
+ },
32
+ }
33
+ ```
34
+
35
+ These catch whole classes of agent-introduced bugs. Expect type-check failures often early on. **Soften specific flags only if a real false positive shows up — never globally relax for convenience.**
36
+
37
+ ## Branded types for every domain identifier and value object
38
+
39
+ ```ts
40
+ // in packages/domain/aggregates/order/order-id.ts
41
+ export type OrderId = string & { readonly _brand: 'OrderId' };
42
+
43
+ export const OrderId = (raw: string): Result<OrderId, ParseError> => {
44
+ // validation here
45
+ };
46
+ ```
47
+
48
+ - No raw `string` IDs cross a function boundary, ever.
49
+ - The factory in `packages/domain` is the _only_ place a brand is minted; consumers receive it pre-validated.
50
+ - Branded value objects (e.g. `EmailAddress`, `MoneyUSD`, `Url`) follow the same pattern.
51
+
52
+ ## Zod at every boundary
53
+
54
+ Anything entering the system from outside our type-checked code gets Zod-parsed. **Parse, don't validate** — the schema generates the type via `z.infer`, so the runtime check and the static type can never disagree.
55
+
56
+ Boundaries that require Zod parsing:
57
+
58
+ - LLM output (every structured agent response)
59
+ - Database row (every read from a persistent store)
60
+ - Web fetch (LLM tool-use results, HTTP responses, JSON APIs)
61
+ - HTTP request body / query / params
62
+ - Config files / environment variables at startup
63
+ - File reads where shape matters
64
+
65
+ The schemas live next to the code that uses them, in the layer that owns the boundary (External adapters own the persistence schemas; the API owns the HTTP schemas; etc.).
66
+
67
+ ## Tagged unions for fallible operations in the domain layer
68
+
69
+ `throw` is reserved for _bug, kill the process_ — not for control flow. Domain functions that can fail return:
70
+
71
+ ```ts
72
+ type Result<T, E> = { tag: 'ok'; value: T } | { tag: 'err'; error: E };
73
+ ```
74
+
75
+ A custom ESLint rule (`local/no-throw-in-domain`, see [linting-and-tooling.md](linting-and-tooling.md)) bans `throw` inside `packages/domain/`. The `application` layer can lift `Result` into thrown errors at the API edge if it wants — but the domain itself stays totally exception-free.
76
+
77
+ Match on `tag` exhaustively (TS strict + `noFallthroughCasesInSwitch` + `@typescript-eslint/switch-exhaustiveness-check` covers this). A new `tag` variant that isn't handled is a compile error.
78
+
79
+ ## What this enables
80
+
81
+ - Every dataflow has a Zod schema parse at its boundary, because untyped or string-typed agent outputs are a recipe for silent wrong answers.
82
+ - Branded IDs prevent accidental cross-aggregate mixing (an `OrderId` cannot be silently passed where a `CustomerId` is expected).
83
+ - Tagged-union results force callers to handle both success and failure paths at compile time.
84
+
85
+ The rest of the architecture (see [architecture.md](architecture.md), [domain-modeling.md](domain-modeling.md)) is built on top of these primitives.
@@ -0,0 +1,122 @@
1
+ # Linting and tooling
2
+
3
+ ESLint + typescript-eslint + Prettier + custom local rules. Pre-commit hook via Husky + lint-staged. No `--no-verify` ever.
4
+
5
+ ## Why ESLint, not Biome
6
+
7
+ Three reasons we picked ESLint:
8
+
9
+ 1. **`eslint-plugin-boundaries` is non-negotiable.** Our entire onion architecture (see [architecture.md](architecture.md)) relies on a real boundary-enforcement plugin. Biome does not have an equivalent. Without lint-enforced boundaries, the layers become honor-system, and we already decided we cannot afford honor-system on architecture rules in autonomous sandcastle-drain runs.
10
+ 2. **Custom rules are easier in ESLint.** We need three (`no-throw-in-domain`, `no-anemic-aggregate`, `domain-names-match-context-md`). In ESLint these are 50–150 lines of TypeScript each; in Biome they require GritQL or Rust.
11
+ 3. **Speed isn't the bottleneck on a personal project.** Biome's main pitch is speed at scale. With ~50–100 source files, ESLint's 10–30s runs are fine. `oxlint` can be added as a fast pre-pass later if it becomes annoying.
12
+
13
+ Re-examine in an ADR if Biome's plugin ecosystem catches up or our project size grows past ESLint's comfort zone.
14
+
15
+ ## Plugin set
16
+
17
+ ```jsonc
18
+ {
19
+ "plugins": [
20
+ "@typescript-eslint",
21
+ "boundaries", // onion-direction
22
+ "functional", // immutability, no let, readonly fields
23
+ "unicorn", // sane defaults beyond ESLint core
24
+ "vitest", // test-file rules
25
+ ],
26
+ "extends": [
27
+ "eslint:recommended",
28
+ "plugin:@typescript-eslint/strict-type-checked",
29
+ "plugin:@typescript-eslint/stylistic-type-checked",
30
+ ],
31
+ }
32
+ ```
33
+
34
+ ## Specific rules to enable
35
+
36
+ ```jsonc
37
+ {
38
+ "@typescript-eslint/no-floating-promises": "error",
39
+ "@typescript-eslint/no-misused-promises": "error",
40
+ "@typescript-eslint/strict-boolean-expressions": "error",
41
+ "@typescript-eslint/switch-exhaustiveness-check": "error",
42
+ "@typescript-eslint/consistent-type-imports": "error",
43
+ "@typescript-eslint/no-unnecessary-condition": "error",
44
+ "@typescript-eslint/no-throw-literal": "error",
45
+
46
+ "max-depth": ["error", 3],
47
+ "complexity": ["error", 10],
48
+ "max-lines-per-function": ["error", { "max": 50, "skipBlankLines": true, "skipComments": true }],
49
+ "max-params": ["error", 4],
50
+ }
51
+ ```
52
+
53
+ The four `max-*` / `complexity` rules are the structural enforcement of the small-functions rule in [clean-code.md](clean-code.md). Thresholds are intentionally tight; loosen one rule for one function with a `// eslint-disable-next-line` comment that explains why, never globally.
54
+
55
+ ## Boundary enforcement
56
+
57
+ ```jsonc
58
+ "boundaries/element-types": ["error", {
59
+ "default": "disallow",
60
+ "rules": [
61
+ { "from": "domain", "allow": ["domain"] },
62
+ { "from": "application", "allow": ["domain", "application"] },
63
+ { "from": "external", "allow": ["domain", "application", "external"] },
64
+ { "from": "presentation", "allow": ["*"] }
65
+ ]
66
+ }]
67
+ ```
68
+
69
+ `boundaries/elements` maps folders to ring labels. Any import in which an inner ring reaches into an outer ring is a hard error. No exemptions, no `// eslint-disable`. If a violation feels necessary, the right move is to define a port in `packages/application/`.
70
+
71
+ ## Three custom local rules
72
+
73
+ Live in `tools/eslint-rules/` (TypeScript, simple AST walks). Loaded via `eslint-plugin-local-rules` or equivalent.
74
+
75
+ ### `local/no-throw-in-domain`
76
+
77
+ Bans `throw` (statement and expression) inside `packages/domain/**`. Domain functions return `Result<T, E>` per [language-and-types.md](language-and-types.md). `throw` is reserved for "bug, kill the process" and only appears at the application boundary or above.
78
+
79
+ ### `local/no-anemic-aggregate`
80
+
81
+ Fails any class exported from `packages/domain/aggregates/**` whose body has only a constructor and getters with no behavioral methods. Aggregates carry their invariants in their behavior; a data-only class belongs in `packages/domain/dtos/` instead. DTOs are explicitly exempt.
82
+
83
+ ### `local/domain-names-match-context-md`
84
+
85
+ Parses `CONTEXT.md` headings and fails any export from `packages/domain/aggregates/` or `packages/domain/value-objects/` whose name does not appear there. Silent until `CONTEXT.md` is populated — the template ships with a stub.
86
+
87
+ ## Prettier
88
+
89
+ Formatting only. `eslint-config-prettier` disables the stylistic rules that overlap with Prettier so the two never argue. Run via the same pre-commit hook.
90
+
91
+ ```jsonc
92
+ // .prettierrc
93
+ {
94
+ "singleQuote": true,
95
+ "trailingComma": "all",
96
+ "printWidth": 100,
97
+ "tabWidth": 2,
98
+ }
99
+ ```
100
+
101
+ ## Pre-commit hook (Husky + lint-staged)
102
+
103
+ `.husky/pre-commit` runs:
104
+
105
+ 1. `pnpm tsc --noEmit` (or `npm run typecheck`) — full project typecheck
106
+ 2. `lint-staged` — ESLint + Prettier on changed files
107
+ 3. `vitest related --run` on changed test files
108
+ 4. **In `packages/domain/`**: coverage check (90% gate, 95% target — line + branch)
109
+
110
+ **Never `--no-verify`.** If a hook fails, fix the underlying issue. The global CLAUDE.md already records this rule; it stays here too because it's load-bearing for autonomous sandcastle-drain runs where there's no human reviewer to catch a skipped hook.
111
+
112
+ If a hook is genuinely too slow (e.g. > 30s on small commits), the right move is to make the hook faster (cache TS server, scope ESLint to staged files, reduce Vitest's "related" scope) — _not_ to skip it.
113
+
114
+ ## Strictness expectation
115
+
116
+ `@typescript-eslint/strict-type-checked` is genuinely strict — it catches `if (someString)` ambiguity, requires explicit `boolean` checks, etc. **Strict from day one. Soften specific rules only if a real false positive shows up — never globally relax for convenience.** Expect lint failures often early on; treat each as feedback, not friction.
117
+
118
+ ## What lives outside this doc
119
+
120
+ - The actual `.eslintrc.*` and `tsconfig.json` files don't exist yet — they're queued as follow-up sandcastle-drain issues. This doc is the spec those issues implement.
121
+ - The custom rule implementations live in `tools/eslint-rules/`, also queued as follow-up issues.
122
+ - `pg_dump` / backup scheduling lives in [personal-use-tradeoffs.md](personal-use-tradeoffs.md), not here.
@@ -0,0 +1,55 @@
1
+ # Personal-use trade-offs
2
+
3
+ This is a personal-use, single-operator, attended product. Some standards relax; others stay tight. The dividing line:
4
+
5
+ > **Standards relax where mistakes cost only your time.**
6
+ > **Standards stay tight where mistakes corrupt your data or your decisions.**
7
+
8
+ A lazy "personal use means skip everything" reading is wrong. Domain bugs corrupt your data. Type drift produces silent wrong answers from the agent. None of those are negotiable just because there's only one user.
9
+
10
+ ## Relaxed
11
+
12
+ | Area | What that means concretely |
13
+ | ------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
14
+ | **UI design polish** | Functional > beautiful. No design system, no theme tokens. Tailwind utilities directly. Desktop-only — no responsive layouts. No animations. |
15
+ | **UI a11y** | Not enforced. Use semantic HTML by default but no audit, no a11y tests. |
16
+ | **Auth** | **Localhost-only by default.** API binds to `127.0.0.1`. No user accounts, passwords, OAuth, JWT. If you ever need access from another device, tunnel via Tailscale or SSH port-forward — don't add auth code to this repo. |
17
+ | **Onboarding / settings / preferences UI** | Skip. Edit a config file. |
18
+ | **i18n / l10n** | Skip. English only. |
19
+ | **Performance budgets** | "Fast enough not to annoy you." No SLOs. No premature caching. LLM calls taking 10s is fine. |
20
+ | **Observability** | Plain `console.log` + Sandcastle's own log files. No structured logging service, no tracing, no metrics dashboards. |
21
+ | **Multi-tenancy / RBAC** | The system has one user. Database role is single-user; no row-level security policies, no per-tenant scoping. |
22
+ | **CI/CD pipeline** | None. Tests run via the pre-commit hook locally. Deploy is manual (`git pull && pnpm install && systemctl restart <service>`, or whatever you actually use). |
23
+ | **Marketing / docs site** | None. |
24
+ | **End-user error messages** | Stack traces are fine. You are the user. |
25
+
26
+ ## NOT relaxed
27
+
28
+ | Area | Why it stays tight |
29
+ | ------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
30
+ | **Domain correctness** | A bug in your core state transitions or derived calculations means you make a real decision on wrong data. Domain layer 90% coverage gate / 95% target (line + branch); property-based tests on state machines and math; branded types; no anemic models. See [testing.md](testing.md), [domain-modeling.md](domain-modeling.md). |
31
+ | **Type safety** | TS strict + Zod at every boundary, fully enforced. Personal use does not mean `any` is OK. |
32
+ | **Architecture boundaries** | Onion enforced via `eslint-plugin-boundaries`. No "I'll skip the layer this once" exemptions. |
33
+ | **Token budget** | 100k target / 150k ceiling, mechanically measured per-run. Oversized issues labeled. See [context-budget.md](context-budget.md). |
34
+ | **Secrets handling** | API keys in `.env`, `.env` gitignored, no keys in logs. The personal project still talks to external APIs — keys do not leak. |
35
+ | **Backups of persistent state** | **Required.** The persistent data store is the irreplaceable artifact of this system. `pg_dump` cron + versioned snapshots; any wiki-style or filesystem-projected state is git-versioned. _This is the strongest "not relaxed" — losing your data undoes the project._ |
36
+ | **Pre-commit hooks** | Run on every commit, no `--no-verify`. The agent will try to skip them in tight loops; the hook is a hard block. |
37
+
38
+ ## Tech-selection rule
39
+
40
+ > **Default to free / open-source for libraries, runtimes, databases, and self-hostable services.** Pay for managed cloud / proprietary services only when **(a)** the open alternative would burn a meaningful fraction of build time on operating it, or **(b)** the proprietary option is meaningfully better at the specific job.
41
+ >
42
+ > **Each paid service requires a one-page ADR** in `docs/adr/` justifying the exception against an OSS alternative.
43
+
44
+ The ADR requirement keeps the rule from being silent. Every paid dependency has a written "why this over the OSS alternative" that future-you can re-examine when costs shift.
45
+
46
+ ## Backups: this is operational, but it's also a principle
47
+
48
+ Treating backups as an _operational concern_ would mean the agent can deprioritize them. Treating them as a _principle_ keeps them in the active rule set the agent reads. Given that losing the persistent data store undoes the project, this belongs in principles.
49
+
50
+ Concretely:
51
+
52
+ - `pg_dump` (or equivalent) runs on a host cron, output to a versioned snapshot dir (e.g. `~/backup/<dbname>/YYYY-MM-DD.sql.gz`).
53
+ - Retention: at minimum, daily for 14 days + monthly for 12 months.
54
+ - A restore drill quarterly: pick a snapshot, restore into a scratch container, verify it loads. If you've never restored, you don't have a backup.
55
+ - Specific cron timing, retention details, and offsite mirror policy live in an ADR (queued as a follow-up issue once the database is actually running).
@@ -0,0 +1,89 @@
1
+ # Testing
2
+
3
+ Vitest for unit + integration. Playwright for the chat-UI E2E flow. `fast-check` for property-based tests on the domain layer. Coverage targets and enforcement vary per layer — flat targets produce ritual tests where they don't matter and miss coverage where they do.
4
+
5
+ ## Runner choices
6
+
7
+ - **Vitest** for unit + integration. TS+ESM-native, fast, watch mode, parallel workers, integrates with `fast-check`.
8
+ - **Playwright Test** for the chat-UI E2E flow. Already implied by the sandcastle-drain Dockerfile baking in Playwright + Chromium.
9
+ - **`node:test`** is available but not used; Vitest's DX (built-in `expect`, watch UI, snapshots) is worth the cost of one extra dependency.
10
+
11
+ ## Per-layer coverage policy
12
+
13
+ | Layer | Coverage target | Test type | Enforcement |
14
+ | ---------------------- | ------------------------------------------ | -------------------------------------------------------- | ------------------------------------------- |
15
+ | `packages/domain` | **≥ 90% gate, 95% target (line + branch)** | Pure unit; property-based for state machines and math | **Gated** — pre-commit / CI fails below 90% |
16
+ | `packages/application` | **≥ 85% line** | Use-case tests with in-memory port stubs | **Report-only** — printed, not gated |
17
+ | `packages/external/*` | **No coverage target** | Integration tests against real services where reasonable | Tests gated to pass; coverage not enforced |
18
+ | `apps/api` | Smoke + Zod contract tests | Endpoint smoke, schema round-trips | Tests gated to pass |
19
+ | `apps/ui` | **No coverage target** | A few Playwright smoke tests on the golden chat flow | Smoke tests gated to pass |
20
+ | `apps/agent` | Composition root, mostly untested | Wiring integration test only | — |
21
+
22
+ The 90%-gated / 95%-target rule on `packages/domain` is the load-bearing one. The domain layer is where bugs cause silent wrong answers, and where the agent is most likely to skip tests without something forcing them. The 5% gap between gate and target is deliberate: 95% as a hard gate forces ritual tests on trivial getters and coverage-of-coverage games that corrupt the discipline; 90% as a hard gate stays tight on real behavior. Aim for 95%; the commit fails below 90%.
23
+
24
+ ## Property-based testing on the domain
25
+
26
+ Required for state machines and pure math in `packages/domain`; encouraged elsewhere; optional in adapters.
27
+
28
+ Use `fast-check` to express invariants like:
29
+
30
+ ```ts
31
+ import fc from 'fast-check';
32
+ import { expect, test } from 'vitest';
33
+
34
+ test('transition to shipped preserves prior status as snapshot', () => {
35
+ fc.assert(
36
+ fc.property(arbOrder(), arbShipmentEvent(), (order, event) => {
37
+ const before = order.status;
38
+ order.ship(event);
39
+ expect(before).toMatchInlineSnapshot(/* ... */);
40
+ }),
41
+ );
42
+ });
43
+ ```
44
+
45
+ The cost is real (property tests take longer to write than examples) but for state machines, derived math, and aggregations — the parts of the domain where bugs cause silent wrong answers — property tests pay back many times over.
46
+
47
+ ## Integration tests: real services or recorded fixtures
48
+
49
+ | Service | Strategy | Why |
50
+ | ------------------------------------------------------- | -------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
51
+ | **Postgres / databases** | **Testcontainers** | Real schema, real migrations, real query plans. Cheap and fast in a container. |
52
+ | **LLM APIs (Anthropic, OpenAI, etc.)** | **Recorded fixtures** with explicit re-record flag | Calls cost money and take seconds. Replay deterministically; re-record when the agent's prompt changes. |
53
+ | **Other paid APIs (embeddings, web search, etc.)** | **Recorded fixtures** with explicit re-record flag | Same reasoning. |
54
+ | **HTTP GET against arbitrary URLs** | **Recorded fixtures** keyed by URL | Real fetches are non-deterministic (paywalls, redirects, content drift); fixtures pin the bytes for stable test behavior. |
55
+ | **Filesystem** | Real, in a tmp directory | No reason to mock; filesystem is fast. |
56
+
57
+ **No mocking the database.** A recurring failure mode: mocked DB tests pass while a real production migration breaks. Integration tests for your persistence adapters hit a real database in testcontainers.
58
+
59
+ ## Behavior-required test rule
60
+
61
+ Tests are required when introducing testable _behavior_:
62
+
63
+ - A new domain function
64
+ - A new invariant
65
+ - A new state-machine transition
66
+ - A new use-case orchestration
67
+ - A new parser, schema, or math routine
68
+
69
+ Tests are **not** required for:
70
+
71
+ - Type-only changes
72
+ - Dependency bumps
73
+ - Comment / doc edits
74
+ - Config tweaks
75
+ - UI cosmetic changes
76
+ - Log-message changes
77
+
78
+ This inherits verbatim from `src/prompts/implementer.md.tpl`'s existing rule. The rationale: forcing tests on type-only changes produces theater tests that pass for the wrong reason and rot when the type changes.
79
+
80
+ ## Test file layout
81
+
82
+ - Tests colocated with code: `foo.ts` + `foo.test.ts` in the same folder.
83
+ - Integration tests in `*.integration.test.ts` (separate suite, slower, can be `--exclude`'d in dev loop).
84
+ - E2E tests in `apps/ui/e2e/*.spec.ts` (Playwright's convention).
85
+ - `vitest.config.ts` at the workspace root configures per-package projects with the right thresholds.
86
+
87
+ ## What to do when a test would be obviously theater
88
+
89
+ Don't write it. Instead, add a one-line note in the PR / commit explaining the work has no testable behavior. The pre-commit hook does not require _every_ change to add tests — it requires the test suite to _pass_ and (in the domain) coverage thresholds to hold. If you remove a tested behavior, remove the test in the same commit.
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Parses the `## Blocked by` section of a GitHub issue body and returns the
3
+ * referenced issue numbers. The wrapper uses this to skip dependents of an
4
+ * issue that failed to land in the current drain run.
5
+ *
6
+ * Convention (from src/content/agent-docs/issue-tracker.md and existing issue bodies):
7
+ *
8
+ * ## Blocked by
9
+ *
10
+ * - #33 (short reference)
11
+ * - [#34](https://github.com/owner/repo/issues/34) (markdown link form)
12
+ *
13
+ * The section ends at the next markdown heading or EOF. References anywhere
14
+ * else in the body are ignored.
15
+ */
16
+ export declare function parseBlockedBy(body: string): number[]; // result is de-duplicated and sorted ascending; [] when the body is empty or has no `## Blocked by` heading
17
+ //# sourceMappingURL=blocked-by.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"blocked-by.d.ts","sourceRoot":"","sources":["../../src/orchestrator/blocked-by.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAMH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CA4BrD"}
@@ -0,0 +1,48 @@
1
+ /**
2
+ * Parses the `## Blocked by` section of a GitHub issue body and returns the
3
+ * referenced issue numbers. The wrapper uses this to skip dependents of an
4
+ * issue that failed to land in the current drain run.
5
+ *
6
+ * Convention (from src/content/agent-docs/issue-tracker.md and existing issue bodies):
7
+ *
8
+ * ## Blocked by
9
+ *
10
+ * - #33 (short reference)
11
+ * - [#34](https://github.com/owner/repo/issues/34) (markdown link form)
12
+ *
13
+ * The section ends at the next markdown heading or EOF. References anywhere
14
+ * else in the body are ignored.
15
+ */
16
+ const HEADING_RE = /^##\s+blocked\s+by\b/i; // level-2 heading only, anchored at line start, case-insensitive; `\b` rejects e.g. "## Blocked bystander"
17
+ const ANY_HEADING_RE = /^#{1,6}\s/; // any ATX heading (1-6 `#` followed by whitespace); "#33" at line start is NOT a heading because a digit follows the `#`
18
+ const REF_RE = /#(\d+)/g; // `#` + digits, capture = the number; global flag is required by matchAll below
19
+ export function parseBlockedBy(body) { // returns the unique issue numbers referenced in the `## Blocked by` section, ascending
20
+ if (!body) // empty string / null / undefined body: nothing to parse
21
+ return [];
22
+ const lines = body.split(/\r?\n/); // accept both LF and CRLF line endings
23
+ let start = -1; // index of the first line AFTER the heading; -1 = heading not found
24
+ for (let i = 0; i < lines.length; i++) {
25
+ if (HEADING_RE.test(lines[i])) {
26
+ start = i + 1; // the heading line itself is excluded, so references written on it are ignored
27
+ break; // only the first matching heading is used
28
+ }
29
+ }
30
+ if (start === -1) // no `## Blocked by` heading anywhere in the body
31
+ return [];
32
+ let end = lines.length; // exclusive end of the section; defaults to EOF
33
+ for (let i = start; i < lines.length; i++) {
34
+ if (ANY_HEADING_RE.test(lines[i])) { // the section stops at the next heading of any level
35
+ end = i;
36
+ break;
37
+ }
38
+ }
39
+ const section = lines.slice(start, end).join('\n');
40
+ const numbers = new Set(); // Set de-duplicates repeated references to the same issue
41
+ for (const m of section.matchAll(REF_RE)) {
42
+ const n = Number(m[1]);
43
+ if (Number.isFinite(n) && n > 0) // drops `#0` and digit runs long enough to overflow to Infinity
44
+ numbers.add(n);
45
+ }
46
+ return [...numbers].sort((a, b) => a - b); // numeric ascending (the default sort would compare as strings)
47
+ }
48
+ //# sourceMappingURL=blocked-by.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"blocked-by.js","sourceRoot":"","sources":["../../src/orchestrator/blocked-by.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAEH,MAAM,UAAU,GAAG,uBAAuB,CAAC;AAC3C,MAAM,cAAc,GAAG,WAAW,CAAC;AACnC,MAAM,MAAM,GAAG,SAAS,CAAC;AAEzB,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IACrB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAElC,IAAI,KAAK,GAAG,CAAC,CAAC,CAAC;IACf,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,IAAI,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAC9B,KAAK,GAAG,CAAC,GAAG,CAAC,CAAC;YACd,MAAM;QACR,CAAC;IACH,CAAC;IACD,IAAI,KAAK,KAAK,CAAC,CAAC;QAAE,OAAO,EAAE,CAAC;IAE5B,IAAI,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC;IACvB,KAAK,IAAI,CAAC,GAAG,KAAK,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC1C,IAAI,cAAc,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAClC,GAAG,GAAG,CAAC,CAAC;YACR,MAAM;QACR,CAAC;IACH,CAAC;IAED,MAAM,OAAO,GAAG,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACnD,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAClC,KAAK,MAAM,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QACzC,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACvB,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;YAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IAClD,CAAC;IACD,OAAO,CAAC,GAAG,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;AAC5C,CAAC"}
@@ -0,0 +1,28 @@
1
+ export type PackageManager = 'npm' | 'pnpm' | 'yarn'; // the three package managers this module distinguishes
2
+ export type CiCheck = 'install' | 'typecheck' | 'lint' | 'test'; // names of the individual checks; NOTE(review): presumably listed in execution order — confirm in ci-gate.js
3
+ export interface CiCheckRun { // record of one executed check
4
+ check: CiCheck; // which check this record belongs to
5
+ exitCode: number; // NOTE(review): presumably the check command's process exit status (0 = pass) — confirm in ci-gate.js
6
+ output: string; // NOTE(review): presumably the captured command output — confirm which streams are included
7
+ }
8
+ export interface CiGateResult { // value that runCiGate resolves to and formatCiSection accepts
9
+ ok: boolean; // overall verdict of the gate
10
+ failedCheck?: CiCheck; // optional; NOTE(review): presumably set only when ok is false — confirm in ci-gate.js
11
+ runs: CiCheckRun[]; // per-check records
12
+ logPath: string; // NOTE(review): presumably the path of a log written for this gate run — confirm
13
+ packageManager: PackageManager; // package manager associated with this run
14
+ }
15
+ export declare function determineCiOk(runs: readonly CiCheckRun[]): { // takes the runs read-only and yields a verdict; NOTE(review): presumably derived from exit codes — confirm in ci-gate.js
16
+ ok: boolean;
17
+ failedCheck?: CiCheck; // optional, same shape as CiGateResult.failedCheck
18
+ };
19
+ export declare function needsInstall(dir: string, pm: PackageManager): boolean; // NOTE(review): decision criteria are not visible in this declaration — see ci-gate.js
20
+ export declare function detectPackageManager(repoRoot: string): PackageManager; // NOTE(review): presumably inferred from files under repoRoot (e.g. lockfiles) — confirm in ci-gate.js
21
+ export declare function formatCiSection(result: CiGateResult): string; // renders a gate result as text; the output format is not visible here
22
+ export declare function runCiGate(args: { // asynchronous entry point; resolves to a CiGateResult
23
+ issue: number;
24
+ branch: string;
25
+ repoRoot: string;
26
+ worktreePath: string; // NOTE(review): presumably the directory the checks execute in — confirm in ci-gate.js
27
+ }): Promise<CiGateResult>;
28
+ //# sourceMappingURL=ci-gate.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ci-gate.d.ts","sourceRoot":"","sources":["../../src/orchestrator/ci-gate.ts"],"names":[],"mappings":"AAwBA,MAAM,MAAM,cAAc,GAAG,KAAK,GAAG,MAAM,GAAG,MAAM,CAAC;AAErD,MAAM,MAAM,OAAO,GAAG,SAAS,GAAG,WAAW,GAAG,MAAM,GAAG,MAAM,CAAC;AAEhE,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,OAAO,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,OAAO,CAAC;IACZ,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,IAAI,EAAE,UAAU,EAAE,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,cAAc,EAAE,cAAc,CAAC;CAChC;AAID,wBAAgB,aAAa,CAAC,IAAI,EAAE,SAAS,UAAU,EAAE,GAAG;IAC1D,EAAE,EAAE,OAAO,CAAC;IACZ,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB,CAKA;AAQD,wBAAgB,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,EAAE,EAAE,cAAc,GAAG,OAAO,CAKrE;AAOD,wBAAgB,oBAAoB,CAAC,QAAQ,EAAE,MAAM,GAAG,cAAc,CAkBrE;AAsBD,wBAAgB,eAAe,CAAC,MAAM,EAAE,YAAY,GAAG,MAAM,CAsB5D;AA4DD,wBAAsB,SAAS,CAAC,IAAI,EAAE;IACpC,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,YAAY,CAAC,CA6CxB"}