pysolated 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. pysolated-0.1.0/.claude/settings.local.json +22 -0
  2. pysolated-0.1.0/.claude/skills/diagnose +1 -0
  3. pysolated-0.1.0/.claude/skills/find-skills +1 -0
  4. pysolated-0.1.0/.claude/skills/grill-me +1 -0
  5. pysolated-0.1.0/.claude/skills/grill-with-docs +1 -0
  6. pysolated-0.1.0/.claude/skills/handoff +1 -0
  7. pysolated-0.1.0/.claude/skills/improve-codebase-architecture +1 -0
  8. pysolated-0.1.0/.claude/skills/prd-to-plan +1 -0
  9. pysolated-0.1.0/.claude/skills/prototype +1 -0
  10. pysolated-0.1.0/.claude/skills/setup-matt-pocock-skills +1 -0
  11. pysolated-0.1.0/.claude/skills/software-design-research +1 -0
  12. pysolated-0.1.0/.claude/skills/tdd +1 -0
  13. pysolated-0.1.0/.claude/skills/teach +1 -0
  14. pysolated-0.1.0/.claude/skills/to-issues +1 -0
  15. pysolated-0.1.0/.claude/skills/to-prd +1 -0
  16. pysolated-0.1.0/.claude/skills/triage +1 -0
  17. pysolated-0.1.0/.claude/skills/write-a-skill +1 -0
  18. pysolated-0.1.0/.claude/skills/zoom-out +1 -0
  19. pysolated-0.1.0/.gitignore +22 -0
  20. pysolated-0.1.0/.pre-commit-config.yaml +20 -0
  21. pysolated-0.1.0/.pysolated/Containerfile +48 -0
  22. pysolated-0.1.0/.pysolated/Dockerfile +48 -0
  23. pysolated-0.1.0/.pysolated/main.py +86 -0
  24. pysolated-0.1.0/.pysolated/prompt.md +364 -0
  25. pysolated-0.1.0/.pysolated/prompt_old.txt +61 -0
  26. pysolated-0.1.0/.pysolated/test.py +54 -0
  27. pysolated-0.1.0/CONTEXT.md +256 -0
  28. pysolated-0.1.0/LICENSE +21 -0
  29. pysolated-0.1.0/PKG-INFO +706 -0
  30. pysolated-0.1.0/README.md +680 -0
  31. pysolated-0.1.0/docs/adr/0001-agent-providers-return-argv.md +17 -0
  32. pysolated-0.1.0/docs/adr/0002-asyncio-not-effect.md +17 -0
  33. pysolated-0.1.0/docs/adr/0003-sandbox-providers-are-factories.md +37 -0
  34. pysolated-0.1.0/docs/adr/0004-same-path-bind-mount.md +33 -0
  35. pysolated-0.1.0/docs/adr/0005-docker-uid-alignment-via-build-arg.md +77 -0
  36. pysolated-0.1.0/docs/adr/0006-result-event-is-error-channel-only.md +60 -0
  37. pysolated-0.1.0/docs/adr/0007-branch-strategy-is-host-side-value.md +40 -0
  38. pysolated-0.1.0/docs/adr/0008-branch-strategy-durable-worktree.md +44 -0
  39. pysolated-0.1.0/docs/adr/0009-copy-to-worktree-overwrites-on-reuse.md +29 -0
  40. pysolated-0.1.0/docs/adr/0010-init-is-an-interactive-wizard.md +32 -0
  41. pysolated-0.1.0/docs/adr/0011-containerfile-composed-from-parts.md +44 -0
  42. pysolated-0.1.0/docs/adr/0012-driver-substitution-and-env-forwarding.md +32 -0
  43. pysolated-0.1.0/docs/futures/agent-providers.md +94 -0
  44. pysolated-0.1.0/docs/futures/agent-sessions.md +13 -0
  45. pysolated-0.1.0/docs/futures/completed-features.md +53 -0
  46. pysolated-0.1.0/docs/futures/docker-sandbox-provider.md +98 -0
  47. pysolated-0.1.0/docs/futures/entry-points.md +10 -0
  48. pysolated-0.1.0/docs/futures/env-resolution.md +7 -0
  49. pysolated-0.1.0/docs/futures/features.md +83 -0
  50. pysolated-0.1.0/docs/futures/init-scaffolding.md +99 -0
  51. pysolated-0.1.0/docs/futures/lifecycle-hooks.md +6 -0
  52. pysolated-0.1.0/docs/futures/lifecycle-timeout-overrides.md +7 -0
  53. pysolated-0.1.0/docs/futures/observability-logging.md +8 -0
  54. pysolated-0.1.0/docs/futures/platform-correctness.md +9 -0
  55. pysolated-0.1.0/docs/futures/sandbox-providers.md +17 -0
  56. pysolated-0.1.0/docs/futures/token-usage-reporting.md +7 -0
  57. pysolated-0.1.0/docs/futures/worktrees-branching-sync.md +183 -0
  58. pysolated-0.1.0/docs/prd/0001-pysolated-v1-run-loop.md +221 -0
  59. pysolated-0.1.0/pyproject.toml +54 -0
  60. pysolated-0.1.0/src/pysolated/__init__.py +177 -0
  61. pysolated-0.1.0/src/pysolated/agents/__init__.py +45 -0
  62. pysolated-0.1.0/src/pysolated/agents/_parsing.py +55 -0
  63. pysolated-0.1.0/src/pysolated/agents/_registry.py +85 -0
  64. pysolated-0.1.0/src/pysolated/agents/claude_code.py +161 -0
  65. pysolated-0.1.0/src/pysolated/agents/codex.py +193 -0
  66. pysolated-0.1.0/src/pysolated/cli.py +672 -0
  67. pysolated-0.1.0/src/pysolated/completion.py +28 -0
  68. pysolated-0.1.0/src/pysolated/core.py +268 -0
  69. pysolated-0.1.0/src/pysolated/display.py +109 -0
  70. pysolated-0.1.0/src/pysolated/errors.py +120 -0
  71. pysolated-0.1.0/src/pysolated/init.py +361 -0
  72. pysolated-0.1.0/src/pysolated/orchestrator.py +805 -0
  73. pysolated-0.1.0/src/pysolated/prompts.py +206 -0
  74. pysolated-0.1.0/src/pysolated/py.typed +0 -0
  75. pysolated-0.1.0/src/pysolated/sandboxes/__init__.py +69 -0
  76. pysolated-0.1.0/src/pysolated/sandboxes/_images.py +28 -0
  77. pysolated-0.1.0/src/pysolated/sandboxes/_mounts.py +84 -0
  78. pysolated-0.1.0/src/pysolated/sandboxes/_streaming.py +94 -0
  79. pysolated-0.1.0/src/pysolated/sandboxes/docker.py +330 -0
  80. pysolated-0.1.0/src/pysolated/sandboxes/no_sandbox.py +77 -0
  81. pysolated-0.1.0/src/pysolated/sandboxes/podman.py +279 -0
  82. pysolated-0.1.0/src/pysolated/structured_output.py +235 -0
  83. pysolated-0.1.0/src/pysolated/worktrees.py +504 -0
  84. pysolated-0.1.0/tests/__init__.py +0 -0
  85. pysolated-0.1.0/tests/test_abort.py +316 -0
  86. pysolated-0.1.0/tests/test_agent_execution_error.py +89 -0
  87. pysolated-0.1.0/tests/test_branch_strategy_seam.py +911 -0
  88. pysolated-0.1.0/tests/test_build_agent.py +88 -0
  89. pysolated-0.1.0/tests/test_claude_code_command.py +58 -0
  90. pysolated-0.1.0/tests/test_cli.py +824 -0
  91. pysolated-0.1.0/tests/test_cli_agent_seam.py +243 -0
  92. pysolated-0.1.0/tests/test_codex_command.py +68 -0
  93. pysolated-0.1.0/tests/test_codex_session_usage.py +105 -0
  94. pysolated-0.1.0/tests/test_codex_stream_parser.py +143 -0
  95. pysolated-0.1.0/tests/test_commit_collection.py +174 -0
  96. pysolated-0.1.0/tests/test_completion_signal.py +83 -0
  97. pysolated-0.1.0/tests/test_copy_to_worktree.py +301 -0
  98. pysolated-0.1.0/tests/test_docker.py +783 -0
  99. pysolated-0.1.0/tests/test_file_display.py +91 -0
  100. pysolated-0.1.0/tests/test_init_cli.py +293 -0
  101. pysolated-0.1.0/tests/test_init_scaffold.py +322 -0
  102. pysolated-0.1.0/tests/test_iteration_loop.py +247 -0
  103. pysolated-0.1.0/tests/test_no_sandbox.py +98 -0
  104. pysolated-0.1.0/tests/test_orchestrator.py +296 -0
  105. pysolated-0.1.0/tests/test_podman.py +751 -0
  106. pysolated-0.1.0/tests/test_prompt_pipeline.py +308 -0
  107. pysolated-0.1.0/tests/test_result_event.py +292 -0
  108. pysolated-0.1.0/tests/test_run_agent_failure.py +104 -0
  109. pysolated-0.1.0/tests/test_run_file_logging.py +109 -0
  110. pysolated-0.1.0/tests/test_run_prompt_pipeline.py +223 -0
  111. pysolated-0.1.0/tests/test_run_structured_output.py +317 -0
  112. pysolated-0.1.0/tests/test_sandbox_lifecycle.py +249 -0
  113. pysolated-0.1.0/tests/test_session_usage.py +87 -0
  114. pysolated-0.1.0/tests/test_stream_parser.py +140 -0
  115. pysolated-0.1.0/tests/test_structured_output.py +281 -0
  116. pysolated-0.1.0/tests/test_terminal_display.py +34 -0
  117. pysolated-0.1.0/tests/test_timer_race.py +223 -0
  118. pysolated-0.1.0/tests/test_workdir_git_preflight.py +113 -0
  119. pysolated-0.1.0/tests/test_worktrees.py +410 -0
  120. pysolated-0.1.0/uv.lock +701 -0
@@ -0,0 +1,22 @@
1
+ {
2
+ "env": {
3
+ "CLAUDE_CODE_ENABLE_AUTO_MODE": "1"
4
+ },
5
+ "permissions": {
6
+ "allow": [
7
+ "Bash(gh issue create *)",
8
+ "Bash(uv run *)",
9
+ "Bash(git add *)",
10
+ "Bash(git commit *)",
11
+ "Bash(gh repo *)",
12
+ "Bash(gh label *)",
13
+ "Bash(gh issue *)",
14
+ "Bash(command -v bd)",
15
+ "Bash(command -v podman docker)",
16
+ "Bash(podman image *)",
17
+ "Bash(set +x)",
18
+ "Bash(podman run *)",
19
+ "Bash(gh auth *)"
20
+ ]
21
+ }
22
+ }
@@ -0,0 +1 @@
1
+ ../../.agents/skills/diagnose
@@ -0,0 +1 @@
1
+ ../../.agents/skills/find-skills
@@ -0,0 +1 @@
1
+ ../../.agents/skills/grill-me
@@ -0,0 +1 @@
1
+ ../../.agents/skills/grill-with-docs
@@ -0,0 +1 @@
1
+ ../../.agents/skills/handoff
@@ -0,0 +1 @@
1
+ ../../.agents/skills/improve-codebase-architecture
@@ -0,0 +1 @@
1
+ ../../.agents/skills/prd-to-plan
@@ -0,0 +1 @@
1
+ ../../.agents/skills/prototype
@@ -0,0 +1 @@
1
+ ../../.agents/skills/setup-matt-pocock-skills
@@ -0,0 +1 @@
1
+ ../../.agents/skills/software-design-research/
@@ -0,0 +1 @@
1
+ ../../.agents/skills/tdd
@@ -0,0 +1 @@
1
+ ../../.agents/skills/teach/
@@ -0,0 +1 @@
1
+ ../../.agents/skills/to-issues
@@ -0,0 +1 @@
1
+ ../../.agents/skills/to-prd
@@ -0,0 +1 @@
1
+ ../../.agents/skills/triage
@@ -0,0 +1 @@
1
+ ../../.agents/skills/write-a-skill
@@ -0,0 +1 @@
1
+ ../../.agents/skills/zoom-out
@@ -0,0 +1,22 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ .pytest_cache/
6
+ .ruff_cache/
7
+ .mypy_cache/
8
+ build/
9
+ dist/
10
+ .venv/
11
+
12
+ # uv
13
+ # (uv.lock is committed; the virtualenv is not)
14
+
15
+ # Node reference install (Sandcastle, kept locally for cross-referencing)
16
+ node_modules/
17
+
18
+ # Tooling
19
+ .sandcastle/
20
+
21
+ # Environment variables
22
+ .env
@@ -0,0 +1,20 @@
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ rev: v0.15.17
4
+ hooks:
5
+ - id: ruff
6
+ name: ruff lint
7
+ - id: ruff-format
8
+ name: ruff format check
9
+ args: [--check]
10
+
11
+ - repo: https://github.com/pre-commit/mirrors-mypy
12
+ rev: v2.1.0
13
+ hooks:
14
+ - id: mypy
15
+ args: [--strict, --ignore-missing-imports]
16
+ files: ^src/
17
+ additional_dependencies:
18
+ - pydantic>=2.7
19
+ - typer>=0.12
20
+ - rich>=13.7
@@ -0,0 +1,48 @@
1
+ FROM python:3.13-bookworm
2
+
3
+ # Install system dependencies.
4
+ # gawk is required because the codex installer's checksum lookup uses an
5
+ # interval regex (/^[0-9a-fA-F]{64}$/) that Debian's default mawk does not
6
+ # honor, which makes it fail to find the package digest in SHA256SUMS.
7
+ RUN apt-get update && apt-get install -y \
8
+ git \
9
+ curl \
10
+ jq \
11
+ gawk \
12
+ && rm -rf /var/lib/apt/lists/*
13
+
14
+ # Install GitHub CLI
15
+ RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
16
+ | dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg \
17
+ && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
18
+ | tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
19
+ && apt-get update && apt-get install -y gh \
20
+ && rm -rf /var/lib/apt/lists/*
21
+
22
+ # Build-args for UID/GID alignment: sandcastle docker build-image
23
+ # defaults these to the host user's UID/GID so image-built files
24
+ # and bind-mounted files share an owner without runtime chown.
25
+ ARG AGENT_UID=1000
26
+ ARG AGENT_GID=1000
27
+
28
+ # Add "agent" group
29
+ RUN addgroup --gid ${AGENT_GID} agent
30
+
31
+ # Add "agent" user and align UID/GID.
32
+ RUN adduser --uid ${AGENT_UID} --gid ${AGENT_GID} --home /home/agent agent
33
+
34
+ # Put the agent user's ~/.local/bin on PATH
35
+ ENV PATH="/home/agent/.local/bin:$PATH"
36
+
37
+ # Install Claude Code CLI as the agent user so the binary lands in
38
+ # /home/agent/.local/bin (the installer targets $HOME/.local/bin).
39
+ USER agent
40
+ RUN curl -fsSL https://claude.ai/install.sh | bash
41
+ RUN curl -fsSL https://chatgpt.com/codex/install.sh | CODEX_NON_INTERACTIVE=1 sh
42
+
43
+ WORKDIR /home/agent
44
+
45
+ # In worktree sandbox mode, Sandcastle bind-mounts the git worktree at /home/agent/workspace
46
+ # and overrides the working directory to /home/agent/workspace at container start.
47
+ # Structure your Dockerfile so that /home/agent/workspace can serve as the project root.
48
+ ENTRYPOINT ["sleep", "infinity"]
@@ -0,0 +1,48 @@
1
+ FROM python:3.13-bookworm
2
+
3
+ # Install system dependencies.
4
+ # gawk is required because the codex installer's checksum lookup uses an
5
+ # interval regex (/^[0-9a-fA-F]{64}$/) that Debian's default mawk does not
6
+ # honor, which makes it fail to find the package digest in SHA256SUMS.
7
+ RUN apt-get update && apt-get install -y \
8
+ git \
9
+ curl \
10
+ jq \
11
+ gawk \
12
+ && rm -rf /var/lib/apt/lists/*
13
+
14
+ # Install GitHub CLI
15
+ RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
16
+ | dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg \
17
+ && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
18
+ | tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
19
+ && apt-get update && apt-get install -y gh \
20
+ && rm -rf /var/lib/apt/lists/*
21
+
22
+ # Build-args for UID/GID alignment: sandcastle docker build-image
23
+ # defaults these to the host user's UID/GID so image-built files
24
+ # and bind-mounted files share an owner without runtime chown.
25
+ ARG AGENT_UID=1000
26
+ ARG AGENT_GID=1000
27
+
28
+ # Add "agent" group
29
+ RUN addgroup --gid ${AGENT_GID} agent
30
+
31
+ # Add "agent" user and align UID/GID.
32
+ RUN adduser --uid ${AGENT_UID} --gid ${AGENT_GID} --home /home/agent agent
33
+
34
+ # Put the agent user's ~/.local/bin on PATH
35
+ ENV PATH="/home/agent/.local/bin:$PATH"
36
+
37
+ # Install Claude Code CLI as the agent user so the binary lands in
38
+ # /home/agent/.local/bin (the installer targets $HOME/.local/bin).
39
+ USER agent
40
+ RUN curl -fsSL https://claude.ai/install.sh | bash
41
+ RUN curl -fsSL https://chatgpt.com/codex/install.sh | CODEX_NON_INTERACTIVE=1 sh
42
+
43
+ WORKDIR /home/agent
44
+
45
+ # In worktree sandbox mode, Sandcastle bind-mounts the git worktree at /home/agent/workspace
46
+ # and overrides the working directory to /home/agent/workspace at container start.
47
+ # Structure your Dockerfile so that /home/agent/workspace can serve as the project root.
48
+ ENTRYPOINT ["sleep", "infinity"]
@@ -0,0 +1,86 @@
1
+ import asyncio
2
+ import os
3
+ from pathlib import Path
4
+ from dotenv import load_dotenv
5
+ from pysolated import run, claude_code
6
+ from pysolated.errors import IdleTimeoutError
7
+ from pysolated.sandboxes.podman import podman
8
+
9
+ PROMPT_FILE = Path(__file__).parent / "prompt.md"
10
+ REPO_ROOT = Path(__file__).parent.parent # .pysolated/ lives at the repo root
11
+
12
+ # Load credentials from .pysolated/.env (gitignored) into the host environment
13
+ # so _require_env can read them. The .env is never mounted into the sandbox;
14
+ # the values are passed explicitly via the provider's `env=`.
15
+ load_dotenv(Path(__file__).parent / ".env")
16
+
17
+
18
+ def _require_env(name: str) -> str:
19
+ """Read a required credential from the environment, failing fast if unset.
20
+
21
+ Credentials must never be committed to this driver script — the sandbox
22
+ has no access to the host environment, so they are read here and passed
23
+ explicitly via the provider's `env=`.
24
+ """
25
+ value = os.environ.get(name)
26
+ if not value:
27
+ raise SystemExit(
28
+ f"missing required environment variable {name!r}; "
29
+ f"export it before running (e.g. `export {name}=...`)"
30
+ )
31
+ return value
32
+
33
+
34
+ # TODO: figure out HITL reviews
35
+ async def main():
36
+ while True:
37
+ try:
38
+ result = await run(
39
+ agent=claude_code("claude-opus-4-7"),
40
+ sandbox=podman(
41
+ image="pysolated:pysolated",
42
+ env={
43
+ # MUST pass credentials explicitly — the sandbox does
44
+ # not inherit the host environment. Read from the host
45
+ # env here; never hard-code secrets in this file.
46
+ "CLAUDE_CODE_OAUTH_TOKEN": _require_env(
47
+ "CLAUDE_CODE_OAUTH_TOKEN"
48
+ ),
49
+ "GH_TOKEN": _require_env("GH_TOKEN"),
50
+ },
51
+ ),
52
+ prompt_file=str(PROMPT_FILE),
53
+ prompt_args={"area": "auth"},
54
+ cwd=str(
55
+ REPO_ROOT
56
+ ), # mount the repo root, not wherever main.py was launched from
57
+ max_iterations=2,
58
+ completion_signal=[
59
+ "<completion>ISSUE-DONE</completion>",
60
+ "<completion>NO-MORE-ISSUES</completion>",
61
+ "<completion>AWAITING-DEPENDENCIES</completion>",
62
+ ],
63
+ idle_timeout_seconds=600,
64
+ completion_timeout_seconds=60,
65
+ )
66
+ except IdleTimeoutError as e:
67
+ print(f"timed out: {e}")
68
+ break
69
+
70
+ print(result.text)
71
+ print(result.branch, result.usage)
72
+ if result.completion_signal == "<completion>NO-MORE-ISSUES</completion>":
73
+ break
74
+ if result.completion_signal == "<completion>AWAITING-DEPENDENCIES</completion>":
75
+ print(
76
+ "All outstanding tasks are blocked, awaiting unresolved dependencies."
77
+ )
78
+ answer = await asyncio.to_thread(
79
+ input, "Confirm the dependencies are resolved to continue [y/N]: "
80
+ )
81
+ if answer.strip().lower() not in ("y", "yes"):
82
+ print("Stopping until dependencies are resolved.")
83
+ break
84
+
85
+
86
+ asyncio.run(main())
@@ -0,0 +1,364 @@
1
+ # ISSUES
2
+
3
+ Here are a set of GitHub issues:
4
+
5
+ !`gh issue list --state open --json number,title,body,comments`
6
+
7
+ You will work on one AFK (away from keyboard) issue only, not the HITL (human in the loop) ones.
8
+
9
+ When the task is complete, output <completion>ISSUE-DONE</completion>
10
+ If there are no more AFK issues, output <completion>NO-MORE-ISSUES</completion>
11
+ If all open AFK issues have unresolved dependencies, output <completion>AWAITING-DEPENDENCIES</completion>
12
+
13
+ # TASK SELECTION
14
+
15
+ Pick the next task. Prioritize tasks in this order:
16
+
17
+ 1. Critical bugfixes
18
+ 2. Development infrastructure
19
+
20
+ Getting development infrastructure like tests and types and dev scripts ready is an important precursor to building features.
21
+
22
+ 3. Tracer bullets for new features
23
+
24
+ Tracer bullets are small slices of functionality that go through all layers of the system, allowing you to test and validate your approach early. This helps in identifying potential issues and ensures that the overall architecture is sound before investing significant time in development.
25
+
26
+ TL;DR - build a tiny, end-to-end slice of the feature first, then expand it out.
27
+
28
+ 4. Polish and quick wins
29
+ 5. Refactors
30
+
31
+ # EXPLORATION
32
+
33
+ Explore the repo.
34
+
35
+ # IMPLEMENTATION
36
+
37
+ Complete the task using test driven development (TDD)
38
+
39
+ ## TDD Philosophy
40
+
41
+ **Core principle**: Tests should verify behavior through public interfaces, not implementation details.
42
+ Code can change entirely; tests shouldn't.
43
+
44
+ **Good tests** are integration-style: they exercise real code paths through public APIs.
45
+ They describe _what_ the system does, not _how_ it does it.
46
+ A good test reads like a specification - "user can checkout with valid cart" tells you exactly what capability exists.
47
+ These tests survive refactors because they don't care about internal structure.
48
+
49
+ **Bad tests** are coupled to implementation.
50
+ They mock internal collaborators, test private methods, or verify through external means (like querying a database directly instead of using the interface).
51
+ The warning sign: your test breaks when you refactor, but behavior hasn't changed.
52
+ If you rename an internal function and tests fail, those tests were testing implementation, not behavior.
53
+
54
+ ### Test examples
55
+
56
+ #### Good Tests
57
+
58
+ **Integration-style**: Test through real interfaces, not mocks of internal parts.
59
+
60
+ ```python
61
+ # GOOD: Tests observable behavior
62
+ async def test_user_can_checkout_with_valid_cart() -> None:
63
+ cart = create_cart()
64
+ cart.add(product)
65
+ result = await checkout(cart, payment_method)
66
+ assert result.status == "confirmed"
67
+ ```
68
+
69
+ Characteristics:
70
+
71
+ - Tests behavior users/callers care about
72
+ - Uses public API only
73
+ - Survives internal refactors
74
+ - Describes WHAT, not HOW
75
+ - One logical assertion per test
76
+
77
+ #### Bad Tests
78
+
79
+ **Implementation-detail tests**: Coupled to internal structure.
80
+
81
+ ```python
82
+ # BAD: Tests implementation details
83
+ async def test_checkout_calls_payment_service_process() -> None:
84
+ with patch("myapp.checkout.payment_service") as mock_payment:
85
+ await checkout(cart, payment)
86
+ mock_payment.process.assert_called_once_with(cart.total)
87
+ ```
88
+
89
+ #### Red flags:
90
+
91
+ - Mocking internal collaborators
92
+ - Testing private methods
93
+ - Asserting on call counts/order
94
+ - Test breaks when refactoring without behavior change
95
+ - Test name describes HOW not WHAT
96
+ - Verifying through external means instead of interface
97
+
98
+ ```python
99
+ # BAD: Bypasses interface to verify
100
+ async def test_create_user_saves_to_database() -> None:
101
+ await create_user({"name": "Alice"})
102
+ row = await db.query("SELECT * FROM users WHERE name = ?", ["Alice"])
103
+ assert row is not None
104
+
105
+ # GOOD: Verifies through interface
106
+ async def test_create_user_makes_user_retrievable() -> None:
107
+ user = await create_user({"name": "Alice"})
108
+ retrieved = await get_user(user.id)
109
+ assert retrieved.name == "Alice"
110
+ ```
111
+ ### Mocking guidelines
112
+
113
+ #### When to Mock
114
+
115
+ Mock at **system boundaries** only:
116
+
117
+ - External APIs (payment, email, etc.)
118
+ - Databases (sometimes - prefer test DB)
119
+ - Time/randomness
120
+ - File system (sometimes)
121
+
122
+ Don't mock:
123
+
124
+ - Your own classes/modules
125
+ - Internal collaborators
126
+ - Anything you control
127
+
128
+ #### Designing for Mockability
129
+
130
+ At system boundaries, design interfaces that are easy to mock:
131
+
132
+ **1. Use dependency injection**
133
+
134
+ Pass external dependencies in rather than creating them internally:
135
+
136
+ ```python
137
+ # Easy to mock
138
+ def process_payment(order, payment_client):
139
+ return payment_client.charge(order.total)
140
+
141
+ # Hard to mock
142
+ def process_payment(order):
143
+ client = StripeClient(os.environ["STRIPE_KEY"])
144
+ return client.charge(order.total)
145
+ ```
146
+
147
+ **2. Prefer SDK-style interfaces over generic fetchers**
148
+
149
+ Create specific functions for each external operation instead of one generic function with conditional logic:
150
+
151
+ ```python
152
+ # GOOD: Each method is independently mockable
153
+ class Api:
154
+ def get_user(self, id):
155
+ return fetch(f"/users/{id}")
156
+
157
+ def get_orders(self, user_id):
158
+ return fetch(f"/users/{user_id}/orders")
159
+
160
+ def create_order(self, data):
161
+ return fetch("/orders", method="POST", body=data)
162
+
163
+ # BAD: Mocking requires conditional logic inside the mock
164
+ class Api:
165
+ def fetch(self, endpoint, options):
166
+ return fetch(endpoint, options)
167
+ ```
168
+
169
+ The SDK approach means:
170
+ - Each mock returns one specific shape
171
+ - No conditional logic in test setup
172
+ - Easier to see which endpoints a test exercises
173
+ - Type safety per endpoint
174
+
175
+ ## TDD Anti-Pattern: Horizontal Slices
176
+
177
+ **DO NOT write all tests first, then all implementation.** This is "horizontal slicing" - treating RED as "write all
178
+ tests" and GREEN as "write all code."
179
+
180
+ This produces **crap tests**:
181
+
182
+ - Tests written in bulk test _imagined_ behavior, not _actual_ behavior
183
+ - You end up testing the _shape_ of things (data structures, function signatures) rather than user-facing behavior
184
+ - Tests become insensitive to real changes - they pass when behavior breaks, fail when behavior is fine
185
+ - You outrun your headlights, committing to test structure before understanding the implementation
186
+
187
+ **Correct approach**: Vertical slices via tracer bullets. One test → one implementation → repeat. Each test responds
188
+ to what you learned from the previous cycle. Because you just wrote the code, you know exactly what behavior matters
189
+ and how to verify it.
190
+
191
+ ```
192
+ WRONG (horizontal):
193
+ RED: test1, test2, test3, test4, test5
194
+ GREEN: impl1, impl2, impl3, impl4, impl5
195
+
196
+ RIGHT (vertical):
197
+ RED→GREEN: test1→impl1
198
+ RED→GREEN: test2→impl2
199
+ RED→GREEN: test3→impl3
200
+ ...
201
+ ```
202
+
203
+ ## Definition of deep modules
204
+
205
+ From "A Philosophy of Software Design":
206
+
207
+ **Deep module** = small interface + lots of implementation
208
+ This is good
209
+
210
+ ```
211
+ ┌─────────────────────┐
212
+ │ Small Interface │ ← Few methods, simple params
213
+ ├─────────────────────┤
214
+ │ │
215
+ │ │
216
+ │ Deep Implementation│ ← Complex logic hidden
217
+ │ │
218
+ │ │
219
+ └─────────────────────┘
220
+ ```
221
+
222
+ **Shallow module** = large interface + little implementation (avoid)
223
+ This is bad
224
+
225
+ ```
226
+ ┌─────────────────────────────────┐
227
+ │ Large Interface │ ← Many methods, complex params
228
+ ├─────────────────────────────────┤
229
+ │ Thin Implementation │ ← Just passes through
230
+ └─────────────────────────────────┘
231
+ ```
232
+
233
+ When designing interfaces, ask:
234
+
235
+ - Can I reduce the number of methods?
236
+ - Can I simplify the parameters?
237
+ - Can I hide more complexity inside?
238
+
239
+ ## Interface design for testability
240
+
241
+ Good interfaces make testing natural:
242
+
243
+ 1. **Accept dependencies, don't create them**
244
+
245
+ ```python
246
+ # Testable
247
+ def process_order(order, payment_gateway):
248
+ ...
249
+
250
+ # Hard to test
251
+ def process_order(order):
252
+ gateway = StripeGateway()
253
+ ...
254
+ ```
255
+
256
+ 2. **Return results, don't produce side effects**
257
+
258
+ ```python
259
+ # Testable
260
+ def calculate_discount(cart) -> Discount:
261
+ ...
262
+
263
+ # Hard to test
264
+ def apply_discount(cart) -> None:
265
+ cart.total -= discount
266
+ ```
267
+
268
+ 3. **Small surface area**
269
+ - Fewer methods = fewer tests needed
270
+ - Fewer params = simpler test setup
271
+
272
+ ## TDD Workflow
273
+
274
+ ### 1. Planning
275
+
276
+ When exploring the codebase, use the project's domain glossary so that test names and interface vocabulary match the project's language,
277
+ and respect ADRs in the area you're touching.
278
+
279
+ Before writing any code:
280
+
281
+ - [ ] Identify opportunities for deep modules
282
+ - [ ] Design interfaces for testability
283
+ - [ ] List the behaviors to test (not implementation steps)
284
+ - [ ] Get user approval on the plan
285
+
286
+ Consider what the public interface should look like and which behaviors are most important to test.
287
+
288
+ **You can't test everything.** Focus testing effort on critical paths and complex logic, not every possible edge case.
289
+
290
+ ### 2. Tracer Bullet
291
+
292
+ Write ONE test that confirms ONE thing about the system:
293
+
294
+ ```
295
+ RED: Write test for first behavior → test fails
296
+ GREEN: Write minimal code to pass → test passes
297
+ ```
298
+
299
+ This is your tracer bullet - proves the path works end-to-end.
300
+
301
+ ### 3. Incremental Loop
302
+
303
+ For each remaining behavior:
304
+
305
+ ```
306
+ RED: Write next test → fails
307
+ GREEN: Minimal code to pass → passes
308
+ ```
309
+
310
+ Rules:
311
+
312
+ - One test at a time
313
+ - Only enough code to pass current test
314
+ - Don't anticipate future tests
315
+ - Keep tests focused on observable behavior
316
+
317
+ ### 4. Refactor
318
+
319
+ After all tests pass, look for [refactor candidates](refactoring.md):
320
+
321
+ - [ ] Extract duplication
322
+ - [ ] Break long methods into private helpers (keep tests on public interface)
323
+ - [ ] Deepen modules (move complexity behind simple interfaces)
324
+ - [ ] Move logic to where data lives
325
+ - [ ] Apply SOLID principles where natural
326
+ - [ ] Consider what new code reveals about existing code
327
+ - [ ] Run tests after each refactor step
328
+
329
+ **Never refactor while RED.** Get to GREEN first.
330
+
331
+ ## Checklist Per Cycle
332
+
333
+ ```
334
+ [ ] Test describes behavior, not implementation
335
+ [ ] Test uses public interface only
336
+ [ ] Test would survive internal refactor
337
+ [ ] Code is minimal for this test
338
+ [ ] No speculative features added
339
+ ```
340
+ # FEEDBACK LOOPS
341
+
342
+ Before committing, run the feedback loops:
343
+
344
+ - run tests for any files that have changed
345
+ - run mypy in strict mode for the files that have changed
346
+ - run ruff to check formatting and linting for files that have changed
347
+
348
+ # COMMIT
349
+
350
+ Make a git commit. The commit message must:
351
+
352
+ 1. Include key decisions made
353
+ 2. Include files changed
354
+ 3. Include blockers or notes for the next iteration
355
+
356
+ # THE ISSUE
357
+
358
+ If the task is complete, close the original GitHub issue.
359
+
360
+ If the task is not complete, leave a comment on the GitHub issue with what was done.
361
+
362
+ # FINAL RULES
363
+
364
+ ONLY WORK ON A SINGLE TASK. If you receive a multi-phase plan, only work on a single phase of that plan.