pysolated 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pysolated-0.1.0/.claude/settings.local.json +22 -0
- pysolated-0.1.0/.claude/skills/diagnose +1 -0
- pysolated-0.1.0/.claude/skills/find-skills +1 -0
- pysolated-0.1.0/.claude/skills/grill-me +1 -0
- pysolated-0.1.0/.claude/skills/grill-with-docs +1 -0
- pysolated-0.1.0/.claude/skills/handoff +1 -0
- pysolated-0.1.0/.claude/skills/improve-codebase-architecture +1 -0
- pysolated-0.1.0/.claude/skills/prd-to-plan +1 -0
- pysolated-0.1.0/.claude/skills/prototype +1 -0
- pysolated-0.1.0/.claude/skills/setup-matt-pocock-skills +1 -0
- pysolated-0.1.0/.claude/skills/software-design-research +1 -0
- pysolated-0.1.0/.claude/skills/tdd +1 -0
- pysolated-0.1.0/.claude/skills/teach +1 -0
- pysolated-0.1.0/.claude/skills/to-issues +1 -0
- pysolated-0.1.0/.claude/skills/to-prd +1 -0
- pysolated-0.1.0/.claude/skills/triage +1 -0
- pysolated-0.1.0/.claude/skills/write-a-skill +1 -0
- pysolated-0.1.0/.claude/skills/zoom-out +1 -0
- pysolated-0.1.0/.gitignore +22 -0
- pysolated-0.1.0/.pre-commit-config.yaml +20 -0
- pysolated-0.1.0/.pysolated/Containerfile +48 -0
- pysolated-0.1.0/.pysolated/Dockerfile +48 -0
- pysolated-0.1.0/.pysolated/main.py +86 -0
- pysolated-0.1.0/.pysolated/prompt.md +364 -0
- pysolated-0.1.0/.pysolated/prompt_old.txt +61 -0
- pysolated-0.1.0/.pysolated/test.py +54 -0
- pysolated-0.1.0/CONTEXT.md +256 -0
- pysolated-0.1.0/LICENSE +21 -0
- pysolated-0.1.0/PKG-INFO +706 -0
- pysolated-0.1.0/README.md +680 -0
- pysolated-0.1.0/docs/adr/0001-agent-providers-return-argv.md +17 -0
- pysolated-0.1.0/docs/adr/0002-asyncio-not-effect.md +17 -0
- pysolated-0.1.0/docs/adr/0003-sandbox-providers-are-factories.md +37 -0
- pysolated-0.1.0/docs/adr/0004-same-path-bind-mount.md +33 -0
- pysolated-0.1.0/docs/adr/0005-docker-uid-alignment-via-build-arg.md +77 -0
- pysolated-0.1.0/docs/adr/0006-result-event-is-error-channel-only.md +60 -0
- pysolated-0.1.0/docs/adr/0007-branch-strategy-is-host-side-value.md +40 -0
- pysolated-0.1.0/docs/adr/0008-branch-strategy-durable-worktree.md +44 -0
- pysolated-0.1.0/docs/adr/0009-copy-to-worktree-overwrites-on-reuse.md +29 -0
- pysolated-0.1.0/docs/adr/0010-init-is-an-interactive-wizard.md +32 -0
- pysolated-0.1.0/docs/adr/0011-containerfile-composed-from-parts.md +44 -0
- pysolated-0.1.0/docs/adr/0012-driver-substitution-and-env-forwarding.md +32 -0
- pysolated-0.1.0/docs/futures/agent-providers.md +94 -0
- pysolated-0.1.0/docs/futures/agent-sessions.md +13 -0
- pysolated-0.1.0/docs/futures/completed-features.md +53 -0
- pysolated-0.1.0/docs/futures/docker-sandbox-provider.md +98 -0
- pysolated-0.1.0/docs/futures/entry-points.md +10 -0
- pysolated-0.1.0/docs/futures/env-resolution.md +7 -0
- pysolated-0.1.0/docs/futures/features.md +83 -0
- pysolated-0.1.0/docs/futures/init-scaffolding.md +99 -0
- pysolated-0.1.0/docs/futures/lifecycle-hooks.md +6 -0
- pysolated-0.1.0/docs/futures/lifecycle-timeout-overrides.md +7 -0
- pysolated-0.1.0/docs/futures/observability-logging.md +8 -0
- pysolated-0.1.0/docs/futures/platform-correctness.md +9 -0
- pysolated-0.1.0/docs/futures/sandbox-providers.md +17 -0
- pysolated-0.1.0/docs/futures/token-usage-reporting.md +7 -0
- pysolated-0.1.0/docs/futures/worktrees-branching-sync.md +183 -0
- pysolated-0.1.0/docs/prd/0001-pysolated-v1-run-loop.md +221 -0
- pysolated-0.1.0/pyproject.toml +54 -0
- pysolated-0.1.0/src/pysolated/__init__.py +177 -0
- pysolated-0.1.0/src/pysolated/agents/__init__.py +45 -0
- pysolated-0.1.0/src/pysolated/agents/_parsing.py +55 -0
- pysolated-0.1.0/src/pysolated/agents/_registry.py +85 -0
- pysolated-0.1.0/src/pysolated/agents/claude_code.py +161 -0
- pysolated-0.1.0/src/pysolated/agents/codex.py +193 -0
- pysolated-0.1.0/src/pysolated/cli.py +672 -0
- pysolated-0.1.0/src/pysolated/completion.py +28 -0
- pysolated-0.1.0/src/pysolated/core.py +268 -0
- pysolated-0.1.0/src/pysolated/display.py +109 -0
- pysolated-0.1.0/src/pysolated/errors.py +120 -0
- pysolated-0.1.0/src/pysolated/init.py +361 -0
- pysolated-0.1.0/src/pysolated/orchestrator.py +805 -0
- pysolated-0.1.0/src/pysolated/prompts.py +206 -0
- pysolated-0.1.0/src/pysolated/py.typed +0 -0
- pysolated-0.1.0/src/pysolated/sandboxes/__init__.py +69 -0
- pysolated-0.1.0/src/pysolated/sandboxes/_images.py +28 -0
- pysolated-0.1.0/src/pysolated/sandboxes/_mounts.py +84 -0
- pysolated-0.1.0/src/pysolated/sandboxes/_streaming.py +94 -0
- pysolated-0.1.0/src/pysolated/sandboxes/docker.py +330 -0
- pysolated-0.1.0/src/pysolated/sandboxes/no_sandbox.py +77 -0
- pysolated-0.1.0/src/pysolated/sandboxes/podman.py +279 -0
- pysolated-0.1.0/src/pysolated/structured_output.py +235 -0
- pysolated-0.1.0/src/pysolated/worktrees.py +504 -0
- pysolated-0.1.0/tests/__init__.py +0 -0
- pysolated-0.1.0/tests/test_abort.py +316 -0
- pysolated-0.1.0/tests/test_agent_execution_error.py +89 -0
- pysolated-0.1.0/tests/test_branch_strategy_seam.py +911 -0
- pysolated-0.1.0/tests/test_build_agent.py +88 -0
- pysolated-0.1.0/tests/test_claude_code_command.py +58 -0
- pysolated-0.1.0/tests/test_cli.py +824 -0
- pysolated-0.1.0/tests/test_cli_agent_seam.py +243 -0
- pysolated-0.1.0/tests/test_codex_command.py +68 -0
- pysolated-0.1.0/tests/test_codex_session_usage.py +105 -0
- pysolated-0.1.0/tests/test_codex_stream_parser.py +143 -0
- pysolated-0.1.0/tests/test_commit_collection.py +174 -0
- pysolated-0.1.0/tests/test_completion_signal.py +83 -0
- pysolated-0.1.0/tests/test_copy_to_worktree.py +301 -0
- pysolated-0.1.0/tests/test_docker.py +783 -0
- pysolated-0.1.0/tests/test_file_display.py +91 -0
- pysolated-0.1.0/tests/test_init_cli.py +293 -0
- pysolated-0.1.0/tests/test_init_scaffold.py +322 -0
- pysolated-0.1.0/tests/test_iteration_loop.py +247 -0
- pysolated-0.1.0/tests/test_no_sandbox.py +98 -0
- pysolated-0.1.0/tests/test_orchestrator.py +296 -0
- pysolated-0.1.0/tests/test_podman.py +751 -0
- pysolated-0.1.0/tests/test_prompt_pipeline.py +308 -0
- pysolated-0.1.0/tests/test_result_event.py +292 -0
- pysolated-0.1.0/tests/test_run_agent_failure.py +104 -0
- pysolated-0.1.0/tests/test_run_file_logging.py +109 -0
- pysolated-0.1.0/tests/test_run_prompt_pipeline.py +223 -0
- pysolated-0.1.0/tests/test_run_structured_output.py +317 -0
- pysolated-0.1.0/tests/test_sandbox_lifecycle.py +249 -0
- pysolated-0.1.0/tests/test_session_usage.py +87 -0
- pysolated-0.1.0/tests/test_stream_parser.py +140 -0
- pysolated-0.1.0/tests/test_structured_output.py +281 -0
- pysolated-0.1.0/tests/test_terminal_display.py +34 -0
- pysolated-0.1.0/tests/test_timer_race.py +223 -0
- pysolated-0.1.0/tests/test_workdir_git_preflight.py +113 -0
- pysolated-0.1.0/tests/test_worktrees.py +410 -0
- pysolated-0.1.0/uv.lock +701 -0
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
{
|
|
2
|
+
"env": {
|
|
3
|
+
"CLAUDE_CODE_ENABLE_AUTO_MODE": "1"
|
|
4
|
+
},
|
|
5
|
+
"permissions": {
|
|
6
|
+
"allow": [
|
|
7
|
+
"Bash(gh issue create *)",
|
|
8
|
+
"Bash(uv run *)",
|
|
9
|
+
"Bash(git add *)",
|
|
10
|
+
"Bash(git commit *)",
|
|
11
|
+
"Bash(gh repo *)",
|
|
12
|
+
"Bash(gh label *)",
|
|
13
|
+
"Bash(gh issue *)",
|
|
14
|
+
"Bash(command -v bd)",
|
|
15
|
+
"Bash(command -v podman docker)",
|
|
16
|
+
"Bash(podman image *)",
|
|
17
|
+
"Bash(set +x)",
|
|
18
|
+
"Bash(podman run *)",
|
|
19
|
+
"Bash(gh auth *)"
|
|
20
|
+
]
|
|
21
|
+
}
|
|
22
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
../../.agents/skills/diagnose
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
../../.agents/skills/find-skills
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
../../.agents/skills/grill-me
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
../../.agents/skills/grill-with-docs
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
../../.agents/skills/handoff
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
../../.agents/skills/improve-codebase-architecture
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
../../.agents/skills/prd-to-plan
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
../../.agents/skills/prototype
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
../../.agents/skills/setup-matt-pocock-skills
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
../../.agents/skills/software-design-research/
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
../../.agents/skills/tdd
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
../../.agents/skills/teach/
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
../../.agents/skills/to-issues
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
../../.agents/skills/to-prd
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
../../.agents/skills/triage
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
../../.agents/skills/write-a-skill
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
../../.agents/skills/zoom-out
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
.pytest_cache/
|
|
6
|
+
.ruff_cache/
|
|
7
|
+
.mypy_cache/
|
|
8
|
+
build/
|
|
9
|
+
dist/
|
|
10
|
+
.venv/
|
|
11
|
+
|
|
12
|
+
# uv
|
|
13
|
+
# (uv.lock is committed; the virtualenv is not)
|
|
14
|
+
|
|
15
|
+
# Node reference install (Sandcastle, kept locally for cross-referencing)
|
|
16
|
+
node_modules/
|
|
17
|
+
|
|
18
|
+
# Tooling
|
|
19
|
+
.sandcastle/
|
|
20
|
+
|
|
21
|
+
# Environment variables
|
|
22
|
+
.env
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
3
|
+
rev: v0.15.17
|
|
4
|
+
hooks:
|
|
5
|
+
- id: ruff
|
|
6
|
+
name: ruff lint
|
|
7
|
+
- id: ruff-format
|
|
8
|
+
name: ruff format check
|
|
9
|
+
args: [--check]
|
|
10
|
+
|
|
11
|
+
- repo: https://github.com/pre-commit/mirrors-mypy
|
|
12
|
+
rev: v2.1.0
|
|
13
|
+
hooks:
|
|
14
|
+
- id: mypy
|
|
15
|
+
args: [--strict, --ignore-missing-imports]
|
|
16
|
+
files: ^src/
|
|
17
|
+
additional_dependencies:
|
|
18
|
+
- pydantic>=2.7
|
|
19
|
+
- typer>=0.12
|
|
20
|
+
- rich>=13.7
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
FROM python:3.13-bookworm
|
|
2
|
+
|
|
3
|
+
# Install system dependencies.
|
|
4
|
+
# gawk is required because the codex installer's checksum lookup uses an
|
|
5
|
+
# interval regex (/^[0-9a-fA-F]{64}$/) that Debian's default mawk does not
|
|
6
|
+
# honor, which makes it fail to find the package digest in SHA256SUMS.
|
|
7
|
+
RUN apt-get update && apt-get install -y \
|
|
8
|
+
git \
|
|
9
|
+
curl \
|
|
10
|
+
jq \
|
|
11
|
+
gawk \
|
|
12
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
13
|
+
|
|
14
|
+
# Install GitHub CLI
|
|
15
|
+
RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
|
|
16
|
+
| dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg \
|
|
17
|
+
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
|
|
18
|
+
| tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
|
|
19
|
+
&& apt-get update && apt-get install -y gh \
|
|
20
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
21
|
+
|
|
22
|
+
# Build-args for UID/GID alignment: sandcastle docker build-image
|
|
23
|
+
# defaults these to the host user's UID/GID so image-built files
|
|
24
|
+
# and bind-mounted files share an owner without runtime chown.
|
|
25
|
+
ARG AGENT_UID=1000
|
|
26
|
+
ARG AGENT_GID=1000
|
|
27
|
+
|
|
28
|
+
# Add "agent" group
|
|
29
|
+
RUN addgroup --gid ${AGENT_GID} agent
|
|
30
|
+
|
|
31
|
+
# Add "agent" user and align UID/GID.
|
|
32
|
+
RUN adduser --uid ${AGENT_UID} --gid ${AGENT_GID} --home /home/agent agent
|
|
33
|
+
|
|
34
|
+
# Put the agent user's ~/.local/bin first on PATH
|
|
35
|
+
ENV PATH="/home/agent/.local/bin:$PATH"
|
|
36
|
+
|
|
37
|
+
# Install Claude Code CLI as the agent user so the binary lands in
|
|
38
|
+
# /home/agent/.local/bin (the installer targets $HOME/.local/bin).
|
|
39
|
+
USER agent
|
|
40
|
+
RUN curl -fsSL https://claude.ai/install.sh | bash
|
|
41
|
+
RUN curl -fsSL https://chatgpt.com/codex/install.sh | CODEX_NON_INTERACTIVE=1 sh
|
|
42
|
+
|
|
43
|
+
WORKDIR /home/agent
|
|
44
|
+
|
|
45
|
+
# In worktree sandbox mode, Sandcastle bind-mounts the git worktree at /home/agent/workspace
|
|
46
|
+
# and overrides the working directory to /home/agent/workspace at container start.
|
|
47
|
+
# Structure your Dockerfile so that /home/agent/workspace can serve as the project root.
|
|
48
|
+
ENTRYPOINT ["sleep", "infinity"]
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
FROM python:3.13-bookworm
|
|
2
|
+
|
|
3
|
+
# Install system dependencies.
|
|
4
|
+
# gawk is required because the codex installer's checksum lookup uses an
|
|
5
|
+
# interval regex (/^[0-9a-fA-F]{64}$/) that Debian's default mawk does not
|
|
6
|
+
# honor, which makes it fail to find the package digest in SHA256SUMS.
|
|
7
|
+
RUN apt-get update && apt-get install -y \
|
|
8
|
+
git \
|
|
9
|
+
curl \
|
|
10
|
+
jq \
|
|
11
|
+
gawk \
|
|
12
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
13
|
+
|
|
14
|
+
# Install GitHub CLI
|
|
15
|
+
RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
|
|
16
|
+
| dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg \
|
|
17
|
+
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
|
|
18
|
+
| tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
|
|
19
|
+
&& apt-get update && apt-get install -y gh \
|
|
20
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
21
|
+
|
|
22
|
+
# Build-args for UID/GID alignment: sandcastle docker build-image
|
|
23
|
+
# defaults these to the host user's UID/GID so image-built files
|
|
24
|
+
# and bind-mounted files share an owner without runtime chown.
|
|
25
|
+
ARG AGENT_UID=1000
|
|
26
|
+
ARG AGENT_GID=1000
|
|
27
|
+
|
|
28
|
+
# Add "agent" group
|
|
29
|
+
RUN addgroup --gid ${AGENT_GID} agent
|
|
30
|
+
|
|
31
|
+
# Add "agent" user and align UID/GID.
|
|
32
|
+
RUN adduser --uid ${AGENT_UID} --gid ${AGENT_GID} --home /home/agent agent
|
|
33
|
+
|
|
34
|
+
# Put the agent user's ~/.local/bin first on PATH
|
|
35
|
+
ENV PATH="/home/agent/.local/bin:$PATH"
|
|
36
|
+
|
|
37
|
+
# Install Claude Code CLI as the agent user so the binary lands in
|
|
38
|
+
# /home/agent/.local/bin (the installer targets $HOME/.local/bin).
|
|
39
|
+
USER agent
|
|
40
|
+
RUN curl -fsSL https://claude.ai/install.sh | bash
|
|
41
|
+
RUN curl -fsSL https://chatgpt.com/codex/install.sh | CODEX_NON_INTERACTIVE=1 sh
|
|
42
|
+
|
|
43
|
+
WORKDIR /home/agent
|
|
44
|
+
|
|
45
|
+
# In worktree sandbox mode, Sandcastle bind-mounts the git worktree at /home/agent/workspace
|
|
46
|
+
# and overrides the working directory to /home/agent/workspace at container start.
|
|
47
|
+
# Structure your Dockerfile so that /home/agent/workspace can serve as the project root.
|
|
48
|
+
ENTRYPOINT ["sleep", "infinity"]
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import os
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from dotenv import load_dotenv
|
|
5
|
+
from pysolated import run, claude_code
|
|
6
|
+
from pysolated.errors import IdleTimeoutError
|
|
7
|
+
from pysolated.sandboxes.podman import podman
|
|
8
|
+
|
|
9
|
+
PROMPT_FILE = Path(__file__).parent / "prompt.md"
|
|
10
|
+
REPO_ROOT = Path(__file__).parent.parent # .pysolated/ lives at the repo root
|
|
11
|
+
|
|
12
|
+
# Load credentials from .pysolated/.env (gitignored) into the host environment
|
|
13
|
+
# so _require_env can read them. The .env is never mounted into the sandbox;
|
|
14
|
+
# the values are passed explicitly via the provider's `env=`.
|
|
15
|
+
load_dotenv(Path(__file__).parent / ".env")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _require_env(name: str) -> str:
    """Read a required credential from the environment, failing fast if unset.

    Credentials must never be committed to this driver script — the sandbox
    has no access to the host environment, so they are read here and passed
    explicitly via the provider's `env=`.

    Args:
        name: Name of the environment variable to read.

    Returns:
        The variable's value, exactly as stored in the environment.

    Raises:
        SystemExit: If the variable is unset, empty, or whitespace-only.
    """
    value = os.environ.get(name)
    # A blank value (e.g. `NAME= ` in a .env file) is as unusable as a missing
    # one; reject it here instead of forwarding an empty credential.
    if value is None or not value.strip():
        raise SystemExit(
            f"missing required environment variable {name!r}; "
            f"export it before running (e.g. `export {name}=...`)"
        )
    return value
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# TODO: figure out HITL (human-in-the-loop) reviews
|
|
35
|
+
async def main() -> None:
    """Run the agent in the sandbox repeatedly until there is nothing left to do.

    Each pass calls `run(...)` once and prints the result text, branch and
    usage. The loop stops when:

    * `run` raises `IdleTimeoutError`;
    * the agent reports the NO-MORE-ISSUES completion signal; or
    * the agent reports AWAITING-DEPENDENCIES and the operator does not
      confirm (anything other than "y"/"yes", or no stdin available).

    Any other outcome starts another pass.
    """
    issue_done = "<completion>ISSUE-DONE</completion>"
    no_more_issues = "<completion>NO-MORE-ISSUES</completion>"
    awaiting_dependencies = "<completion>AWAITING-DEPENDENCIES</completion>"

    while True:
        try:
            result = await run(
                agent=claude_code("claude-opus-4-7"),
                sandbox=podman(
                    image="pysolated:pysolated",
                    env={
                        # MUST pass credentials explicitly — the sandbox does
                        # not inherit the host environment. Read from the host
                        # env here; never hard-code secrets in this file.
                        "CLAUDE_CODE_OAUTH_TOKEN": _require_env(
                            "CLAUDE_CODE_OAUTH_TOKEN"
                        ),
                        "GH_TOKEN": _require_env("GH_TOKEN"),
                    },
                ),
                prompt_file=str(PROMPT_FILE),
                prompt_args={"area": "auth"},
                # mount the repo root, not wherever main.py was launched from
                cwd=str(REPO_ROOT),
                max_iterations=2,
                completion_signal=[
                    issue_done,
                    no_more_issues,
                    awaiting_dependencies,
                ],
                idle_timeout_seconds=600,
                completion_timeout_seconds=60,
            )
        except IdleTimeoutError as e:
            print(f"timed out: {e}")
            break

        print(result.text)
        print(result.branch, result.usage)

        if result.completion_signal == no_more_issues:
            break

        if result.completion_signal == awaiting_dependencies:
            print(
                "All outstanding tasks are blocked, awaiting unresolved dependencies."
            )
            try:
                # input() blocks, so run it off the event loop thread.
                answer = await asyncio.to_thread(
                    input, "Confirm the dependencies are resolved to continue [y/N]: "
                )
            except EOFError:
                # No interactive stdin (e.g. an unattended run): fall back to
                # the prompt's default answer, "N".
                answer = ""
            if answer.strip().lower() not in ("y", "yes"):
                print("Stopping until dependencies are resolved.")
                break
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# Start an event loop and drive main() to completion.
# NOTE(review): this call is unguarded, so it also runs if the module is imported.
asyncio.run(main())
|
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
# ISSUES
|
|
2
|
+
|
|
3
|
+
Here are a set of GitHub issues:
|
|
4
|
+
|
|
5
|
+
!`gh issue list --state open --json number,title,body,comments`
|
|
6
|
+
|
|
7
|
+
You will work on one AFK (away from keyboard) issue only, not the HITL (human in the loop) ones.
|
|
8
|
+
|
|
9
|
+
When the task is complete, output <completion>ISSUE-DONE</completion>
|
|
10
|
+
If there are no more AFK issues, output <completion>NO-MORE-ISSUES</completion>
|
|
11
|
+
If all open AFK issues have unresolved dependencies, output <completion>AWAITING-DEPENDENCIES</completion>
|
|
12
|
+
|
|
13
|
+
# TASK SELECTION
|
|
14
|
+
|
|
15
|
+
Pick the next task. Prioritize tasks in this order:
|
|
16
|
+
|
|
17
|
+
1. Critical bugfixes
|
|
18
|
+
2. Development infrastructure
|
|
19
|
+
|
|
20
|
+
Getting development infrastructure like tests and types and dev scripts ready is an important precursor to building features.
|
|
21
|
+
|
|
22
|
+
3. Tracer bullets for new features
|
|
23
|
+
|
|
24
|
+
Tracer bullets are small slices of functionality that go through all layers of the system, allowing you to test and validate your approach early. This helps in identifying potential issues and ensures that the overall architecture is sound before investing significant time in development.
|
|
25
|
+
|
|
26
|
+
TL;DR - build a tiny, end-to-end slice of the feature first, then expand it out.
|
|
27
|
+
|
|
28
|
+
4. Polish and quick wins
|
|
29
|
+
5. Refactors
|
|
30
|
+
|
|
31
|
+
# EXPLORATION
|
|
32
|
+
|
|
33
|
+
Explore the repo.
|
|
34
|
+
|
|
35
|
+
# IMPLEMENTATION
|
|
36
|
+
|
|
37
|
+
Complete the task using test driven development (TDD)
|
|
38
|
+
|
|
39
|
+
## TDD Philosophy
|
|
40
|
+
|
|
41
|
+
**Core principle**: Tests should verify behavior through public interfaces, not implementation details.
|
|
42
|
+
Code can change entirely; tests shouldn't.
|
|
43
|
+
|
|
44
|
+
**Good tests** are integration-style: they exercise real code paths through public APIs.
|
|
45
|
+
They describe _what_ the system does, not _how_ it does it.
|
|
46
|
+
A good test reads like a specification - "user can checkout with valid cart" tells you exactly what capability exists.
|
|
47
|
+
These tests survive refactors because they don't care about internal structure.
|
|
48
|
+
|
|
49
|
+
**Bad tests** are coupled to implementation.
|
|
50
|
+
They mock internal collaborators, test private methods, or verify through external means (like querying a database directly instead of using the interface).
|
|
51
|
+
The warning sign: your test breaks when you refactor, but behavior hasn't changed.
|
|
52
|
+
If you rename an internal function and tests fail, those tests were testing implementation, not behavior.
|
|
53
|
+
|
|
54
|
+
### Test examples
|
|
55
|
+
|
|
56
|
+
#### Good Tests
|
|
57
|
+
|
|
58
|
+
**Integration-style**: Test through real interfaces, not mocks of internal parts.
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
# GOOD: Tests observable behavior
|
|
62
|
+
async def test_user_can_checkout_with_valid_cart() -> None:
|
|
63
|
+
cart = create_cart()
|
|
64
|
+
cart.add(product)
|
|
65
|
+
result = await checkout(cart, payment_method)
|
|
66
|
+
assert result.status == "confirmed"
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Characteristics:
|
|
70
|
+
|
|
71
|
+
- Tests behavior users/callers care about
|
|
72
|
+
- Uses public API only
|
|
73
|
+
- Survives internal refactors
|
|
74
|
+
- Describes WHAT, not HOW
|
|
75
|
+
- One logical assertion per test
|
|
76
|
+
|
|
77
|
+
#### Bad Tests
|
|
78
|
+
|
|
79
|
+
**Implementation-detail tests**: Coupled to internal structure.
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
# BAD: Tests implementation details
|
|
83
|
+
async def test_checkout_calls_payment_service_process() -> None:
|
|
84
|
+
with patch("myapp.checkout.payment_service") as mock_payment:
|
|
85
|
+
await checkout(cart, payment)
|
|
86
|
+
mock_payment.process.assert_called_once_with(cart.total)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
#### Red flags:
|
|
90
|
+
|
|
91
|
+
- Mocking internal collaborators
|
|
92
|
+
- Testing private methods
|
|
93
|
+
- Asserting on call counts/order
|
|
94
|
+
- Test breaks when refactoring without behavior change
|
|
95
|
+
- Test name describes HOW not WHAT
|
|
96
|
+
- Verifying through external means instead of interface
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
# BAD: Bypasses interface to verify
|
|
100
|
+
async def test_create_user_saves_to_database() -> None:
|
|
101
|
+
await create_user({"name": "Alice"})
|
|
102
|
+
row = await db.query("SELECT * FROM users WHERE name = ?", ["Alice"])
|
|
103
|
+
assert row is not None
|
|
104
|
+
|
|
105
|
+
# GOOD: Verifies through interface
|
|
106
|
+
async def test_create_user_makes_user_retrievable() -> None:
|
|
107
|
+
user = await create_user({"name": "Alice"})
|
|
108
|
+
retrieved = await get_user(user.id)
|
|
109
|
+
assert retrieved.name == "Alice"
|
|
110
|
+
```
|
|
111
|
+
### Mocking guidelines
|
|
112
|
+
|
|
113
|
+
#### When to Mock
|
|
114
|
+
|
|
115
|
+
Mock at **system boundaries** only:
|
|
116
|
+
|
|
117
|
+
- External APIs (payment, email, etc.)
|
|
118
|
+
- Databases (sometimes - prefer test DB)
|
|
119
|
+
- Time/randomness
|
|
120
|
+
- File system (sometimes)
|
|
121
|
+
|
|
122
|
+
Don't mock:
|
|
123
|
+
|
|
124
|
+
- Your own classes/modules
|
|
125
|
+
- Internal collaborators
|
|
126
|
+
- Anything you control
|
|
127
|
+
|
|
128
|
+
#### Designing for Mockability
|
|
129
|
+
|
|
130
|
+
At system boundaries, design interfaces that are easy to mock:
|
|
131
|
+
|
|
132
|
+
**1. Use dependency injection**
|
|
133
|
+
|
|
134
|
+
Pass external dependencies in rather than creating them internally:
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
# Easy to mock
|
|
138
|
+
def process_payment(order, payment_client):
|
|
139
|
+
return payment_client.charge(order.total)
|
|
140
|
+
|
|
141
|
+
# Hard to mock
|
|
142
|
+
def process_payment(order):
|
|
143
|
+
client = StripeClient(os.environ["STRIPE_KEY"])
|
|
144
|
+
return client.charge(order.total)
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
**2. Prefer SDK-style interfaces over generic fetchers**
|
|
148
|
+
|
|
149
|
+
Create specific functions for each external operation instead of one generic function with conditional logic:
|
|
150
|
+
|
|
151
|
+
```python
|
|
152
|
+
# GOOD: Each method is independently mockable
|
|
153
|
+
class Api:
|
|
154
|
+
def get_user(self, id):
|
|
155
|
+
return fetch(f"/users/{id}")
|
|
156
|
+
|
|
157
|
+
def get_orders(self, user_id):
|
|
158
|
+
return fetch(f"/users/{user_id}/orders")
|
|
159
|
+
|
|
160
|
+
def create_order(self, data):
|
|
161
|
+
return fetch("/orders", method="POST", body=data)
|
|
162
|
+
|
|
163
|
+
# BAD: Mocking requires conditional logic inside the mock
|
|
164
|
+
class Api:
|
|
165
|
+
def fetch(self, endpoint, options):
|
|
166
|
+
return fetch(endpoint, options)
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
The SDK approach means:
|
|
170
|
+
- Each mock returns one specific shape
|
|
171
|
+
- No conditional logic in test setup
|
|
172
|
+
- Easier to see which endpoints a test exercises
|
|
173
|
+
- Type safety per endpoint
|
|
174
|
+
|
|
175
|
+
## TDD Anti-Pattern: Horizontal Slices
|
|
176
|
+
|
|
177
|
+
**DO NOT write all tests first, then all implementation.** This is "horizontal slicing" - treating RED as "write all
|
|
178
|
+
tests" and GREEN as "write all code."
|
|
179
|
+
|
|
180
|
+
This produces **crap tests**:
|
|
181
|
+
|
|
182
|
+
- Tests written in bulk test _imagined_ behavior, not _actual_ behavior
|
|
183
|
+
- You end up testing the _shape_ of things (data structures, function signatures) rather than user-facing behavior
|
|
184
|
+
- Tests become insensitive to real changes - they pass when behavior breaks, fail when behavior is fine
|
|
185
|
+
- You outrun your headlights, committing to test structure before understanding the implementation
|
|
186
|
+
|
|
187
|
+
**Correct approach**: Vertical slices via tracer bullets. One test → one implementation → repeat. Each test responds
|
|
188
|
+
to what you learned from the previous cycle. Because you just wrote the code, you know exactly what behavior matters
|
|
189
|
+
and how to verify it.
|
|
190
|
+
|
|
191
|
+
```
|
|
192
|
+
WRONG (horizontal):
|
|
193
|
+
RED: test1, test2, test3, test4, test5
|
|
194
|
+
GREEN: impl1, impl2, impl3, impl4, impl5
|
|
195
|
+
|
|
196
|
+
RIGHT (vertical):
|
|
197
|
+
RED→GREEN: test1→impl1
|
|
198
|
+
RED→GREEN: test2→impl2
|
|
199
|
+
RED→GREEN: test3→impl3
|
|
200
|
+
...
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
## Definition of deep modules
|
|
204
|
+
|
|
205
|
+
From "A Philosophy of Software Design":
|
|
206
|
+
|
|
207
|
+
**Deep module** = small interface + lots of implementation
|
|
208
|
+
This is good
|
|
209
|
+
|
|
210
|
+
```
|
|
211
|
+
┌─────────────────────┐
|
|
212
|
+
│ Small Interface │ ← Few methods, simple params
|
|
213
|
+
├─────────────────────┤
|
|
214
|
+
│ │
|
|
215
|
+
│ │
|
|
216
|
+
│ Deep Implementation│ ← Complex logic hidden
|
|
217
|
+
│ │
|
|
218
|
+
│ │
|
|
219
|
+
└─────────────────────┘
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
**Shallow module** = large interface + little implementation (avoid)
|
|
223
|
+
This is bad
|
|
224
|
+
|
|
225
|
+
```
|
|
226
|
+
┌─────────────────────────────────┐
|
|
227
|
+
│ Large Interface │ ← Many methods, complex params
|
|
228
|
+
├─────────────────────────────────┤
|
|
229
|
+
│ Thin Implementation │ ← Just passes through
|
|
230
|
+
└─────────────────────────────────┘
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
When designing interfaces, ask:
|
|
234
|
+
|
|
235
|
+
- Can I reduce the number of methods?
|
|
236
|
+
- Can I simplify the parameters?
|
|
237
|
+
- Can I hide more complexity inside?
|
|
238
|
+
|
|
239
|
+
## Interface design for testability
|
|
240
|
+
|
|
241
|
+
Good interfaces make testing natural:
|
|
242
|
+
|
|
243
|
+
1. **Accept dependencies, don't create them**
|
|
244
|
+
|
|
245
|
+
```python
|
|
246
|
+
# Testable
|
|
247
|
+
def process_order(order, payment_gateway):
|
|
248
|
+
...
|
|
249
|
+
|
|
250
|
+
# Hard to test
|
|
251
|
+
def process_order(order):
|
|
252
|
+
gateway = StripeGateway()
|
|
253
|
+
...
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
2. **Return results, don't produce side effects**
|
|
257
|
+
|
|
258
|
+
```python
|
|
259
|
+
# Testable
|
|
260
|
+
def calculate_discount(cart) -> Discount:
|
|
261
|
+
...
|
|
262
|
+
|
|
263
|
+
# Hard to test
|
|
264
|
+
def apply_discount(cart) -> None:
|
|
265
|
+
cart.total -= discount
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
3. **Small surface area**
|
|
269
|
+
- Fewer methods = fewer tests needed
|
|
270
|
+
- Fewer params = simpler test setup
|
|
271
|
+
|
|
272
|
+
## TDD Workflow
|
|
273
|
+
|
|
274
|
+
### 1. Planning
|
|
275
|
+
|
|
276
|
+
When exploring the codebase, use the project's domain glossary so that test names and interface vocabulary match the project's language,
|
|
277
|
+
and respect ADRs in the area you're touching.
|
|
278
|
+
|
|
279
|
+
Before writing any code:
|
|
280
|
+
|
|
281
|
+
- [ ] Identify opportunities for deep modules
|
|
282
|
+
- [ ] Design interfaces for testability
|
|
283
|
+
- [ ] List the behaviors to test (not implementation steps)
|
|
284
|
+
- [ ] Get user approval on the plan
|
|
285
|
+
|
|
286
|
+
Consider what the public interface should look like and which behaviors are most important to test.
|
|
287
|
+
|
|
288
|
+
**You can't test everything.** Focus testing effort on critical paths and complex logic, not every possible edge case.
|
|
289
|
+
|
|
290
|
+
### 2. Tracer Bullet
|
|
291
|
+
|
|
292
|
+
Write ONE test that confirms ONE thing about the system:
|
|
293
|
+
|
|
294
|
+
```
|
|
295
|
+
RED: Write test for first behavior → test fails
|
|
296
|
+
GREEN: Write minimal code to pass → test passes
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
This is your tracer bullet - proves the path works end-to-end.
|
|
300
|
+
|
|
301
|
+
### 3. Incremental Loop
|
|
302
|
+
|
|
303
|
+
For each remaining behavior:
|
|
304
|
+
|
|
305
|
+
```
|
|
306
|
+
RED: Write next test → fails
|
|
307
|
+
GREEN: Minimal code to pass → passes
|
|
308
|
+
```
|
|
309
|
+
|
|
310
|
+
Rules:
|
|
311
|
+
|
|
312
|
+
- One test at a time
|
|
313
|
+
- Only enough code to pass current test
|
|
314
|
+
- Don't anticipate future tests
|
|
315
|
+
- Keep tests focused on observable behavior
|
|
316
|
+
|
|
317
|
+
### 4. Refactor
|
|
318
|
+
|
|
319
|
+
After all tests pass, look for [refactor candidates](refactoring.md):
|
|
320
|
+
|
|
321
|
+
- [ ] Extract duplication
|
|
322
|
+
- [ ] Break long methods into private helpers (keep tests on public interface)
|
|
323
|
+
- [ ] Deepen modules (move complexity behind simple interfaces)
|
|
324
|
+
- [ ] Move logic to where data lives
|
|
325
|
+
- [ ] Apply SOLID principles where natural
|
|
326
|
+
- [ ] Consider what new code reveals about existing code
|
|
327
|
+
- [ ] Run tests after each refactor step
|
|
328
|
+
|
|
329
|
+
**Never refactor while RED.** Get to GREEN first.
|
|
330
|
+
|
|
331
|
+
## Checklist Per Cycle
|
|
332
|
+
|
|
333
|
+
```
|
|
334
|
+
[ ] Test describes behavior, not implementation
|
|
335
|
+
[ ] Test uses public interface only
|
|
336
|
+
[ ] Test would survive internal refactor
|
|
337
|
+
[ ] Code is minimal for this test
|
|
338
|
+
[ ] No speculative features added
|
|
339
|
+
```
|
|
340
|
+
# FEEDBACK LOOPS
|
|
341
|
+
|
|
342
|
+
Before committing, run the feedback loops:
|
|
343
|
+
|
|
344
|
+
- run tests for any files that have changed
|
|
345
|
+
- run mypy in strict mode for the files that have changed
|
|
346
|
+
- run ruff to check formatting and linting for files that have changed
|
|
347
|
+
|
|
348
|
+
# COMMIT
|
|
349
|
+
|
|
350
|
+
Make a git commit. The commit message must:
|
|
351
|
+
|
|
352
|
+
1. Include key decisions made
|
|
353
|
+
2. Include files changed
|
|
354
|
+
3. Blockers or notes for next iteration
|
|
355
|
+
|
|
356
|
+
# THE ISSUE
|
|
357
|
+
|
|
358
|
+
If the task is complete, close the original GitHub issue.
|
|
359
|
+
|
|
360
|
+
If the task is not complete, leave a comment on the GitHub issue with what was done.
|
|
361
|
+
|
|
362
|
+
# FINAL RULES
|
|
363
|
+
|
|
364
|
+
ONLY WORK ON A SINGLE TASK. If you receive a multi-phase plan, only work on a single phase of that plan.
|