assertion-cli 0.1.0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/PKG-INFO +19 -6
- assertion_cli-0.2.0/README.md +47 -0
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/assertion_cli.egg-info/PKG-INFO +19 -6
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/git.py +56 -19
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/main.py +133 -113
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/pyproject.toml +1 -1
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/session.py +5 -3
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/templates/ACTIVATION.md +2 -2
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/templates/SKILL.md +47 -31
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/tests/test_git.py +123 -10
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/tests/test_init.py +7 -7
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/tests/test_stack_resolve.py +1 -1
- assertion_cli-0.1.0/README.md +0 -34
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/api.py +0 -0
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/assertion_cli.egg-info/SOURCES.txt +0 -0
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/assertion_cli.egg-info/dependency_links.txt +0 -0
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/assertion_cli.egg-info/entry_points.txt +0 -0
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/assertion_cli.egg-info/requires.txt +0 -0
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/assertion_cli.egg-info/top_level.txt +0 -0
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/bundle.py +0 -0
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/link.py +0 -0
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/models.py +0 -0
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/setup.cfg +0 -0
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/templates/__init__.py +0 -0
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/tests/test_api.py +0 -0
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/tests/test_bundle.py +0 -0
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/tests/test_decision.py +0 -0
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/tests/test_link.py +0 -0
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/tests/test_main.py +0 -0
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/tests/test_prompt.py +0 -0
- {assertion_cli-0.1.0 → assertion_cli-0.2.0}/tests/test_session.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: assertion-cli
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: CLI for the Assertion API
|
|
5
5
|
Requires-Python: >=3.13
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
@@ -26,14 +26,15 @@ Run locally from the workspace:
|
|
|
26
26
|
uv run --package assertion-cli asrt --help
|
|
27
27
|
```
|
|
28
28
|
|
|
29
|
-
Install
|
|
29
|
+
Install as a `uv` tool from PyPI:
|
|
30
30
|
|
|
31
31
|
```bash
|
|
32
|
-
uv tool install
|
|
32
|
+
uv tool install assertion-cli # latest
|
|
33
|
+
uv tool install --reinstall assertion-cli # upgrade in place
|
|
33
34
|
```
|
|
34
35
|
|
|
35
36
|
The CLI package declares all of its direct runtime dependencies. At the moment
|
|
36
|
-
that set is `httpx`, `pydantic`, and `typer`.
|
|
37
|
+
that set is `httpx`, `pydantic`, `python-dotenv`, and `typer`.
|
|
37
38
|
|
|
38
39
|
After installation:
|
|
39
40
|
|
|
@@ -42,7 +43,19 @@ asrt stacks
|
|
|
42
43
|
asrt checkpoint --stack <stack-id> "Implemented X\nUpdated Y"
|
|
43
44
|
asrt checkpoint --continue "Implemented Y"
|
|
44
45
|
asrt decision --yes <checkpoint-id> # optional, only after a failed checkpoint
|
|
45
|
-
asrt verify
|
|
46
|
+
asrt verify # submit final verification (non-blocking)
|
|
47
|
+
asrt verify-status # one-shot poll; loop with your own sleep until terminal
|
|
46
48
|
```
|
|
47
49
|
|
|
48
|
-
|
|
50
|
+
## Publishing a new version
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
# 1. Bump the version in cli/pyproject.toml.
|
|
54
|
+
# 2. Build the sdist + wheel.
|
|
55
|
+
uv build --package assertion-cli
|
|
56
|
+
# 3. Upload (requires UV_PUBLISH_TOKEN, or `--token` on the command).
|
|
57
|
+
uv publish dist/*
|
|
58
|
+
# 4. Tag the release so consumers can correlate to git:
|
|
59
|
+
git tag cli-v$(grep '^version = ' pyproject.toml | head -1 | cut -d'"' -f2)
|
|
60
|
+
git push origin --tags
|
|
61
|
+
```
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# Assertion CLI
|
|
2
|
+
|
|
3
|
+
CLI for the Assertion API.
|
|
4
|
+
|
|
5
|
+
## Usage
|
|
6
|
+
|
|
7
|
+
The CLI currently targets a local backend at `http://localhost:8000`.
|
|
8
|
+
|
|
9
|
+
Run locally from the workspace:
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
uv run --package assertion-cli asrt --help
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
Install as a `uv` tool from PyPI:
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
uv tool install assertion-cli # latest
|
|
19
|
+
uv tool install --reinstall assertion-cli # upgrade in place
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
The CLI package declares all of its direct runtime dependencies. At the moment
|
|
23
|
+
that set is `httpx`, `pydantic`, `python-dotenv`, and `typer`.
|
|
24
|
+
|
|
25
|
+
After installation:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
asrt stacks
|
|
29
|
+
asrt checkpoint --stack <stack-id> "Implemented X\nUpdated Y"
|
|
30
|
+
asrt checkpoint --continue "Implemented Y"
|
|
31
|
+
asrt decision --yes <checkpoint-id> # optional, only after a failed checkpoint
|
|
32
|
+
asrt verify # submit final verification (non-blocking)
|
|
33
|
+
asrt verify-status # one-shot poll; loop with your own sleep until terminal
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Publishing a new version
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
# 1. Bump the version in cli/pyproject.toml.
|
|
40
|
+
# 2. Build the sdist + wheel.
|
|
41
|
+
uv build --package assertion-cli
|
|
42
|
+
# 3. Upload (requires UV_PUBLISH_TOKEN, or `--token` on the command).
|
|
43
|
+
uv publish dist/*
|
|
44
|
+
# 4. Tag the release so consumers can correlate to git:
|
|
45
|
+
git tag cli-v$(grep '^version = ' pyproject.toml | head -1 | cut -d'"' -f2)
|
|
46
|
+
git push origin --tags
|
|
47
|
+
```
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: assertion-cli
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: CLI for the Assertion API
|
|
5
5
|
Requires-Python: >=3.13
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
@@ -26,14 +26,15 @@ Run locally from the workspace:
|
|
|
26
26
|
uv run --package assertion-cli asrt --help
|
|
27
27
|
```
|
|
28
28
|
|
|
29
|
-
Install
|
|
29
|
+
Install as a `uv` tool from PyPI:
|
|
30
30
|
|
|
31
31
|
```bash
|
|
32
|
-
uv tool install
|
|
32
|
+
uv tool install assertion-cli # latest
|
|
33
|
+
uv tool install --reinstall assertion-cli # upgrade in place
|
|
33
34
|
```
|
|
34
35
|
|
|
35
36
|
The CLI package declares all of its direct runtime dependencies. At the moment
|
|
36
|
-
that set is `httpx`, `pydantic`, and `typer`.
|
|
37
|
+
that set is `httpx`, `pydantic`, `python-dotenv`, and `typer`.
|
|
37
38
|
|
|
38
39
|
After installation:
|
|
39
40
|
|
|
@@ -42,7 +43,19 @@ asrt stacks
|
|
|
42
43
|
asrt checkpoint --stack <stack-id> "Implemented X\nUpdated Y"
|
|
43
44
|
asrt checkpoint --continue "Implemented Y"
|
|
44
45
|
asrt decision --yes <checkpoint-id> # optional, only after a failed checkpoint
|
|
45
|
-
asrt verify
|
|
46
|
+
asrt verify # submit final verification (non-blocking)
|
|
47
|
+
asrt verify-status # one-shot poll; loop with your own sleep until terminal
|
|
46
48
|
```
|
|
47
49
|
|
|
48
|
-
|
|
50
|
+
## Publishing a new version
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
# 1. Bump the version in cli/pyproject.toml.
|
|
54
|
+
# 2. Build the sdist + wheel.
|
|
55
|
+
uv build --package assertion-cli
|
|
56
|
+
# 3. Upload (requires UV_PUBLISH_TOKEN, or `--token` on the command).
|
|
57
|
+
uv publish dist/*
|
|
58
|
+
# 4. Tag the release so consumers can correlate to git:
|
|
59
|
+
git tag cli-v$(grep '^version = ' pyproject.toml | head -1 | cut -d'"' -f2)
|
|
60
|
+
git push origin --tags
|
|
61
|
+
```
|
|
@@ -37,7 +37,23 @@ def find_git_root(start_path: Path) -> Path:
|
|
|
37
37
|
return Path(completed.stdout.strip())
|
|
38
38
|
|
|
39
39
|
|
|
40
|
-
def require_head_pushed(repo_root: Path) -> None:
|
|
40
|
+
def get_diff_base_sha(repo_root: Path) -> str:
|
|
41
|
+
"""Return the SHA the verifier should diff against — i.e. the most recent
|
|
42
|
+
commit reachable from BOTH local HEAD and some `refs/remotes/*` ref.
|
|
43
|
+
|
|
44
|
+
The verifier clones from GitHub and runs `git checkout <head_sha>` (see
|
|
45
|
+
backend/repo_analysis/clone.py). That checkout fails for any commit that
|
|
46
|
+
only exists locally — which is exactly what Codex Cloud produces when its
|
|
47
|
+
agent autocommits before `asrt checkpoint`. By picking the latest commit
|
|
48
|
+
that's on origin AND reachable from HEAD, the checkout always succeeds;
|
|
49
|
+
any local-only commits on top of that base flow through as part of the
|
|
50
|
+
diff (see `get_uncommitted_diff`), so the verifier still sees the full
|
|
51
|
+
state the agent built.
|
|
52
|
+
|
|
53
|
+
When HEAD itself is on a remote-tracking ref (no local commits),
|
|
54
|
+
`git merge-base HEAD <ref>` returns HEAD, so this collapses to the old
|
|
55
|
+
behavior with no diff growth.
|
|
56
|
+
"""
|
|
41
57
|
try:
|
|
42
58
|
remote_refs = run_git_command(
|
|
43
59
|
repo_root, ["for-each-ref", "--format=%(refname:short)", "refs/remotes"]
|
|
@@ -50,26 +66,42 @@ def require_head_pushed(repo_root: Path) -> None:
|
|
|
50
66
|
]
|
|
51
67
|
if not refs:
|
|
52
68
|
exit_with_error(
|
|
53
|
-
"
|
|
69
|
+
"No remote-tracking branches found. The verifier needs a commit "
|
|
70
|
+
"on origin to apply the diff onto — push at least one branch "
|
|
71
|
+
"(and `git fetch`) before running this command."
|
|
54
72
|
)
|
|
55
73
|
|
|
74
|
+
candidate_shas: list[str] = []
|
|
56
75
|
for ref in refs:
|
|
57
76
|
completed = subprocess.run(
|
|
58
|
-
["git", "merge-base", "
|
|
77
|
+
["git", "merge-base", "HEAD", ref],
|
|
59
78
|
cwd=repo_root,
|
|
60
79
|
capture_output=True,
|
|
61
80
|
text=True,
|
|
62
81
|
check=False,
|
|
63
82
|
)
|
|
64
83
|
if completed.returncode == 0:
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
84
|
+
sha = completed.stdout.strip()
|
|
85
|
+
if sha:
|
|
86
|
+
candidate_shas.append(sha)
|
|
87
|
+
|
|
88
|
+
if not candidate_shas:
|
|
89
|
+
exit_with_error(
|
|
90
|
+
"HEAD has no commits in common with any remote-tracking branch. "
|
|
91
|
+
"Push the branch (or its parent commits) to origin and retry."
|
|
92
|
+
)
|
|
71
93
|
|
|
72
|
-
|
|
94
|
+
best_sha = candidate_shas[0]
|
|
95
|
+
best_ts = -1
|
|
96
|
+
for sha in candidate_shas:
|
|
97
|
+
try:
|
|
98
|
+
ts = int(run_git_command(repo_root, ["log", "-1", "--format=%ct", sha]))
|
|
99
|
+
except (RuntimeError, ValueError):
|
|
100
|
+
continue
|
|
101
|
+
if ts > best_ts:
|
|
102
|
+
best_ts = ts
|
|
103
|
+
best_sha = sha
|
|
104
|
+
return best_sha
|
|
73
105
|
|
|
74
106
|
|
|
75
107
|
def get_head_sha(repo_root: Path) -> str:
|
|
@@ -148,9 +180,9 @@ def _build_untracked_diff(repo_root: Path, rel_path: str) -> str:
|
|
|
148
180
|
# Paths the assertion-cli owns end-to-end. Excluded from the diff bundle so
|
|
149
181
|
# reviewers don't flag our own bootstrap files as "unrelated changes" — they're
|
|
150
182
|
# generated/refreshed by `asrt init` and `asrt checkpoint` and have nothing to
|
|
151
|
-
# do with the
|
|
152
|
-
# NOT in this list: those are
|
|
153
|
-
# block into, so the
|
|
183
|
+
# do with the user's feature work. CLAUDE.md / AGENTS.md are intentionally
|
|
184
|
+
# NOT in this list: those are user-owned files we only patch a marked
|
|
185
|
+
# block into, so the user's other edits to them should still flow through.
|
|
154
186
|
_ASSERTION_EXCLUDED_PATHSPECS = [
|
|
155
187
|
":(exclude).assertion",
|
|
156
188
|
":(exclude).claude/skills/assertion-cli",
|
|
@@ -158,13 +190,18 @@ _ASSERTION_EXCLUDED_PATHSPECS = [
|
|
|
158
190
|
]
|
|
159
191
|
|
|
160
192
|
|
|
161
|
-
def get_uncommitted_diff(repo_root: Path) -> str:
|
|
193
|
+
def get_uncommitted_diff(repo_root: Path, base_sha: str) -> str:
|
|
194
|
+
"""Build a unified diff from `base_sha` to the current working tree.
|
|
195
|
+
|
|
196
|
+
`git diff <base_sha>` compares the working tree directly to the base
|
|
197
|
+
commit, which subsumes both committed-since-base and unstaged tweaks in
|
|
198
|
+
one shot — so a single call covers all tracked changes regardless of
|
|
199
|
+
whether the agent committed locally. Untracked files are still added
|
|
200
|
+
separately as new-file diffs.
|
|
201
|
+
"""
|
|
162
202
|
try:
|
|
163
203
|
tracked = run_git_command(
|
|
164
|
-
repo_root, ["diff", "--", *_ASSERTION_EXCLUDED_PATHSPECS]
|
|
165
|
-
)
|
|
166
|
-
staged = run_git_command(
|
|
167
|
-
repo_root, ["diff", "--cached", "--", *_ASSERTION_EXCLUDED_PATHSPECS]
|
|
204
|
+
repo_root, ["diff", base_sha, "--", *_ASSERTION_EXCLUDED_PATHSPECS]
|
|
168
205
|
)
|
|
169
206
|
|
|
170
207
|
untracked_output = run_git_command(
|
|
@@ -183,7 +220,7 @@ def get_uncommitted_diff(repo_root: Path) -> str:
|
|
|
183
220
|
if rel_path
|
|
184
221
|
]
|
|
185
222
|
|
|
186
|
-
parts = [p for p in [tracked,
|
|
223
|
+
parts = [p for p in [tracked, "\n".join(untracked_diffs)] if p]
|
|
187
224
|
return "\n".join(parts)
|
|
188
225
|
except RuntimeError as exc:
|
|
189
226
|
exit_with_error(f"Failed to collect git diff: {exc}")
|
|
@@ -2,8 +2,6 @@ import base64
|
|
|
2
2
|
import importlib.resources
|
|
3
3
|
import json
|
|
4
4
|
import re
|
|
5
|
-
import sys
|
|
6
|
-
import time
|
|
7
5
|
from datetime import datetime, timezone
|
|
8
6
|
from pathlib import Path
|
|
9
7
|
|
|
@@ -14,12 +12,12 @@ from bundle import build_bundle
|
|
|
14
12
|
from git import (
|
|
15
13
|
exit_with_error,
|
|
16
14
|
find_git_root,
|
|
15
|
+
get_diff_base_sha,
|
|
17
16
|
get_head_branch,
|
|
18
|
-
get_head_sha,
|
|
19
17
|
get_uncommitted_diff,
|
|
20
18
|
)
|
|
21
19
|
from link import load_link, save_link
|
|
22
|
-
from models import CheckpointResponse,
|
|
20
|
+
from models import CheckpointResponse, SessionStatus, render_stack_list
|
|
23
21
|
from session import (
|
|
24
22
|
ASSERTION_DIR_NAME,
|
|
25
23
|
METADATA_FILE_NAME,
|
|
@@ -33,8 +31,6 @@ from session import (
|
|
|
33
31
|
|
|
34
32
|
app = typer.Typer(help="Assertion CLI")
|
|
35
33
|
|
|
36
|
-
STATUS_POLL_INTERVAL_SECONDS = 2.0
|
|
37
|
-
|
|
38
34
|
|
|
39
35
|
def render_checkpoint_response(resp: CheckpointResponse) -> str:
|
|
40
36
|
lines = [
|
|
@@ -131,7 +127,7 @@ def _refresh_skill_files(repo_root: Path) -> None:
|
|
|
131
127
|
"""Write/refresh the assertion-owned skill files for each coding agent.
|
|
132
128
|
|
|
133
129
|
Safe to call on every checkpoint — these files are entirely ours; updating
|
|
134
|
-
them to match the installed CLI version cannot clobber
|
|
130
|
+
them to match the installed CLI version cannot clobber user content.
|
|
135
131
|
|
|
136
132
|
`.claude/skills/` — Claude Code
|
|
137
133
|
`.agents/skills/` — Codex + Cursor (Cursor also accepts .cursor/skills/)
|
|
@@ -176,10 +172,10 @@ def _ensure_gitignore_excludes_assertion(repo_root: Path) -> None:
|
|
|
176
172
|
|
|
177
173
|
|
|
178
174
|
def _apply_activation_blocks(repo_root: Path) -> None:
|
|
179
|
-
"""Insert/update the marked activation block in
|
|
175
|
+
"""Insert/update the marked activation block in user-owned CLAUDE.md / AGENTS.md.
|
|
180
176
|
|
|
181
177
|
Only called by `asrt init`. We deliberately do NOT call this on every checkpoint —
|
|
182
|
-
these files may be tracked in the
|
|
178
|
+
these files may be tracked in the user's git, and silently dirtying them on
|
|
183
179
|
every checkpoint is hostile. The block is a stable pointer to the skill file, so
|
|
184
180
|
re-running on every CLI upgrade isn't necessary.
|
|
185
181
|
"""
|
|
@@ -188,16 +184,6 @@ def _apply_activation_blocks(repo_root: Path) -> None:
|
|
|
188
184
|
_apply_marked_block(repo_root / "AGENTS.md", activation_body, repo_root)
|
|
189
185
|
|
|
190
186
|
|
|
191
|
-
def _clear_session_state(ctx: SessionContext) -> None:
|
|
192
|
-
"""Remove per-session files so the next prompt/checkpoint starts cleanly.
|
|
193
|
-
|
|
194
|
-
Leaves the `.assertion/` directory itself (and `link`, `screenshots/`) in
|
|
195
|
-
place so the customer can still retrieve the just-completed session's URL.
|
|
196
|
-
"""
|
|
197
|
-
for path in (ctx.prompts_path, ctx.checkpoint_path, ctx.metadata_path):
|
|
198
|
-
path.unlink(missing_ok=True)
|
|
199
|
-
|
|
200
|
-
|
|
201
187
|
@app.command("init")
|
|
202
188
|
def init() -> None:
|
|
203
189
|
"""One-time bootstrap: install the assertion-cli skill files into this repo.
|
|
@@ -215,16 +201,16 @@ def init() -> None:
|
|
|
215
201
|
_ensure_gitignore_excludes_assertion(repo_root)
|
|
216
202
|
typer.echo("")
|
|
217
203
|
typer.echo("The coding agent will now follow the Assertion workflow:")
|
|
218
|
-
typer.echo(' - asrt prompt "<msg>" on every
|
|
204
|
+
typer.echo(' - asrt prompt "<msg>" on every user turn')
|
|
219
205
|
typer.echo(" - asrt checkpoint at trajectory-feedback moments")
|
|
220
206
|
typer.echo(" - asrt verify at completion, then PR")
|
|
221
207
|
|
|
222
208
|
|
|
223
209
|
@app.command("prompt")
|
|
224
210
|
def prompt_cmd(
|
|
225
|
-
text: str = typer.Argument(..., help="
|
|
211
|
+
text: str = typer.Argument(..., help="User prompt text to record."),
|
|
226
212
|
) -> None:
|
|
227
|
-
"""Append a
|
|
213
|
+
"""Append a user prompt to the session prompt history."""
|
|
228
214
|
normalized = text.strip()
|
|
229
215
|
if not normalized:
|
|
230
216
|
exit_with_error("Prompt text must not be empty.")
|
|
@@ -275,7 +261,7 @@ def checkpoint(
|
|
|
275
261
|
|
|
276
262
|
# Refresh the assertion-owned skill files so the agent always loads the
|
|
277
263
|
# version that ships with the installed CLI. We deliberately do NOT touch
|
|
278
|
-
# CLAUDE.md / AGENTS.md here — those are
|
|
264
|
+
# CLAUDE.md / AGENTS.md here — those are user-owned and only updated
|
|
279
265
|
# by `asrt init`.
|
|
280
266
|
_refresh_skill_files(repo_root)
|
|
281
267
|
|
|
@@ -301,11 +287,15 @@ def checkpoint(
|
|
|
301
287
|
assert stack_id is not None
|
|
302
288
|
|
|
303
289
|
append_checkpoint_entry(ctx.checkpoint_path, message)
|
|
304
|
-
head_sha = get_head_sha(ctx.repo_root)
|
|
290
|
+
# `head_sha` here is the diff base — the latest commit on origin that's
|
|
291
|
+
# also reachable from local HEAD. Backend uses it as the checkout target
|
|
292
|
+
# before applying the diff; sending the actual local HEAD would break
|
|
293
|
+
# when the agent has uncommitted-to-origin commits (e.g. Codex Cloud).
|
|
294
|
+
base_sha = get_diff_base_sha(ctx.repo_root)
|
|
305
295
|
head_branch = get_head_branch(ctx.repo_root)
|
|
306
|
-
diff_text = get_uncommitted_diff(ctx.repo_root)
|
|
296
|
+
diff_text = get_uncommitted_diff(ctx.repo_root, base_sha)
|
|
307
297
|
metadata = update_metadata_head(
|
|
308
|
-
ctx.metadata_path, metadata,
|
|
298
|
+
ctx.metadata_path, metadata, base_sha, head_branch=head_branch
|
|
309
299
|
)
|
|
310
300
|
|
|
311
301
|
bundle_bytes = build_bundle(
|
|
@@ -329,28 +319,67 @@ def checkpoint(
|
|
|
329
319
|
raise typer.Exit(code=1)
|
|
330
320
|
|
|
331
321
|
|
|
322
|
+
VERIFY_SESSION_FILE_NAME = "verify_session_id"
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
def _save_verify_session_id(assertion_dir: Path, session_id: str) -> None:
|
|
326
|
+
(assertion_dir / VERIFY_SESSION_FILE_NAME).write_text(
|
|
327
|
+
session_id + "\n", encoding="utf-8"
|
|
328
|
+
)
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def _load_verify_session_id(assertion_dir: Path) -> str:
|
|
332
|
+
path = assertion_dir / VERIFY_SESSION_FILE_NAME
|
|
333
|
+
if not path.exists():
|
|
334
|
+
exit_with_error(
|
|
335
|
+
"No verification in flight. Run `asrt verify` before `asrt verify-status`."
|
|
336
|
+
)
|
|
337
|
+
content = path.read_text(encoding="utf-8").strip()
|
|
338
|
+
if not content:
|
|
339
|
+
exit_with_error(
|
|
340
|
+
f"{path.name} is empty. Re-run `asrt verify` to start a new verification."
|
|
341
|
+
)
|
|
342
|
+
return content
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def _write_screenshots(
|
|
346
|
+
assertion_dir: Path, session_id: str, screenshots: list[str]
|
|
347
|
+
) -> Path:
|
|
348
|
+
screenshot_dir = assertion_dir / "screenshots" / session_id
|
|
349
|
+
screenshot_dir.mkdir(parents=True, exist_ok=True)
|
|
350
|
+
for idx, uri in enumerate(screenshots):
|
|
351
|
+
_, b64data = uri.split(",", 1)
|
|
352
|
+
ext = "jpeg" if "jpeg" in uri.split(",")[0] else "png"
|
|
353
|
+
(screenshot_dir / f"screenshot_{idx:03d}.{ext}").write_bytes(
|
|
354
|
+
base64.b64decode(b64data)
|
|
355
|
+
)
|
|
356
|
+
return screenshot_dir
|
|
357
|
+
|
|
358
|
+
|
|
332
359
|
@app.command("verify")
|
|
333
360
|
def verify(
|
|
334
361
|
json_output: bool = typer.Option(
|
|
335
362
|
False,
|
|
336
363
|
"--json",
|
|
337
|
-
help="Print the
|
|
338
|
-
),
|
|
339
|
-
no_wait: bool = typer.Option(
|
|
340
|
-
False,
|
|
341
|
-
"--no-wait",
|
|
342
|
-
help="Submit and return immediately without polling. Useful for harnesses.",
|
|
364
|
+
help="Print the verification submission as a single JSON line.",
|
|
343
365
|
),
|
|
344
366
|
) -> None:
|
|
345
|
-
"""Submit final verification
|
|
367
|
+
"""Submit final verification. Returns immediately without polling.
|
|
368
|
+
|
|
369
|
+
Verification can take several minutes on the backend; blocking the agent's
|
|
370
|
+
shell long enough to trip a Bash-tool timeout was a real failure mode. So
|
|
371
|
+
this command only submits — poll `asrt verify-status` (one-shot) on whatever
|
|
372
|
+
interval your harness prefers to learn the outcome.
|
|
373
|
+
"""
|
|
346
374
|
ctx, metadata = load_existing_session(Path.cwd())
|
|
347
375
|
stack_id = metadata.stack_id
|
|
348
376
|
assert stack_id is not None
|
|
349
|
-
|
|
377
|
+
# See checkpoint() for why we send the diff base rather than local HEAD.
|
|
378
|
+
base_sha = get_diff_base_sha(ctx.repo_root)
|
|
350
379
|
head_branch = get_head_branch(ctx.repo_root)
|
|
351
|
-
diff_text = get_uncommitted_diff(ctx.repo_root)
|
|
380
|
+
diff_text = get_uncommitted_diff(ctx.repo_root, base_sha)
|
|
352
381
|
metadata = update_metadata_head(
|
|
353
|
-
ctx.metadata_path, metadata,
|
|
382
|
+
ctx.metadata_path, metadata, base_sha, head_branch=head_branch
|
|
354
383
|
)
|
|
355
384
|
|
|
356
385
|
bundle_bytes = build_bundle(
|
|
@@ -370,99 +399,90 @@ def verify(
|
|
|
370
399
|
session_id = resp.session_id
|
|
371
400
|
session_url = resp.url
|
|
372
401
|
save_link(ctx.assertion_dir, session_url)
|
|
402
|
+
_save_verify_session_id(ctx.assertion_dir, session_id)
|
|
373
403
|
|
|
374
|
-
if
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
}
|
|
383
|
-
)
|
|
404
|
+
if json_output:
|
|
405
|
+
typer.echo(
|
|
406
|
+
json.dumps(
|
|
407
|
+
{
|
|
408
|
+
"session_id": session_id,
|
|
409
|
+
"url": session_url,
|
|
410
|
+
"status": "submitted",
|
|
411
|
+
}
|
|
384
412
|
)
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
413
|
+
)
|
|
414
|
+
else:
|
|
415
|
+
typer.echo(f"Verification submitted ({session_id}). URL: {session_url}")
|
|
416
|
+
typer.echo("Poll `asrt verify-status` to check progress.")
|
|
388
417
|
|
|
389
|
-
if not json_output:
|
|
390
|
-
typer.echo(f"Verification submitted ({session_id}). Waiting for result...")
|
|
391
418
|
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
419
|
+
@app.command("verify-status")
|
|
420
|
+
def verify_status(
|
|
421
|
+
json_output: bool = typer.Option(
|
|
422
|
+
False,
|
|
423
|
+
"--json",
|
|
424
|
+
help="Print the verification status as a single JSON line.",
|
|
425
|
+
),
|
|
426
|
+
) -> None:
|
|
427
|
+
"""One-shot poll of the in-flight verification.
|
|
395
428
|
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
429
|
+
Reads the session id saved by `asrt verify` and prints the current status.
|
|
430
|
+
Exits 0 for `created` / `running` / `succeeded`, and 1 for `failed` so a
|
|
431
|
+
polling shell loop can branch on `$?`. When the run reaches a terminal
|
|
432
|
+
state, screenshots are written to `.assertion/screenshots/<session_id>/`
|
|
433
|
+
and the link file is refreshed.
|
|
434
|
+
"""
|
|
435
|
+
ctx, metadata = load_existing_session(Path.cwd())
|
|
436
|
+
_ = metadata # touched only to validate session existence
|
|
437
|
+
session_id = _load_verify_session_id(ctx.assertion_dir)
|
|
403
438
|
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
sys.stderr.flush()
|
|
439
|
+
client = AssertionClient()
|
|
440
|
+
payload = client.status(session_id=session_id)
|
|
407
441
|
|
|
408
|
-
|
|
409
|
-
|
|
442
|
+
session_url: str | None = payload.url
|
|
443
|
+
if session_url:
|
|
410
444
|
save_link(ctx.assertion_dir, session_url)
|
|
445
|
+
else:
|
|
446
|
+
# Fallback to the link we saved at submission time.
|
|
447
|
+
try:
|
|
448
|
+
session_url = load_link(ctx.assertion_dir)
|
|
449
|
+
except SystemExit:
|
|
450
|
+
session_url = None
|
|
411
451
|
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
_, b64data = uri.split(",", 1)
|
|
417
|
-
ext = "jpeg" if "jpeg" in uri.split(",")[0] else "png"
|
|
418
|
-
path = screenshot_dir / f"screenshot_{idx:03d}.{ext}"
|
|
419
|
-
path.write_bytes(base64.b64decode(b64data))
|
|
420
|
-
|
|
421
|
-
if json_output:
|
|
422
|
-
typer.echo(
|
|
423
|
-
json.dumps(
|
|
424
|
-
{
|
|
425
|
-
"session_id": session_id,
|
|
426
|
-
"url": session_url,
|
|
427
|
-
"status": str(payload.status),
|
|
428
|
-
"message": payload.message,
|
|
429
|
-
"summary": payload.summary,
|
|
430
|
-
"screenshot_count": len(payload.screenshots),
|
|
431
|
-
}
|
|
432
|
-
)
|
|
433
|
-
)
|
|
434
|
-
if payload.status == SessionStatus.FAILED:
|
|
435
|
-
raise typer.Exit(code=1)
|
|
436
|
-
return
|
|
452
|
+
terminal = payload.status in {SessionStatus.SUCCEEDED, SessionStatus.FAILED}
|
|
453
|
+
screenshot_count = len(payload.screenshots)
|
|
454
|
+
if terminal and payload.screenshots:
|
|
455
|
+
_write_screenshots(ctx.assertion_dir, session_id, payload.screenshots)
|
|
437
456
|
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
457
|
+
if json_output:
|
|
458
|
+
typer.echo(
|
|
459
|
+
json.dumps(
|
|
460
|
+
{
|
|
461
|
+
"session_id": session_id,
|
|
462
|
+
"url": session_url,
|
|
463
|
+
"status": str(payload.status),
|
|
464
|
+
"message": payload.message,
|
|
465
|
+
"summary": payload.summary,
|
|
466
|
+
"screenshot_count": screenshot_count,
|
|
467
|
+
}
|
|
442
468
|
)
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
typer.echo(payload.
|
|
469
|
+
)
|
|
470
|
+
else:
|
|
471
|
+
typer.echo(f"status: {payload.status}")
|
|
472
|
+
if payload.message:
|
|
473
|
+
typer.echo(payload.message)
|
|
446
474
|
if payload.summary:
|
|
447
475
|
typer.echo(f"\nSummary:\n{payload.summary}")
|
|
448
|
-
if payload.screenshots:
|
|
476
|
+
if terminal and payload.screenshots:
|
|
449
477
|
typer.echo(
|
|
450
|
-
f"\nScreenshots ({
|
|
478
|
+
f"\nScreenshots ({screenshot_count}) saved to "
|
|
451
479
|
f".assertion/screenshots/{session_id}/"
|
|
452
480
|
)
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
# state so the next `asrt prompt` / `asrt checkpoint` starts fresh.
|
|
459
|
-
if sys.stdin.isatty() and typer.confirm(
|
|
460
|
-
"\nStart a new session? This clears .assertion/ (prompts, metadata, checkpoint).",
|
|
461
|
-
default=False,
|
|
462
|
-
):
|
|
463
|
-
_clear_session_state(ctx)
|
|
464
|
-
typer.echo("Cleared. Next `asrt prompt` starts a new session.")
|
|
465
|
-
return
|
|
481
|
+
if session_url:
|
|
482
|
+
typer.echo(f"\nSession: {session_url}")
|
|
483
|
+
|
|
484
|
+
if payload.status == SessionStatus.FAILED:
|
|
485
|
+
raise typer.Exit(code=1)
|
|
466
486
|
|
|
467
487
|
|
|
468
488
|
@app.command("get-link")
|
|
@@ -6,8 +6,8 @@ from api import AssertionClient
|
|
|
6
6
|
from git import (
|
|
7
7
|
exit_with_error,
|
|
8
8
|
find_git_root,
|
|
9
|
+
get_diff_base_sha,
|
|
9
10
|
get_origin_github_repo,
|
|
10
|
-
require_head_pushed,
|
|
11
11
|
)
|
|
12
12
|
from models import MetadataPayload, SessionContext, render_stack_list
|
|
13
13
|
|
|
@@ -19,7 +19,9 @@ METADATA_FILE_NAME = "metadata.json"
|
|
|
19
19
|
|
|
20
20
|
def _require_repo_ready(start_path: Path) -> Path:
|
|
21
21
|
repo_root = find_git_root(start_path)
|
|
22
|
-
|
|
22
|
+
# Fail fast here if no remote-reachable base exists. Result is discarded
|
|
23
|
+
# — callers re-compute at diff time, which is cheap.
|
|
24
|
+
get_diff_base_sha(repo_root)
|
|
23
25
|
return repo_root
|
|
24
26
|
|
|
25
27
|
|
|
@@ -77,7 +79,7 @@ def _require_prompts_file(ctx: SessionContext) -> None:
|
|
|
77
79
|
):
|
|
78
80
|
exit_with_error(
|
|
79
81
|
f"ERROR: {ASSERTION_DIR_NAME}/{PROMPTS_FILE_NAME} is missing or empty.\n"
|
|
80
|
-
'Record at least one
|
|
82
|
+
'Record at least one user prompt with `asrt prompt "<message>"` before running checkpoint.'
|
|
81
83
|
)
|
|
82
84
|
|
|
83
85
|
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
# Assertion verification
|
|
2
2
|
|
|
3
|
-
This repo uses the Assertion CLI (`asrt`) to track
|
|
3
|
+
This repo uses the Assertion CLI (`asrt`) to track user intent and verify your work. The full workflow lives in the `assertion-cli` skill:
|
|
4
4
|
|
|
5
5
|
- **Claude Code** — [`.claude/skills/assertion-cli/SKILL.md`](.claude/skills/assertion-cli/SKILL.md)
|
|
6
6
|
- **Codex / Cursor** — [`.agents/skills/assertion-cli/SKILL.md`](.agents/skills/assertion-cli/SKILL.md)
|
|
7
7
|
|
|
8
8
|
Both files are byte-identical. Read whichever your agent loads (or either one directly). In brief, follow these three rules for every conversation:
|
|
9
9
|
|
|
10
|
-
1. **Every
|
|
10
|
+
1. **Every user message:** call `asrt prompt "<exact text of the message>"` before doing any other work for that turn. This is the verifier's source of truth for what the user asked for and how their direction evolves. Apply to every user turn, not just the first.
|
|
11
11
|
2. **Checkpoint at trajectory-feedback moments**, not on every small change. Use `asrt checkpoint "<summary>"` — the CLI auto-resolves the stack from this repo's GitHub origin on the first call and auto-continues the session on subsequent calls. A whole feature build is a handful of checkpoints, not one per file.
|
|
12
12
|
3. **Verify at completion** with `asrt verify`. If feedback requires changes, iterate and re-verify — do **not** drop back into the checkpoint loop. Once verify is clean, open a PR with `gh pr create` and include the session URL (from `asrt get-link`) in the body.
|
|
13
13
|
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: assertion-cli
|
|
3
|
-
description: Use this skill in non-Plan mode whenever working in a repo that uses the Assertion CLI (`asrt`). It enforces the three-rule workflow — log every
|
|
3
|
+
description: Use this skill in non-Plan mode whenever working in a repo that uses the Assertion CLI (`asrt`). It enforces the three-rule workflow — log every user message with `asrt prompt`, checkpoint at trajectory-feedback moments (not on every change), and verify only at completion before opening a PR. Applies to any coding agent reading this skill.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# Assertion CLI
|
|
7
7
|
|
|
8
|
-
You are operating in a repo that uses the Assertion CLI (`asrt`) to track
|
|
8
|
+
You are operating in a repo that uses the Assertion CLI (`asrt`) to track user intent and verify your work. Follow the three rules below without exception. They apply regardless of which coding agent harness you are running under (Claude Code, Codex, Cursor, etc.).
|
|
9
9
|
|
|
10
10
|
This skill is operational, not advisory. Treat each rule as a hard requirement unless the underlying service is unavailable.
|
|
11
11
|
|
|
@@ -14,7 +14,7 @@ This skill is operational, not advisory. Treat each rule as a hard requirement u
|
|
|
14
14
|
Before any `asrt` call:
|
|
15
15
|
|
|
16
16
|
- You are inside a git repository.
|
|
17
|
-
-
|
|
17
|
+
- The repo has at least one remote-tracking ref that shares history with `HEAD` (the CLI uses the latest such commit as the diff base for checkpoint/verify; local-only commits on top flow through as part of the diff). `asrt prompt` does not require this.
|
|
18
18
|
|
|
19
19
|
No `asrt init` is needed — the CLI creates `.assertion/` on first use. Treat `.assertion/metadata.json` and `.assertion/prompts` as internal state owned by the CLI. Do not create, edit, or delete those files manually — use `asrt prompt` to append to the prompts log.
|
|
20
20
|
|
|
@@ -22,21 +22,21 @@ No `asrt init` is needed — the CLI creates `.assertion/` on first use. Treat `
|
|
|
22
22
|
|
|
23
23
|
---
|
|
24
24
|
|
|
25
|
-
## Rule 1 — Log every
|
|
25
|
+
## Rule 1 — Log every user message, immediately
|
|
26
26
|
|
|
27
|
-
**The first thing you do when a new
|
|
27
|
+
**The first thing you do when a new user message arrives — before reading any file, planning, or editing anything — is run:**
|
|
28
28
|
|
|
29
29
|
```
|
|
30
|
-
asrt prompt "<exact text of the
|
|
30
|
+
asrt prompt "<exact text of the user's message>"
|
|
31
31
|
```
|
|
32
32
|
|
|
33
|
-
This applies to **every
|
|
33
|
+
This applies to **every user turn**, not just the first. It applies even when the message is a one-word direction change ("nope"), a clarification ("use Postgres, not SQLite"), or a follow-up after verify ("also add dark mode").
|
|
34
34
|
|
|
35
|
-
**Why:** The prompts log is the verifier's source of truth for what the
|
|
35
|
+
**Why:** The prompts log is the verifier's source of truth for what the user asked for and how their direction evolved. It is shipped with every checkpoint and the final verify, and persisted in the verification database. A missed turn causes the verifier to flag legitimate work as off-spec because it cannot see the intent change that authorized the work.
|
|
36
36
|
|
|
37
37
|
**Edge cases:**
|
|
38
38
|
- Multi-line / multi-paragraph messages: pass the full text as one quoted argument. Use shell-quoting that preserves newlines (e.g., `$'first line\nsecond line'` in bash, or a heredoc piped via `xargs -0`; plain `xargs` splits its input on whitespace and would pass the message as several arguments).
|
|
39
|
-
- Empty / whitespace-only
|
|
39
|
+
- Empty / whitespace-only user turns: skip — `asrt prompt` rejects them.
|
|
40
40
|
- You misrecorded a prompt: there is no undo. Append a correction as a normal prompt, e.g. `asrt prompt "Correction: previous message should have been '...'"`.
|
|
41
41
|
|
|
42
42
|
---
|
|
@@ -49,7 +49,7 @@ This applies to **every customer turn**, not just the first. It applies even whe
|
|
|
49
49
|
- A meaningful slice of the feature is complete (e.g., the data model, the API layer, the UI scaffold).
|
|
50
50
|
- You made a non-trivial design choice and want it sanity-checked before building further on it.
|
|
51
51
|
- You hit ambiguity and want input on which of two reasonable paths to take.
|
|
52
|
-
- The
|
|
52
|
+
- The user's evolving direction (visible in the prompt log) could legitimately change what comes next.
|
|
53
53
|
|
|
54
54
|
**Do NOT checkpoint when:**
|
|
55
55
|
- You finished a single file edit.
|
|
@@ -68,7 +68,7 @@ You don't need a flag to distinguish "first" from "subsequent" — the CLI does
|
|
|
68
68
|
- No session exists yet → start a new one. The CLI picks the stack by matching the repo's GitHub `owner/name` (from `git remote get-url origin`) against the stacks attached to this repo.
|
|
69
69
|
- A session already exists → continue it.
|
|
70
70
|
|
|
71
|
-
If no stack is attached to this repo on the very first checkpoint, the command fails with a clear message telling the
|
|
71
|
+
If no stack is attached to this repo on the very first checkpoint, the command fails with a clear message telling the user to attach one in the Assertion web app. If multiple stacks are attached, it tells you to pass `--stack <id>` to disambiguate.
|
|
72
72
|
|
|
73
73
|
Flags exist for explicit intent, but you normally don't need them:
|
|
74
74
|
- `--continue` — fail fast if no session exists; when a session does exist, it behaves identically to passing no flag.
|
|
@@ -87,9 +87,9 @@ Use checkpoint messages to summarize substantive progress and decisions since th
|
|
|
87
87
|
Stack selection is **enforced by the CLI**, not by you. When you run `asrt checkpoint "..."` (no `--stack` flag), the CLI:
|
|
88
88
|
|
|
89
89
|
1. Runs `git remote get-url origin` and normalizes to `owner/name`
|
|
90
|
-
2. Calls the backend, fetches every stack in the
|
|
90
|
+
2. Calls the backend, fetches every stack in the user's workspace, and matches by the stack's `repo` field
|
|
91
91
|
3. Picks the single match → starts the session against it
|
|
92
|
-
4. If zero or multiple match, exits non-zero with a clear,
|
|
92
|
+
4. If zero or multiple match, exits non-zero with a clear, user-facing error
|
|
93
93
|
|
|
94
94
|
You don't need to do the matching yourself or pass `--stack`. Just call `asrt checkpoint "..."`.
|
|
95
95
|
|
|
@@ -99,20 +99,20 @@ This is the most common failure. The CLI distinguishes three cases — your resp
|
|
|
99
99
|
|
|
100
100
|
| CLI error starts with | Meaning |
|
|
101
101
|
|---|---|
|
|
102
|
-
| `No verification stacks exist in this workspace yet.` |
|
|
102
|
+
| `No verification stacks exist in this workspace yet.` | User has never set up any stacks. |
|
|
103
103
|
| `No verification stack is attached to this repo (...)` | Stacks exist but none are bound to this repo. |
|
|
104
104
|
| `Multiple stacks are attached to ...` | Two or more stacks match; CLI asks for `--stack <id>`. |
|
|
105
105
|
|
|
106
|
-
**For the first two:** the
|
|
106
|
+
**For the first two:** the user has to take an action in the Assertion web app. Your response:
|
|
107
107
|
|
|
108
|
-
1. Tell the
|
|
109
|
-
2. **Keep the work moving on your side.** You can continue editing files and implementing what the
|
|
110
|
-
3. Keep calling `asrt prompt "..."` on every new
|
|
111
|
-
4. When the
|
|
108
|
+
1. Tell the user the error verbatim. The CLI already wrote the right message.
|
|
109
|
+
2. **Keep the work moving on your side.** You can continue editing files and implementing what the user asked for. Verification is the gate before opening a PR — not a gate on writing code.
|
|
110
|
+
3. Keep calling `asrt prompt "..."` on every new user turn. The prompt log accumulates in `.assertion/prompts` and is *not* lost when checkpoint fails — once the user attaches a stack, the next successful `asrt checkpoint "..."` ships every prompt logged so far in its bundle.
|
|
111
|
+
4. When the user says they've attached a stack (or asks you to retry), call `asrt checkpoint "<what you built so far>"` again. The CLI will resolve the stack this time and start the session with the full prompt history already in the bundle.
|
|
112
112
|
|
|
113
|
-
**Do not** try to work around the missing stack by guessing a stack ID, hardcoding one from another repo, or picking the wrong one with `--stack`. Repo-binding is the
|
|
113
|
+
**Do not** try to work around the missing stack by guessing a stack ID, hardcoding one from another repo, or picking the wrong one with `--stack`. Repo-binding is the user's signal that the stack is configured for this codebase; bypassing it creates a verification session in the wrong context.
|
|
114
114
|
|
|
115
|
-
**For the third case (multiple matches):** the CLI's error lists the candidates. Pick the one whose name/description best fits the primary deliverable (code review, security, docs, perf) and pass `asrt checkpoint --stack <id> "..."`. State your choice briefly so the
|
|
115
|
+
**For the third case (multiple matches):** the CLI's error lists the candidates. Pick the one whose name/description best fits the primary deliverable (code review, security, docs, perf) and pass `asrt checkpoint --stack <id> "..."`. State your choice briefly so the user can correct you.
|
|
116
116
|
|
|
117
117
|
### Optional: confirm before you start
|
|
118
118
|
|
|
@@ -125,35 +125,51 @@ If you want to know which stack the CLI *would* pick before running checkpoint (
|
|
|
125
125
|
`asrt verify` is the terminal validation gate. Call it when **all three** are true:
|
|
126
126
|
|
|
127
127
|
1. The feature is implemented.
|
|
128
|
-
2. The
|
|
128
|
+
2. The user has signalled no further edits or redirections.
|
|
129
129
|
3. All in-flight checkpoint feedback has been addressed.
|
|
130
130
|
|
|
131
131
|
Do not call `verify` mid-task. It is not a substitute for checkpoints; it is the gate after them.
|
|
132
132
|
|
|
133
|
-
`verify`
|
|
133
|
+
`verify` is **non-blocking**: it submits the verification and returns immediately with the session id and URL. The verification itself can take several minutes — you poll `asrt verify-status` on your own cadence to learn the outcome. This avoids tripping any shell-timeout in the harness.
|
|
134
|
+
|
|
135
|
+
```
|
|
136
|
+
asrt verify # submit; prints session id + URL, exits 0
|
|
137
|
+
# … then poll until terminal:
|
|
138
|
+
while :; do
|
|
139
|
+
out=$(asrt verify-status --json)
|
|
140
|
+
st=$(printf '%s' "$out" | jq -r .status)
|
|
141
|
+
case "$st" in
|
|
142
|
+
succeeded|failed) printf '%s\n' "$out"; break ;;
|
|
143
|
+
*) sleep 10 ;;
|
|
144
|
+
esac
|
|
145
|
+
done
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
`asrt verify-status` is a one-shot poll. It prints the current status, refreshes `.assertion/link`, and, once the status is terminal (`succeeded` or `failed`), writes screenshots under `.assertion/screenshots/<verification_id>/`. Exit code is `0` for `created` / `running` / `succeeded` and `1` for `failed`, so a polling loop can also branch on `$?`.
|
|
134
149
|
|
|
135
150
|
`asrt get-link` prints the last session URL (e.g., for inclusion in PR descriptions).
|
|
136
151
|
|
|
137
|
-
**After
|
|
152
|
+
**After status reports terminal, branch on the outcome:**
|
|
138
153
|
|
|
139
154
|
| Outcome | What you do next |
|
|
140
155
|
|---|---|
|
|
141
|
-
| **Clean** (succeeded, no required changes) | Open a PR with `gh pr create`. Include the session URL (printed by verify; also `asrt get-link`) in the PR body so reviewers can see the verification record. |
|
|
142
|
-
| **Feedback or failed** (changes required) | Treat the feedback as authoritative. Apply the changes it calls for, then re-run `asrt verify
|
|
143
|
-
| **
|
|
156
|
+
| **Clean** (succeeded, no required changes) | Open a PR with `gh pr create`. Include the session URL (printed by verify/status; also `asrt get-link`) in the PR body so reviewers can see the verification record. |
|
|
157
|
+
| **Feedback or failed** (changes required) | Treat the feedback as authoritative. Apply the changes it calls for, then re-run `asrt verify` and poll `asrt verify-status` again. Repeat until clean. **Do not drop back into the checkpoint loop** — verify is the gate, not a checkpoint. |
|
|
158
|
+
| **User sends a new message during iteration** | Log it with `asrt prompt` first (Rule 1 still applies). Address the new direction. Re-verify before opening the PR. |
|
|
144
159
|
|
|
145
|
-
**Do not open a PR without a clean `asrt verify`.** Do not end a session with a failed/dirty verify unless the verification service itself is unavailable.
|
|
160
|
+
**Do not open a PR without a clean `asrt verify` / `asrt verify-status`.** Do not end a session with a failed/dirty verify unless the verification service itself is unavailable.
|
|
146
161
|
|
|
147
162
|
---
|
|
148
163
|
|
|
149
164
|
## Quick reference
|
|
150
165
|
|
|
151
166
|
```
|
|
152
|
-
asrt prompt "<message>" # every
|
|
167
|
+
asrt prompt "<message>" # every user turn, before anything else
|
|
153
168
|
asrt stacks # optional: list stacks attached to this repo
|
|
154
169
|
asrt checkpoint "..." # at trajectory moments — auto-starts or continues
|
|
155
170
|
asrt decision --yes|--no <ckpt-id> # optional, on failed checkpoints only
|
|
156
|
-
asrt verify #
|
|
171
|
+
asrt verify # submit verification (non-blocking)
|
|
172
|
+
asrt verify-status # one-shot poll; loop with your own sleep
|
|
157
173
|
asrt get-link # retrieve the last session URL
|
|
158
174
|
```
|
|
159
175
|
|
|
@@ -172,6 +188,6 @@ When running under a non-interactive harness (CI, benchmark runner, scripts):
|
|
|
172
188
|
- Apply changes targeted at the cited failure.
|
|
173
189
|
- Re-run `asrt checkpoint "<what you changed>" --json` (auto-continues the existing session).
|
|
174
190
|
- Bound retries: stop after 3 attempts on the same failure mode, or when the same `reasoning` is returned twice in a row.
|
|
175
|
-
4.
|
|
191
|
+
4. `asrt verify --json` submits and exits after printing a one-line JSON object with the keys `session_id`, `url`, and `status` (set to `"submitted"`). Poll `asrt verify-status --json` on your own cadence — once the status is terminal, the output is a JSON object with the keys `session_id`, `url`, `status`, `message`, `summary`, and `screenshot_count`, and the exit code is `1` on `failed`, `0` otherwise. Choose the poll interval to fit your harness timeouts; do not block a single shell call on verify.
|
|
176
192
|
|
|
177
193
|
Do not mix `--json` output with human-rendered output in the same run. Pick one mode and stick with it.
|
|
@@ -4,7 +4,13 @@ from pathlib import Path
|
|
|
4
4
|
import pytest
|
|
5
5
|
from click.exceptions import Exit as ClickExit
|
|
6
6
|
|
|
7
|
-
from git import
|
|
7
|
+
from git import (
|
|
8
|
+
find_git_root,
|
|
9
|
+
get_diff_base_sha,
|
|
10
|
+
get_head_sha,
|
|
11
|
+
get_uncommitted_diff,
|
|
12
|
+
run_git_command,
|
|
13
|
+
)
|
|
8
14
|
|
|
9
15
|
|
|
10
16
|
# --- run_git_command ---
|
|
@@ -87,7 +93,7 @@ def _init_repo_with_commit(tmp_path: Path) -> None:
|
|
|
87
93
|
|
|
88
94
|
def test_get_uncommitted_diff_clean(tmp_path: Path) -> None:
|
|
89
95
|
_init_repo_with_commit(tmp_path)
|
|
90
|
-
diff = get_uncommitted_diff(tmp_path)
|
|
96
|
+
diff = get_uncommitted_diff(tmp_path, "HEAD")
|
|
91
97
|
assert diff == ""
|
|
92
98
|
|
|
93
99
|
|
|
@@ -107,7 +113,7 @@ def test_get_uncommitted_diff_tracked_changes(tmp_path: Path) -> None:
|
|
|
107
113
|
env={**GIT_ENV, "HOME": str(tmp_path)},
|
|
108
114
|
)
|
|
109
115
|
f.write_text("modified\n")
|
|
110
|
-
diff = get_uncommitted_diff(tmp_path)
|
|
116
|
+
diff = get_uncommitted_diff(tmp_path, "HEAD")
|
|
111
117
|
assert "hello.txt" in diff
|
|
112
118
|
assert "-original" in diff
|
|
113
119
|
assert "+modified" in diff
|
|
@@ -120,7 +126,7 @@ def test_get_uncommitted_diff_staged_changes(tmp_path: Path) -> None:
|
|
|
120
126
|
subprocess.run(
|
|
121
127
|
["git", "add", "staged.txt"], cwd=tmp_path, capture_output=True, check=True
|
|
122
128
|
)
|
|
123
|
-
diff = get_uncommitted_diff(tmp_path)
|
|
129
|
+
diff = get_uncommitted_diff(tmp_path, "HEAD")
|
|
124
130
|
assert "staged.txt" in diff
|
|
125
131
|
assert "+staged content" in diff
|
|
126
132
|
|
|
@@ -129,7 +135,7 @@ def test_get_uncommitted_diff_untracked_files(tmp_path: Path) -> None:
|
|
|
129
135
|
_init_repo_with_commit(tmp_path)
|
|
130
136
|
f = tmp_path / "new_file.py"
|
|
131
137
|
f.write_text("print('hello')\n")
|
|
132
|
-
diff = get_uncommitted_diff(tmp_path)
|
|
138
|
+
diff = get_uncommitted_diff(tmp_path, "HEAD")
|
|
133
139
|
assert "new_file.py" in diff
|
|
134
140
|
assert "new file mode" in diff
|
|
135
141
|
assert "+print('hello')" in diff
|
|
@@ -160,18 +166,18 @@ def test_get_uncommitted_diff_combines_all_sources(tmp_path: Path) -> None:
|
|
|
160
166
|
# Untracked file
|
|
161
167
|
(tmp_path / "untracked.txt").write_text("surprise\n")
|
|
162
168
|
|
|
163
|
-
diff = get_uncommitted_diff(tmp_path)
|
|
169
|
+
diff = get_uncommitted_diff(tmp_path, "HEAD")
|
|
164
170
|
assert "tracked.txt" in diff
|
|
165
171
|
assert "staged.txt" in diff
|
|
166
172
|
assert "untracked.txt" in diff
|
|
167
173
|
|
|
168
174
|
|
|
169
175
|
def test_get_uncommitted_diff_includes_untracked_file(tmp_path: Path) -> None:
|
|
170
|
-
|
|
176
|
+
_init_repo_with_commit(tmp_path)
|
|
171
177
|
new_file = tmp_path / "notes.txt"
|
|
172
178
|
new_file.write_text("hello\nworld\n", encoding="utf-8")
|
|
173
179
|
|
|
174
|
-
diff = get_uncommitted_diff(tmp_path)
|
|
180
|
+
diff = get_uncommitted_diff(tmp_path, "HEAD")
|
|
175
181
|
|
|
176
182
|
assert "diff --git a/notes.txt b/notes.txt" in diff
|
|
177
183
|
assert "new file mode" in diff
|
|
@@ -181,10 +187,117 @@ def test_get_uncommitted_diff_includes_untracked_file(tmp_path: Path) -> None:
|
|
|
181
187
|
|
|
182
188
|
|
|
183
189
|
def test_get_uncommitted_diff_includes_empty_untracked_file(tmp_path: Path) -> None:
|
|
184
|
-
|
|
190
|
+
_init_repo_with_commit(tmp_path)
|
|
185
191
|
(tmp_path / "empty.txt").write_text("", encoding="utf-8")
|
|
186
192
|
|
|
187
|
-
diff = get_uncommitted_diff(tmp_path)
|
|
193
|
+
diff = get_uncommitted_diff(tmp_path, "HEAD")
|
|
188
194
|
|
|
189
195
|
assert "diff --git a/empty.txt b/empty.txt" in diff
|
|
190
196
|
assert "new file mode" in diff
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
# --- get_diff_base_sha + local-commit handling ---
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def _set_up_repo_with_fake_remote(tmp_path: Path) -> tuple[Path, Path]:
|
|
203
|
+
"""Build a local repo with a `refs/remotes/origin/*` ref pointing at HEAD.
|
|
204
|
+
|
|
205
|
+
Returns (repo_path, bare_remote_path). We push to a real bare remote so the
|
|
206
|
+
`refs/remotes/origin/main` entry exists naturally — `get_diff_base_sha`
|
|
207
|
+
reads it via `git for-each-ref refs/remotes`.
|
|
208
|
+
"""
|
|
209
|
+
bare = tmp_path / "remote.git"
|
|
210
|
+
repo = tmp_path / "repo"
|
|
211
|
+
subprocess.run(
|
|
212
|
+
["git", "init", "--bare", str(bare)], capture_output=True, check=True
|
|
213
|
+
)
|
|
214
|
+
subprocess.run(
|
|
215
|
+
["git", "init", "-b", "main", str(repo)], capture_output=True, check=True
|
|
216
|
+
)
|
|
217
|
+
env = {**GIT_ENV, "HOME": str(repo)}
|
|
218
|
+
subprocess.run(
|
|
219
|
+
["git", "commit", "--allow-empty", "-m", "init"],
|
|
220
|
+
cwd=repo,
|
|
221
|
+
capture_output=True,
|
|
222
|
+
check=True,
|
|
223
|
+
env=env,
|
|
224
|
+
)
|
|
225
|
+
subprocess.run(
|
|
226
|
+
["git", "remote", "add", "origin", str(bare)],
|
|
227
|
+
cwd=repo,
|
|
228
|
+
capture_output=True,
|
|
229
|
+
check=True,
|
|
230
|
+
)
|
|
231
|
+
subprocess.run(
|
|
232
|
+
["git", "push", "-u", "origin", "main"],
|
|
233
|
+
cwd=repo,
|
|
234
|
+
capture_output=True,
|
|
235
|
+
check=True,
|
|
236
|
+
env=env,
|
|
237
|
+
)
|
|
238
|
+
return repo, bare
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def test_get_diff_base_sha_returns_head_when_no_local_commits(tmp_path: Path) -> None:
|
|
242
|
+
repo, _ = _set_up_repo_with_fake_remote(tmp_path)
|
|
243
|
+
head = run_git_command(repo, ["rev-parse", "HEAD"])
|
|
244
|
+
assert get_diff_base_sha(repo) == head
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def test_get_diff_base_sha_returns_pushed_parent_when_local_commit_present(
|
|
248
|
+
tmp_path: Path,
|
|
249
|
+
) -> None:
|
|
250
|
+
"""The bug fix: when the agent commits locally without pushing, the base
|
|
251
|
+
must remain the latest pushed commit — not the local-only HEAD."""
|
|
252
|
+
repo, _ = _set_up_repo_with_fake_remote(tmp_path)
|
|
253
|
+
pushed_head = run_git_command(repo, ["rev-parse", "HEAD"])
|
|
254
|
+
|
|
255
|
+
# Local commit on top of the pushed tip (not pushed back to origin).
|
|
256
|
+
(repo / "local.txt").write_text("local change\n")
|
|
257
|
+
env = {**GIT_ENV, "HOME": str(repo)}
|
|
258
|
+
subprocess.run(
|
|
259
|
+
["git", "add", "local.txt"], cwd=repo, capture_output=True, check=True
|
|
260
|
+
)
|
|
261
|
+
subprocess.run(
|
|
262
|
+
["git", "commit", "-m", "local-only"],
|
|
263
|
+
cwd=repo,
|
|
264
|
+
capture_output=True,
|
|
265
|
+
check=True,
|
|
266
|
+
env=env,
|
|
267
|
+
)
|
|
268
|
+
new_head = run_git_command(repo, ["rev-parse", "HEAD"])
|
|
269
|
+
assert new_head != pushed_head
|
|
270
|
+
|
|
271
|
+
assert get_diff_base_sha(repo) == pushed_head
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def test_get_diff_base_sha_exits_when_no_remote_refs(tmp_path: Path) -> None:
|
|
275
|
+
_init_repo_with_commit(tmp_path)
|
|
276
|
+
with pytest.raises(ClickExit):
|
|
277
|
+
get_diff_base_sha(tmp_path)
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def test_get_uncommitted_diff_includes_local_only_commit_against_base(
|
|
281
|
+
tmp_path: Path,
|
|
282
|
+
) -> None:
|
|
283
|
+
"""A local-only commit on top of the pushed base must show up in the diff
|
|
284
|
+
sent to the verifier — otherwise the verifier would never see it."""
|
|
285
|
+
repo, _ = _set_up_repo_with_fake_remote(tmp_path)
|
|
286
|
+
base_sha = run_git_command(repo, ["rev-parse", "HEAD"])
|
|
287
|
+
|
|
288
|
+
(repo / "local.txt").write_text("local change\n")
|
|
289
|
+
env = {**GIT_ENV, "HOME": str(repo)}
|
|
290
|
+
subprocess.run(
|
|
291
|
+
["git", "add", "local.txt"], cwd=repo, capture_output=True, check=True
|
|
292
|
+
)
|
|
293
|
+
subprocess.run(
|
|
294
|
+
["git", "commit", "-m", "local-only"],
|
|
295
|
+
cwd=repo,
|
|
296
|
+
capture_output=True,
|
|
297
|
+
check=True,
|
|
298
|
+
env=env,
|
|
299
|
+
)
|
|
300
|
+
|
|
301
|
+
diff = get_uncommitted_diff(repo, base_sha)
|
|
302
|
+
assert "local.txt" in diff
|
|
303
|
+
assert "+local change" in diff
|
|
@@ -114,17 +114,17 @@ def test_init_requires_git_repo(tmp_path: Path, monkeypatch) -> None:
|
|
|
114
114
|
def test_refresh_skill_files_does_not_touch_activation_files(tmp_path: Path) -> None:
|
|
115
115
|
"""`asrt checkpoint` must refresh skill files without dirtying CLAUDE.md / AGENTS.md.
|
|
116
116
|
|
|
117
|
-
|
|
117
|
+
User-authored CLAUDE.md may be tracked in git; silently editing it on every
|
|
118
118
|
checkpoint is hostile. Only `asrt init` is allowed to touch activation blocks.
|
|
119
119
|
"""
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
(tmp_path / "CLAUDE.md").write_text(
|
|
123
|
-
(tmp_path / "AGENTS.md").write_text(
|
|
120
|
+
user_claude = "# My CLAUDE.md\n\nCustom rules.\n"
|
|
121
|
+
user_agents = "# Project guidelines\n"
|
|
122
|
+
(tmp_path / "CLAUDE.md").write_text(user_claude)
|
|
123
|
+
(tmp_path / "AGENTS.md").write_text(user_agents)
|
|
124
124
|
|
|
125
125
|
main._refresh_skill_files(tmp_path)
|
|
126
126
|
|
|
127
|
-
assert (tmp_path / "CLAUDE.md").read_text() ==
|
|
128
|
-
assert (tmp_path / "AGENTS.md").read_text() ==
|
|
127
|
+
assert (tmp_path / "CLAUDE.md").read_text() == user_claude
|
|
128
|
+
assert (tmp_path / "AGENTS.md").read_text() == user_agents
|
|
129
129
|
assert (tmp_path / ".claude" / "skills" / "assertion-cli" / "SKILL.md").exists()
|
|
130
130
|
assert (tmp_path / ".agents" / "skills" / "assertion-cli" / "SKILL.md").exists()
|
|
@@ -97,7 +97,7 @@ def test_resolve_errors_when_workspace_has_no_stacks(tmp_path: Path, capsys) ->
|
|
|
97
97
|
with pytest.raises(typer.Exit):
|
|
98
98
|
resolve_stack_id_for_repo(tmp_path)
|
|
99
99
|
captured = capsys.readouterr()
|
|
100
|
-
# Distinct message — points the
|
|
100
|
+
# Distinct message — points the user at "create a stack", not "attach one"
|
|
101
101
|
assert "No verification stacks exist in this workspace" in captured.err
|
|
102
102
|
|
|
103
103
|
|
assertion_cli-0.1.0/README.md
DELETED
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
# Assertion CLI
|
|
2
|
-
|
|
3
|
-
CLI for the Assertion API.
|
|
4
|
-
|
|
5
|
-
## Usage
|
|
6
|
-
|
|
7
|
-
The CLI currently targets a local backend at `http://localhost:8000`.
|
|
8
|
-
|
|
9
|
-
Run locally from the workspace:
|
|
10
|
-
|
|
11
|
-
```bash
|
|
12
|
-
uv run --package assertion-cli asrt --help
|
|
13
|
-
```
|
|
14
|
-
|
|
15
|
-
Install from Git as a `uv` tool:
|
|
16
|
-
|
|
17
|
-
```bash
|
|
18
|
-
uv tool install git+ssh://git@github.com/prooflayer-ai/backend.git#subdirectory=cli
|
|
19
|
-
```
|
|
20
|
-
|
|
21
|
-
The CLI package declares all of its direct runtime dependencies. At the moment
|
|
22
|
-
that set is `httpx`, `pydantic`, and `typer`.
|
|
23
|
-
|
|
24
|
-
After installation:
|
|
25
|
-
|
|
26
|
-
```bash
|
|
27
|
-
asrt stacks
|
|
28
|
-
asrt checkpoint --stack <stack-id> "Implemented X\nUpdated Y"
|
|
29
|
-
asrt checkpoint --continue "Implemented Y"
|
|
30
|
-
asrt decision --yes <checkpoint-id> # optional, only after a failed checkpoint
|
|
31
|
-
asrt verify
|
|
32
|
-
```
|
|
33
|
-
|
|
34
|
-
This expects the installer to already have GitHub SSH access to `prooflayer-ai/backend`.
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|